1/101
Generated by Clajude
Name | Mastery | Learn | Test | Matching | Spaced |
---|
No study sessions yet.
How do you create variables and check their types?
name = 'John'
age = 25
print(type(name), type(age))
>>> <class 'str'> <class 'int'>
How do you create and manipulate lists?
fruits=['apple','banana']
fruits.append('orange')
fruits.extend(['grape','kiwi'])
print(fruits[0]) # First element
print(fruits[-1]) # Last element
>>>apple
>>>kiwi
How do you create and access dictionary data?
student = {'name': 'Alice', 'age': 22}
print(student['name'])
student['grade'] = 'A'
print(student.get('city', 'Unknown'))
>>>Alice
>>>Unknown
How do you iterate through data with for loops?
# Iterate over a list
for item in ['a', 'b', 'c']:
print(item)
>>>a
>>>b
>>>c
# Iterate with index
for i in range(5):
print(f"Number: {i}")
>>>Number: 0
>>>Number: 1
>>>Number: 2
>>>Number: 3
>>>Number: 4
How do you create lists efficiently using list comprehensions?
# Basic list comprehension
squares = [x**2 for x in range(5)]
print(squares)
>>>[0, 1, 4, 9, 16]
# With condition
evens = [x for x in range(10) if x % 2 == 0]
print(evens)
>>>[0, 2, 4, 6, 8]
# Nested comprehension
matrix = [[i*j for j in range(3)] for i in range(3)]
print(matrix)
>>>[[0, 0, 0], [0, 1, 2], [0, 2, 4]]
How do you define and call functions?
def greet(name, greeting='Hello'):
return f'{greeting}, {name}!'
# Function calls
result = greet('Alice')
result2 = greet('Bob', 'Hi') # Specified a greeting other than the default 'Hello'
print(result)
>>>Hello, Alice!
print(result2)
>>>Hi, Bob!
How do you handle conditional statements?
x = 10
if x > 0:
print('positive')
elif x < 0:
print('negative')
else:
print('zero')
# Ternary operator
result = 'positive' if x > 0 else 'not positive'
>>>positive
How do you work with tuples?
# Create tuple
coords = (3, 5)
point = 1, 2, 3 # Parentheses optional
# Unpack tuple
x,y=coords
print(coords[0]) # Access by index
>>>3
# coords[0] = 10 # Error: tuples are immutable
How do you work with sets?
# Create set
unique_nums = {1, 2, 3, 3, 4} # Duplicates removed
my_set = set([1, 2, 3])
# Set operations
my_set.add(5)
print(my_set)
>>>{1, 2, 3, 5}
my_set.remove(1)
print(1 in my_set)
>>>False
print(my_set)
>>>{2, 3, 5}
How do you perform string operations?
text = 'Hello World'
print(text.lower())
>>>hello world
print(text.upper())
>>>HELLO WORLD
print(text.split())
>>>['Hello', 'World']
print(text.replace('World', 'Python'))
>>>Hello Python
print(text.startswith('Hello'))
>>>True
How do you use while loops?
count = 0
while count < 3:
print(f'Count is: {count}')
count += 1
>>> Count is: 0
>>> Count is: 1
>>> Count is: 2
# With break and continue
How do you work with range()?
# Basic range
for i in range(5): # 0, 1, 2, 3, 4
print(i)
>>> 0
>>> 1
>>> 2
>>> 3
>>> 4
# Range with start, stop, step
for i in range(0, 10, 2):
print(i)
>>> 0
>>> 2
>>> 4
>>> 6
>>> 8
# Convert to list
numbers = list(range(5))
print(numbers)
>>> [0, 1, 2, 3, 4]
How do you check if an item is in a list/dict?
fruits = ['apple', 'banana']
student = {'name': 'Alice', 'age': 22}
print('apple' in fruits)
>>> True
print('name' in student)
>>> True
print('grade' in student)
>>> False
How do you sort lists?
numbers = [3, 1, 4, 1, 5]
# In-place sorting
numbers.sort()
print(numbers)
>>> [1, 1, 3, 4, 5]
# Return new sorted list
sorted_nums = sorted([3, 1, 4], reverse=True)
print(sorted_nums)
>>> [4, 3, 1]
How do you remove items from lists?
fruits = ['apple', 'banana', 'orange']
# Remove by value
fruits.remove('apple')
print(fruits)
>>> ['banana', 'orange']
# Remove by index
removed = fruits.pop(0)
print(fruits)
>>> ['orange']
# Delete by index
del fruits[0]
print(fruits)
>>> []
How do you get user input?
name = input('Enter your name: ')
age = int(input('Enter your age: '))
height = float(input('Enter height: '))
print(f'Hello {name}, you are {age} years old')
# Upon running, a text box will pop up for each option and user will enter values. For example, Name = Oliver, age = 89
>>> Hello Oliver, you are 89 years old
How do you convert between data types?
# String conversions to another data type
num_str = str(42)
>>> '42'
num_int = int('42')
>>> 42
num_float = float('3.14')
>>> 3.14
# Collection conversions
my_list = list('hello')
print(my_list)
>>> ['h', 'e', 'l', 'l', 'o']
my_tuple=tuple([1, 2, 3])
>>> (1, 2, 3)
How do you work with multiple assignment?
# Multiple assignment
a, b = 1, 2
x = y = z = 0
# Swapping variables
a, b = b, a
# Unpacking
coords = (3, 5)
x, y = coords
print(coords)
>>> (3, 5)
How do you use enumerate()?
fruits = ['apple', 'banana', 'cherry']
# Get index and value
for i, fruit in enumerate(fruits):
print(f'{i}: {fruit}')
>>> 0: apple
>>> 1: banana
>>> 2: cherry
# Start counting from 1
for i, fruit in enumerate(fruits, 1):
print(f'{i}: {fruit}')
>>> 1: apple
>>> 2: banana
>>> 3: cherry
How do you use zip()?
names = ['Alice', 'Bob', 'Charlie']
ages = [25, 30, 35]
cities = ['NYC', 'LA', 'Chicago']
# Combine multiple lists
for name, age, city in zip(names, ages, cities):
print(f'{name}, {age}, {city}')
>>> Alice, 25, NYC
>>> Bob, 30, LA
>>> Charlie, 35, Chicago
How do you create NumPy arrays?
import numpy as np
# From list
arr = np.array([1, 2, 3])
print(arr)
>>> [1 2 3]
# Create special arrays
zeros = np.zeros((3, 4)) # 3x4 array of zeros
>>> [[0. 0. 0. 0.]
>>> [0. 0. 0. 0.]
>>> [0. 0. 0. 0.]]
ones = np.ones((2, 3)) # 2x3 array of ones
>>> [[1. 1. 1.]
>>> [1. 1. 1.]]
eye = np.eye(3) # 3x3 identity matrix
>>> [[1. 0. 0.]
>>> [0. 1. 0.]
>>> [0. 0. 1.]]
How do you access NumPy array elements?
arr = np.array([[1,2,3], [4,5,6]])
# Basic indexing
print(arr[0]) # First row
print(arr[0,1]) # Element at row 0, col 1
print(arr[:, 1]) # All rows, col 1
print(arr[1:3]) # Rows 1 to 2
How do you perform mathematical operations on arrays?
a = np.array([1,2,3])
b = np.array([4,5,6])
# Element-wise operations
print(a + b) # [5, 7, 9]
print(a * b) # [4, 10, 18]
print(np.sqrt(a)) # Square root
print(np.exp(a)) # Exponential
How do you reshape NumPy arrays?
arr = np.array([1,2,3,4,5,6])
# Reshape to 2D
reshaped = arr.reshape(2,3) # 2 rows, 3 cols
# Flatten to 1D
flat = reshaped.flatten()
# Transpose
transposed = reshaped.T
How do you find array statistics?
arr = np.array([1,2,3,4,5])
print(np.mean(arr)) # Mean: 3.0
print(np.std(arr)) # Standard deviation: ~1.414
print(np.min(arr)) # Minimum: 1
print(np.max(arr)) # Maximum: 5
print(np.median(arr)) # Median: 3.0
print(np.sum(arr)) # Sum: 15
How do you create arrays with specific values?
# Arrays with specific values
zeros = np.zeros(5) # [0,0,0,0,0]
ones = np.ones(3) # [1,1,1]
full = np.full(4, 7) # [7,7,7,7]
range_arr = np.arange(0, 10, 2) # [0,2,4,6,8]
linspace = np.linspace(0, 1, 5) # 5 points from 0 to 1
How do you perform boolean indexing?
arr = np.array([1, 2, 3, 4, 5, 6])
# Boolean conditions
print(arr > 3) # [False, False, False, True, True, True]
print(arr[arr > 3]) # [4,5,6]
# Multiple conditions
print(arr[(arr > 2) & (arr < 6)]) # [3,4,5]
How do you concatenate arrays?
arr1 = np.array([1,2,3])
arr2 = np.array([4,5,6])
# Concatenate 1D arrays
result = np.concatenate([arr1, arr2]) # [1,2,3,4,5,6]
# Stack arrays
vstack = np.vstack([arr1, arr2]) # Vertical stack
hstack = np.hstack([arr1, arr2]) # Horizontal stack
How do you perform matrix multiplication?
A = np.array([[1,2], [3,4]])
B = np.array([[5,6], [7,8]])
# Matrix multiplication
result1 = np.dot(A, B)
result2 = A @ B # Python 3.5+
result3 = np.matmul(A,B)
# Element-wise multiplication
result4 = A * B
How do you find unique values in arrays?
arr = np.array([1,2,2,3,3,3,4])
# Unique values
unique_vals = np.unique(arr) # [1,2,3,4]
#Unique values with counts
unique_vals, counts = np.unique(arr, return_counts=True)
print(unique_vals) # [1,2,3,4]
print(counts) # [1,2,3,1]
How do you save and load NumPy arrays?
arr = np.array([1,2,3,4,5])
# Save a single array
np.save('my_array.npy', arr)
loaded_arr = np.load('my_array.npy')
# Save multiple arrays
np.savez('arrays.npz', a=arr, b=arr*2)
data = np.load('arrays.npz')
print(data['a'], data['b'])
How do you perform broadcasting?
# Broadcasting allows operations on arrays of different shapes
arr = np.array([[1,2,3], [4,5,6]])
# Add scalar (broadcasts to all elements)
result1 = arr + 10
# Add 1D array to 2D array
vec = np.array([1,2,3])
result2 = arr + vec # Adds vec to each row
How do you work with random numbers?
# Set seed for reproducibility
np.random.seed(42)
# Random arrays
rand_uniform = np.random.rand(3,4) # Uniform [0,1)
rand_normal = np.random.randn(3,4) # Standard normal
rand_int = np.random.randint(0,10,5) # Random integers
How do you perform array comparison?
arr1 = np.array([1,2,3])
arr2 = np.array([1,2,4])
# Element-wise comparison
print(arr1 == arr2) # [True, True, False]
# Array equality
print(np.array_equal(arr1, arr2)) # False
# All/any conditions
print(np.all(arr1 > 0)) # True
print(np.any(arr1 > 2)) # True
How do you work with NaN values in NumPy?
arr = np.array([1,2,np.nan,4,np.nan])
# Check for NaN
print(np.isnan(arr)) # [False, False, True, False, True]
# NaN-aware functions
print(np.nanmean(arr)) # Mean ignoring NaN
print(np.nansum(arr)) # Sum ignoring NaN
# Remove NaN values
clean_arr = arr[~np.isnan(arr)]
How do you create a pandas DataFrame?
import pandas as pd
# From dictionary
df = pd.DataFrame({
'name': ['Alice', 'Bob', 'Charlie'],
'age': [25, 30, 35],
'city': ['NYC', 'LA', 'Chicago']
})
# From lists
df2 = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B'])
How do you read a csv file?
# Basic CSV reading
df = pd.read_csv('file.csv')
# With options
df = pd.read_csv('file.csv',
index_col=0, # Use first column as index
sep=';', # Different separator
encoding='utf-8', # Specify encoding
na_values=['N/A']) # Custom missing values
How do you select data from DataFrames?
# Column selection
df['name'] # Single column (Series)
df[['name', 'age']] # Multiple columns (DataFrame)
# Row selection
df.loc[0] # By label
df.iloc[0] # By position
df.loc[0:2] # Multiple rows
# Boolean indexing
df[df['age'] > 25] # Filter rows
How do you handle missing data?
# Check for missing data
print(df.isnull().sum()) # Count missing values per column
print(df.info()) # Data types and non-null counts
# Handle missing data
df_clean = df.dropna() # Drop rows with NaN
df_filled = df.fillna(0) # Fill NaN values with 0
df['age'] = df['age'].fillna(df['age'].mean()) # Fill with mean (fillna returns a new Series, so assign it back)
How do you perform GroupBy operations?
# Basic groupby
grouped = df.groupby('city')
print(grouped['age'].mean()) # Mean age by city
# Multiple aggregations
result = df.groupby('city')['age'].agg(['mean','std','count'])
# Group by multiple columns
result2 = df.groupby(['city','gender'])['salary'].sum()
How do you filter DataFrames?
# Single condition
young = df[df['age'] < 30]
# Multiple conditions
filtered = df[(df['age'] > 25) & (df['city'] == 'NYC')]
# Using query method
result = df.query('age > 25 and city == "NYC"')
# String contains
name_filter = df[df['name'].str.contains('A')]
How do you add a new column?
# Simple assignment
df['age_squared'] = df['age'] ** 2
# Based on conditions
df['age_group'] = df['age'].apply(lambda x: 'young' if x < 30 else 'old')
# Multiple columns at once
df[['total', 'average']] = df[['col1', 'col2']].apply(lambda x: [x.sum(), x.mean()], axis=1, result_type='expand')
How do you rename columns?
# Rename specific columns
df_renamed = df.rename(columns={'old_name': 'new_name'})
# Rename all columns
df.columns = ['col1', 'col2', 'col3']
# Using str methods
df.columns = df.columns.str.lower().str.replace(' ','_')
How do you drop columns or rows?
# Drop columns
df_no_age = df.drop('age', axis=1)
df_subset = df.drop(['age', 'city'], axis=1)
# Drop rows
df_no_first = df.drop(0, axis=0) # Drop first row
df_clean = df.drop([0, 1, 2]) # Drop multiple rows
# Alternative: del statement
del df['column_name']
How do you sort DataFrames?
# Sort by single column
df_sorted = df.sort_values('age')
# Sort by multiple columns
df_sorted = df.sort_values(['city', 'age'], ascending=[True, False])
# Sort by index
df_sorted = df.sort_index()
# In-place sorting
df.sort_values('age', inplace=True)
How do you merge DataFrames?
df1 = pd.DataFrame({'key': ['A', 'B', 'C'], 'value1': [1, 2, 3]})
df2 = pd.DataFrame({'key': ['A', 'B', 'D'], 'value2': [4, 5, 6]})
# Inner join (default)
inner = pd.merge(df1, df2, on='key')
# Left join
left = pd.merge(df1, df2, on='key', how='left')
# Different column names
merged = df1.merge(df2, left_on='id', right_on='user_id')
How do you concatenate DataFrames?
df1 = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
df2 = pd.DataFrame({'A': [5, 6], 'B': [7, 8]})
# Vertical concatenation (stack rows)
vertical = pd.concat([df1, df2])
# Horizontal concatenation (side by side)
horizontal = pd.concat([df1, df2], axis=1)
# With keys
with_keys = pd.concat([df1, df2], keys=['first', 'second'])
How do you pivot DataFrames?
# Sample data
data = {'date': ['2023-01-01', '2023-01-01', '2023-01-02'],
'variable': ['A', 'B', 'A'],
'value': [1, 2, 3]}
df = pd.DataFrame(data)
# Pivot table
pivoted = df.pivot(index='date', columns='variable', values='value')
# Pivot with aggregation
pivot_table = df.pivot_table(index='date', columns='variable', values='value', aggfunc='mean')
How do you work with datetime data?
# Convert to datetime
df['date'] = pd.to_datetime(df['date'])
# Extract date components
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['day_name'] = df['date'].dt.day_name()
# Date arithmetic
df['days_ago'] = (pd.Timestamp.now() - df['date']).dt.days
How do you apply functions to DataFrames?
# Apply to entire DataFrame
df_squared = df.apply(lambda x: x**2) # Numeric columns only
# Apply to specific column
df['name_length'] = df['name'].apply(len)
df['name_upper'] = df['name'].apply(str.upper)
# Apply custom function
def categorize_age(age):
return 'young' if age < 30 else 'old'
df['category'] = df['age'].apply(categorize_age)
How do you get DataFrame info and statistics?
# Basic info
print(df.info()) # Data types, memory usage
print(df.describe()) # Statistical summary
print(df.shape) # (rows, columns)
print(df.dtypes) # Column data types
print(df.columns) # Column names
print(df.head()) # First 5 rows
print(df.tail()) # Last 5 rows
How do you handle duplicates?
# Check for duplicates
print(df.duplicated().sum()) # Count duplicates
print(df.duplicated(['name'])) # Duplicates in specific column
# Remove duplicates
df_unique = df.drop_duplicates()
df_unique = df.drop_duplicates(['name'], keep='first')
# Mark duplicates
df['is_duplicate'] = df.duplicated()
How do you reset index?
# Reset index (old index becomes column)
df_reset = df.reset_index()
# Reset and drop old index
df_reset = df.reset_index(drop=True)
# In-place reset
df.reset_index(drop=True, inplace=True)
How do you set index?
# Set single column as index
df_indexed = df.set_index('name')
# Set multiple columns as index (MultiIndex)
df_multi = df.set_index(['city', 'name'])
# In-place index setting
df.set_index('name', inplace=True)
How do you save DataFrames to CSV?
# Basic CSV export
df.to_csv('output.csv', index=False)
# With options
df.to_csv('output.csv',
index=False, # Don't include index
sep=';', # Different separator
encoding='utf-8', # Specify encoding
na_rep='Missing') # How to represent NaN
How do you work with string columns?
# String operations
df['name_upper'] = df['name'].str.upper()
df['name_lower'] = df['name'].str.lower()
df['name_length'] = df['name'].str.len()
# String contains
filtered = df[df['name'].str.contains('A', na=False)]
# String split
df[['first', 'last']] = df['full_name'].str.split(' ', expand=True)
How do you create cross-tabulations?
# Simple crosstab
crosstab = pd.crosstab(df['city'], df['age_group'])
# With percentages
crosstab_pct = pd.crosstab(df['city'], df['age_group'], normalize='index')
# With margins (totals)
crosstab_margins = pd.crosstab(df['city'], df['age_group'], margins=True)
How do you sample data?
# Random sample of n rows
sample_n = df.sample(n=5)
# Random sample of fraction
sample_frac = df.sample(frac=0.1) # 10% of data
# Sample with replacement
sample_replace = df.sample(n=100, replace=True)
# Set seed for reproducibility
sample_seed = df.sample(n=5, random_state=42)
How do you replace values?
# Replace specific values
df_replaced = df.replace('old_value', 'new_value')
# Replace multiple values
df_replaced = df.replace({'A': 1, 'B': 2, 'C': 3})
# Replace in specific column
df['grade'] = df['grade'].replace({'A': 'Excellent', 'B': 'Good'})
# Replace using regex
df['text'] = df['text'].str.replace(r'\d+', 'NUMBER', regex=True)
How do you work with categorical data?
# Convert to categorical
df['category'] = df['category'].astype('category')
# Create dummy variables
dummies = pd.get_dummies(df['category'])
# Label encoding (manual)
df['category_code'] = df['category'].cat.codes
# One-hot encoding with prefix
dummies_prefix = pd.get_dummies(df['category'], prefix='cat')
How do you create a basic line plot?
import matplotlib.pyplot as plt
# Basic line plot
x = [1, 2, 3, 4, 5]
y = [2, 4, 6, 8, 10]
plt.plot(x, y)
plt.xlabel('X values')
plt.ylabel('Y values')
plt.title('My Line Plot')
plt.grid(True)
plt.show()
How do you create a scatter plot?
import matplotlib.pyplot as plt
import numpy as np
# Basic scatter plot
x = np.random.randn(100)
y = np.random.randn(100)
colors = np.random.rand(100)
sizes = 1000 * np.random.rand(100)
plt.scatter(x, y, c=colors, s=sizes, alpha=0.6)
plt.colorbar()
plt.show()
How do you create a bar chart?
import matplotlib.pyplot as plt
# Vertical bar chart
categories = ['A', 'B', 'C', 'D']
values = [23, 45, 56, 78]
plt.bar(categories, values, color='skyblue')
plt.ylabel('Values')
plt.title('Bar Chart')
# Horizontal bar chart
plt.barh(categories, values)
plt.show()
How do you create a histogram?
import matplotlib.pyplot as plt
import numpy as np
# Generate sample data
data = np.random.normal(100, 15, 1000)
# Create histogram
plt.hist(data, bins=30, alpha=0.7, color='green', edgecolor='black')
plt.xlabel('Values')
plt.ylabel('Frequency')
plt.title('Histogram')
plt.show()
How do you customize plot appearance?
import matplotlib.pyplot as plt
# Set figure size and style
plt.figure(figsize=(10, 6))
plt.style.use('seaborn-v0_8')
# Plot with customization
plt.plot(x, y, color='red', linewidth=2, linestyle='--', marker='o')
plt.title('Customized Plot', fontsize=16, fontweight='bold')
plt.xlabel('X Label', fontsize=12)
plt.ylabel('Y Label', fontsize=12)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
How do you create subplots?
import matplotlib.pyplot as plt
# Create subplots
fig, axes = plt.subplots(2, 2, figsize=(12, 8))
# Plot in different subplots
axes[0, 0].plot(x, y)
axes[0, 0].set_title('Line Plot')
axes[0, 1].scatter(x, y)
axes[0, 1].set_title('Scatter Plot')
axes[1, 0].bar(categories, values)
axes[1, 0].set_title('Bar Chart')
axes[1, 1].hist(data)
axes[1, 1].set_title('Histogram')
plt.tight_layout()
plt.show()
How do you create a heatmap with seaborn?
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# Create sample correlation matrix
data = np.random.rand(10, 10)
corr_matrix = np.corrcoef(data)
# Create heatmap
sns.heatmap(corr_matrix,
annot=True, # Show values
cmap='coolwarm', # Color scheme
center=0, # Center colormap at 0
square=True) # Square cells
plt.title('Correlation Heatmap')
plt.show()
How do you create a box plot?
import seaborn as sns
import matplotlib.pyplot as plt
# Seaborn box plot
sns.boxplot(data=df, x='category', y='value')
plt.title('Box Plot by Category')
plt.xticks(rotation=45)
plt.show()
# Matplotlib box plot
data_groups = [group['value'].values for name, group in df.groupby('category')]
plt.boxplot(data_groups, labels=df['category'].unique())
plt.show()
How do you create a violin plot?
import seaborn as sns
import matplotlib.pyplot as plt
# Basic violin plot
sns.violinplot(data=df, x='category', y='value')
plt.title('Violin Plot')
plt.show()
# Split violin plot (compare two groups)
sns.violinplot(data=df, x='category', y='value', hue='group', split=True)
plt.show()
How do you save plots?
import matplotlib.pyplot as plt
# Create plot
plt.plot(x, y)
plt.title('My Plot')
# Save with different formats and options
plt.savefig('plot.png', dpi=300, bbox_inches='tight')
plt.savefig('plot.pdf', bbox_inches='tight')
plt.savefig('plot.jpg', dpi=150, facecolor='white')
# Save without displaying
plt.savefig('plot.png')
plt.close() # Close figure to free memory
How do you create a correlation matrix plot?
import seaborn as sns
import matplotlib.pyplot as plt
# Calculate correlation matrix
corr = df.select_dtypes(include=[np.number]).corr()
# Plot correlation matrix
plt.figure(figsize=(10, 8))
sns.heatmap(corr,
annot=True,
cmap='RdBu_r',
center=0,
square=True,
linewidths=0.5)
plt.title('Correlation Matrix')
plt.show()
How do you create a pair plot?
import seaborn as sns
# Pair plot of all numeric columns
sns.pairplot(df)
plt.show()
# Pair plot with grouping
sns.pairplot(df, hue='species', markers=['o', 's', 'D'])
plt.show()
# Pair plot with specific columns
sns.pairplot(df, vars=['col1', 'col2', 'col3'], hue='target')
plt.show()
How do you add legends to plots?
import matplotlib.pyplot as plt
# Plot multiple lines
plt.plot(x, y1, label='Line 1', color='blue')
plt.plot(x, y2, label='Line 2', color='red')
plt.plot(x, y3, label='Line 3', color='green')
# Add legend
plt.legend(loc='upper right') # or 'best', 'lower left', etc.
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left') # Outside plot
plt.show()
How do you create a pie chart?
import matplotlib.pyplot as plt
# Data for pie chart
labels = ['Python', 'Java', 'JavaScript', 'C++']
sizes = [30, 25, 20, 15]
explode = (0.1, 0, 0, 0) # Explode first slice
# Create pie chart
plt.pie(sizes, labels=labels, explode=explode, autopct='%1.1f%%',
shadow=True, startangle=90)
plt.axis('equal') # Equal aspect ratio
plt.title('Programming Languages Usage')
plt.show()
How do you plot time series data?
import matplotlib.pyplot as plt
import pandas as pd
# Create time series data
dates = pd.date_range('2023-01-01', periods=100, freq='D')
values = np.cumsum(np.random.randn(100))
# Plot time series
plt.figure(figsize=(12, 6))
plt.plot(dates, values)
plt.xlabel('Date')
plt.ylabel('Value')
plt.title('Time Series Plot')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
How do you split data for machine learning?
from sklearn.model_selection import train_test_split
# Basic train-test split
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, random_state=42
)
# Stratified split (maintains class distribution)
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, stratify=y, random_state=42
)
How do you create a linear regression model?
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Create and train model
model = LinearRegression()
model.fit(X_train, y_train)
# Make predictions
y_pred = model.predict(X_test)
# Evaluate
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'MSE: {mse}, R²: {r2}')
How do you evaluate model performance?
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.metrics import mean_squared_error, mean_absolute_error
# Classification metrics
accuracy = accuracy_score(y_true, y_pred)
report = classification_report(y_true, y_pred)
cm = confusion_matrix(y_true, y_pred)
# Regression metrics
mse = mean_squared_error(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)
rmse = np.sqrt(mse)
How do you scale features?
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
# Standard scaling (mean=0, std=1)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Min-Max scaling (0 to 1)
min_max_scaler = MinMaxScaler()
X_minmax = min_max_scaler.fit_transform(X_train)
# Robust scaling (median-based)
robust_scaler = RobustScaler()
X_robust = robust_scaler.fit_transform(X_train)
How do you encode categorical variables?
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import pandas as pd
# Label encoding (ordinal)
le = LabelEncoder()
y_encoded = le.fit_transform(y)
# One-hot encoding
encoder = OneHotEncoder(sparse_output=False)
X_encoded = encoder.fit_transform(X_categorical)
# Using pandas
dummies = pd.get_dummies(df['category'], prefix='cat')
df_encoded = pd.concat([df, dummies], axis=1)
How do you perform cross-validation?
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.linear_model import LogisticRegression
# Basic cross-validation
model = LogisticRegression()
scores = cross_val_score(model, X, y, cv=5)
print(f'CV Scores: {scores}')
print(f'Mean CV Score: {scores.mean():.3f} (+/- {scores.std() * 2:.3f})')
# Stratified K-Fold
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_val_score(model, X, y, cv=skf)
How do you create a classification model?
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
# Random Forest
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
# Logistic Regression
lr = LogisticRegression(random_state=42)
lr.fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)
# Support Vector Machine
svm = SVC(random_state=42)
svm.fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)
How do you handle imbalanced datasets?
from sklearn.utils import resample
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
# Resampling minority class
minority_upsampled = resample(minority_class,
replace=True,
n_samples=len(majority_class))
# SMOTE (Synthetic Minority Oversampling)
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)
# Class weights
clf = RandomForestClassifier(class_weight='balanced')
clf.fit(X_train, y_train)
How do you perform feature selection?
from sklearn.feature_selection import SelectKBest, f_classif, RFE
from sklearn.ensemble import RandomForestClassifier
# Univariate feature selection
selector = SelectKBest(score_func=f_classif, k=10)
X_selected = selector.fit_transform(X, y)
# Recursive Feature Elimination
rf = RandomForestClassifier()
rfe = RFE(estimator=rf, n_features_to_select=10)
X_rfe = rfe.fit_transform(X, y)
# Feature importance from tree-based models
rf.fit(X, y)
importances = rf.feature_importances_
How do you create a pipeline?
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
# Create pipeline
pipe = Pipeline([
('scaler', StandardScaler()),
('classifier', RandomForestClassifier(random_state=42))
])
# Fit and predict
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)
# Access individual steps
scaler = pipe.named_steps['scaler']
classifier = pipe.named_steps['classifier']
How do you tune hyperparameters?
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
# Grid Search
param_grid = {
'n_estimators': [50, 100, 200],
'max_depth': [3, 5, 7, None],
'min_samples_split': [2, 5, 10]
}
grid_search = GridSearchCV(RandomForestClassifier(), param_grid, cv=5)
grid_search.fit(X_train, y_train)
best_model = grid_search.best_estimator_
print(f'Best params: {grid_search.best_params_}')
How do you create a confusion matrix?
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt
# Create confusion matrix
cm = confusion_matrix(y_true, y_pred)
print(cm)
# Visualize confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.title('Confusion Matrix')
plt.show()
# Classification report
print(classification_report(y_true, y_pred))
How do you perform clustering?
from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering
import matplotlib.pyplot as plt
# K-Means clustering
kmeans = KMeans(n_clusters=3, random_state=42)
cluster_labels = kmeans.fit_predict(X)
centers = kmeans.cluster_centers_
# DBSCAN clustering
dbscan = DBSCAN(eps=0.5, min_samples=5)
db_labels = dbscan.fit_predict(X)
# Hierarchical clustering
agg_clustering = AgglomerativeClustering(n_clusters=3)
agg_labels = agg_clustering.fit_predict(X)
# Visualize clusters
plt.scatter(X[:, 0], X[:, 1], c=cluster_labels)
plt.scatter(centers[:, 0], centers[:, 1], marker='x', s=200, c='red')
plt.show()
How do you reduce dimensionality?
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
# Principal Component Analysis
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)
print(f'Explained variance ratio: {pca.explained_variance_ratio_}')
# t-SNE
tsne = TSNE(n_components=2, random_state=42)
X_tsne = tsne.fit_transform(X)
# Visualize reduced dimensions
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y)
plt.title('PCA')
plt.subplot(1, 2, 2)
plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=y)
plt.title('t-SNE')
plt.show()
How do you handle overfitting?
# Techniques to handle overfitting:
# 1. Cross-validation
from sklearn.model_selection import cross_val_score
scores = cross_val_score(model, X, y, cv=5)
# 2. Regularization
from sklearn.linear_model import Ridge, Lasso
ridge = Ridge(alpha=1.0) # L2 regularization
lasso = Lasso(alpha=1.0) # L1 regularization
# 3. Early stopping (for neural networks)
# 4. Dropout (for neural networks)
# 5. Ensemble methods
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=100)
# 6. More training data
# 7. Feature selection
# 8. Reduce model complexity
How do you create lambda functions?
# Lambda functions (anonymous functions)
square = lambda x: x**2
print(square(5)) # 25
# With multiple arguments
add = lambda x, y: x + y
print(add(3, 4)) # 7
# Using with built-in functions
numbers = [1, 2, 3, 4, 5]
squared = list(map(lambda x: x**2, numbers))
evens = list(filter(lambda x: x % 2 == 0, numbers))
# In pandas
df['new_col'] = df['old_col'].apply(lambda x: x * 2)
How do you handle exceptions?
# Basic try-except
try:
result = 10 / 0
except ZeroDivisionError:
print('Cannot divide by zero!')
# Multiple exceptions
try:
value = int(input('Enter number: '))
result = 10 / value
except ValueError:
print('Invalid input!')
except ZeroDivisionError:
print('Cannot divide by zero!')
finally:
print('This always executes')
# Generic exception handler
try:
risky_operation()
except Exception as e:
print(f'Error occurred: {e}')
How do you read and write files?
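# Reading files (pick one method per open file: after read() the file position is at the end, so later reads return nothing)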
with open('file.txt', 'r') as f:
content = f.read() # Read entire file
lines = f.readlines() # Read all lines
first_line = f.readline() # Read one line
# Writing files
with open('output.txt', 'w') as f:
f.write('Hello, World!')
f.writelines(['line1\n', 'line2\n'])
# Appending to files
with open('log.txt', 'a') as f:
f.write('New log entry\n')
# Working with CSV
import csv
with open('data.csv', 'r') as f:
reader = csv.reader(f)
for row in reader:
print(row)
How do you use map() and filter()?
# map() applies function to all items
numbers = [1, 2, 3, 4, 5]
squared = list(map(lambda x: x**2, numbers)) # [1, 4, 9, 16, 25]
strings = list(map(str, numbers)) # ['1', '2', '3', '4', '5']
# filter() filters items based on condition
evens = list(filter(lambda x: x % 2 == 0, numbers)) # [2, 4]
positive = list(filter(lambda x: x > 0, [-1, 0, 1, 2])) # [1, 2]
# Multiple iterables with map
list1 = [1, 2, 3]
list2 = [4, 5, 6]
sums = list(map(lambda x, y: x + y, list1, list2)) # [5, 7, 9]
How do you create decorators?
# Simple decorator
def my_decorator(func):
def wrapper(*args, **kwargs):
print('Before function call')
result = func(*args, **kwargs)
print('After function call')
return result
return wrapper
# Using decorator
@my_decorator
def greet(name):
return f'Hello, {name}!'
# Decorator with arguments
def repeat(times):
def decorator(func):
def wrapper(*args, **kwargs):
for _ in range(times):
result = func(*args, **kwargs)
return result
return wrapper
return decorator
@repeat(3)
def say_hello():
print('Hello!')
How do you work with generators?
# Generator function
def fibonacci():
a, b = 0, 1
while True:
yield a
a, b = b, a + b
# Using generator
fib = fibonacci()
for _ in range(10):
print(next(fib))
# Generator expression
squares = (x**2 for x in range(10))
print(list(squares)) # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
# Generator for memory efficiency
def read_large_file(file_path):
with open(file_path, 'r') as f:
for line in f:
yield line.strip()
How do you use *args and **kwargs?
# *args for variable positional arguments
def sum_all(*args):
return sum(args)
print(sum_all(1, 2, 3, 4)) # 10
# **kwargs for variable keyword arguments
def print_info(**kwargs):
for key, value in kwargs.items():
print(f'{key}: {value}')
print_info(name='Alice', age=30, city='NYC')
# Combining both
def flexible_function(*args, **kwargs):
print(f'Args: {args}')
print(f'Kwargs: {kwargs}')
flexible_function(1, 2, 3, name='Alice', age=30)
# Unpacking arguments
numbers = [1, 2, 3, 4]
print(sum_all(*numbers)) # Unpacking list
info = {'name': 'Bob', 'age': 25}
print_info(**info) # Unpacking dictionary
How do you create classes?
# Basic class
class Person:
def __init__(self, name, age):
self.name = name
self.age = age
def greet(self):
return f'Hi, I am {self.name}'
def have_birthday(self):
self.age += 1
return f'Happy birthday! Now {self.age}'
# Inheritance
class Student(Person):
def __init__(self, name, age, student_id):
super().__init__(name, age)
self.student_id = student_id
def study(self, subject):
return f'{self.name} is studying {subject}'
# Using classes
person = Person('Alice', 25)
student = Student('Bob', 20, 'S12345')
print(person.greet())
print(student.study('Python'))
How do you work with JSON data?
import json
# Python dict to JSON string
data = {'name': 'Alice', 'age': 30, 'city': 'NYC'}
json_string = json.dumps(data, indent=2)
print(json_string)
# JSON string to Python dict
json_data = '{"name": "Bob", "age": 25}'
python_dict = json.loads(json_data)
print(python_dict)
# Save to JSON file
with open('data.json', 'w') as f:
json.dump(data, f, indent=2)
# Load from JSON file
with open('data.json', 'r') as f:
loaded_data = json.load(f)
print(loaded_data)
# Handle datetime objects
from datetime import datetime
data_with_date = {'name': 'Alice', 'timestamp': datetime.now()}
# Need custom serializer for datetime
How do you measure execution time?
import time
from timeit import timeit
# Using time module
start_time = time.time()
# Your code here
time.sleep(1) # Simulate work
end_time = time.time()
execution_time = end_time - start_time
print(f'Execution time: {execution_time:.2f} seconds')
# Using timeit for small code snippets
execution_time = timeit('sum(range(100))', number=10000)
print(f'Average time: {execution_time/10000:.6f} seconds')
# Context manager for timing
from contextlib import contextmanager
@contextmanager
def timer():
start = time.time()
yield
end = time.time()
print(f'Elapsed time: {end - start:.2f} seconds')
with timer():
# Your code here
sum(range(1000000))