Data Science Flash Cards for Python

0.0(0)
studied byStudied by 1 person
learnLearn
examPractice Test
spaced repetitionSpaced Repetition
heart puzzleMatch
flashcardsFlashcards
Card Sorting

1/101

flashcard set

Earn XP

Description and Tags

Generated by Claude

Study Analytics
Name
Mastery
Learn
Test
Matching
Spaced

No study sessions yet.

102 Terms

1
New cards

How do you create variables and check their types?

name = 'John'
age = 25

print(type(name), type(age))
>>> <class 'str'> <class 'int'>

2
New cards

How do you create and manipulate lists?

fruits=['apple','banana']
fruits.append('orange')
fruits.extend(['grape','kiwi'])
print(fruits[0]) # First element
print(fruits[-1]) # Last element
>>>apple
>>>kiwi

3
New cards

How do you create and access dictionary data?

student = {'name': 'Alice', 'age': 22}

print(student['name'])

student['grade'] = 'A'

print(student.get('city', 'Unknown'))

>>>Alice
>>>Unknown

4
New cards

How do you iterate through data with for loops?

# Iterate over a list
for item in ['a', 'b', 'c']:
  print(item)
>>>a
>>>b
>>>c

# Iterate with index
for i in range(5):
  print(f"Number: {i}")
>>>Number: 0
>>>Number: 1
>>>Number: 2
>>>Number: 3
>>>Number: 4

5
New cards

How do you create lists efficiently using list comprehensions?

# Basic list comprehension
squares = [x**2 for x in range(5)]
print(squares)
>>>[0, 1, 4, 9, 16]

# With condition
evens = [x for x in range(10) if x % 2 == 0]
print(evens)
>>>[0, 2, 4, 6, 8]

# Nested comprehension
matrix = [[i*j for j in range(3)] for i in range(3)]
print(matrix)
>>>[[0, 0, 0], [0, 1, 2], [0, 2, 4]]

6
New cards

How do you define and call functions?

def greet(name, greeting='Hello'):
  return f'{greeting}, {name}!'

# Function calls
result = greet('Alice')
result2 = greet('Bob', 'Hi') # Specified a greeting other than the default 'Hello'

print(result)
>>>Hello, Alice!

print(result2)
>>>Hi, Bob!

7
New cards

How do you handle conditional statements?

x = 10
if x > 0: 
  print('positive')
elif x < 0: 
  print('negative')
else: 
  print('zero')

# Ternary operator
result = 'positive' if x > 0 else 'not positive'

>>>positive

8
New cards

How do you work with tuples?

# Create tuple
coords = (3, 5)
point = 1, 2, 3 # Parentheses optional

# Unpack tuple
x,y=coords
print(coords[0]) # Access by index
>>>3

# coords[0] = 10 # Error: tuples are immutable

9
New cards

How do you work with sets?

# Create set
unique_nums = {1, 2, 3, 3, 4} # Duplicates removed
my_set = set([1, 2, 3])

# Set operations
my_set.add(5)
print(my_set)
>>>{1, 2, 3, 5}

my_set.remove(1)
print(1 in my_set)
>>>False
print(my_set)
>>>{2, 3, 5}

10
New cards

How do you perform string operations?

text = 'Hello World'

print(text.lower())
>>>hello world

print(text.upper())
>>>HELLO WORLD

print(text.split())
>>>['Hello', 'World']

print(text.replace('World', 'Python'))
>>>Hello Python

print(text.startswith('Hello')) 
>>>True

11
New cards

How do you use while loops?

count = 0
while count < 3: 
  print(f'Count is: {count}')
  count += 1
>>> Count is: 0
>>> Count is: 1
>>> Count is: 2

# With break and continue

12
New cards

How do you work with range()?

# Basic range

for i in range(5):    # 0, 1, 2, 3, 4
  print(i)

>>> 0
>>> 1
>>> 2
>>> 3
>>> 4


# Range with start, stop, step

for i in range(0, 10, 2):
  print(i)

>>> 0
>>> 2
>>> 4
>>> 6
>>> 8


# Convert to list

numbers = list(range(5))
print(numbers)

>>> [0, 1, 2, 3, 4]

13
New cards

How do you check if an item is in a list/dict?

fruits = ['apple', 'banana']
student = {'name': 'Alice', 'age': 22}

print('apple' in fruits)
>>> True

print('name' in student)
>>> True

print('grade' in student)
>>> False

14
New cards

How do you sort lists?

numbers = [3, 1, 4, 1, 5]

# In-place sorting
numbers.sort()
print(numbers)   
>>> [1, 1, 3, 4, 5]

# Return new sorted list
sorted_nums = sorted([3, 1, 4], reverse=True)
print(sorted_nums)
>>> [4, 3, 1]

15
New cards

How do you remove items from lists?

fruits = ['apple', 'banana', 'orange']

# Remove by value
fruits.remove('apple')
print(fruits)
>>> ['banana', 'orange']


# Remove by index
removed = fruits.pop(0)
print(fruits)
>>> ['orange']


# Delete by index
del fruits[0]
print(fruits)
>>> []

16
New cards

How do you get user input?

name = input('Enter your name: ')
age = int(input('Enter your age: '))
height = float(input('Enter height: '))

print(f'Hello {name}, you are {age} years old')

# Upon running, a text box will pop up for each option and user will enter values. For example, Name = Oliver, age = 89
>>> Hello Oliver, you are 89 years old

17
New cards

How do you convert between data types?

# String conversions to another data type
num_str = str(42)
>>> '42'
num_int = int('42')
>>> 42
num_float = float('3.14')
>>> 3.14

# Collection conversions
my_list = list('hello')
print(my_list)
>>> ['h', 'e', 'l', 'l', 'o']

my_tuple=tuple([1, 2, 3])
>>> (1, 2, 3)

18
New cards

How do you work with multiple assignment?

# Multiple assignment
a, b = 1, 2
x = y = z = 0

# Swapping variables
a, b = b, a

# Unpacking
coords = (3, 5)
x, y = coords

print(coords)
>>> (3, 5)

19
New cards

How do you use enumerate()?

fruits = ['apple', 'banana', 'cherry']

# Get index and value
for i, fruit in enumerate(fruits):
  print(f'{i}: {fruit}')
>>> 0: apple
>>> 1: banana
>>> 2: cherry

# Start counting from 1
for i, fruit in enumerate(fruits, 1):
  print(f'{i}: {fruit}')
>>> 1: apple
>>> 2: banana
>>> 3: cherry

20
New cards

How do you use zip()?

names = ['Alice', 'Bob', 'Charlie']
ages = [25, 30, 35]
cities = ['NYC', 'LA', 'Chicago']


# Combine multiple lists
for name, age, city in zip(names, ages, cities):
  print(f'{name}, {age}, {city}')
>>> Alice, 25, NYC
>>> Bob, 30, LA
>>> Charlie, 35, Chicago

21
New cards

How do you create NumPy arrays?

import numpy as np

# From list
arr = np.array([1, 2, 3])
print(arr)
>>> [1 2 3]

# Create special arrays
zeros = np.zeros((3, 4))   # 3x4 array of zeros
>>> [[0. 0. 0. 0.]
>>> [0. 0. 0. 0.]
>>> [0. 0. 0. 0.]]

ones = np.ones((2, 3))     # 2x3 array of ones
>>> [[1. 1. 1.]
>>> [1. 1. 1.]]

eye = np.eye(3)        # 3x3 identity matrix
>>> [[1. 0. 0.]
>>> [0. 1. 0.]
>>> [0. 0. 1.]]

22
New cards

How do you access NumPy array elements?

arr = np.array([[1,2,3], [4,5,6]])

# Basic indexing
print(arr[0])    # First row
print(arr[0,1])  # Element at row 0, col 1
print(arr[:, 1]) #  All rows, col 1
print(arr[1:3])  # Rows 1 to 2

23
New cards

How do you perform mathematical operations on arrays?

a = np.array([1,2,3])
b = np.array([4,5,6])

# Element-wise operations
print(a + b)      # [5, 7, 9]
print(a * b)      # [4, 10, 18]
print(np.sqrt(a)) # Square root
print(np.exp(a))  # Exponential

24
New cards

How do you reshape NumPy arrays?

arr = np.array([1,2,3,4,5,6])

# Reshape to 2D
reshaped = arr.reshape(2,3)   # 2 rows, 3 cols

# Flatten to 1D
flat = reshaped.flatten()

# Transpose
transposed = reshaped.T

25
New cards

How do you find array statistics?

arr = np.array([1,2,3,4,5])

print(np.mean(arr))   # Mean: 3.0
print(np.std(arr))    # Standard deviation
print(np.min(arr))    # Minimum: 1
print(np.max(arr))    # Maximum: 5
print(np.median(arr)) # Median: 3.0
print(np.sum(arr))    # Sum: 15

26
New cards

How do you create arrays with specific values?

# Arrays with specific values
zeros = np.zeros(5)             # [0,0,0,0,0]
ones = np.ones(3)               # [1,1,1]
full = np.full(4, 7)            # [7,7,7,7]
range_arr = np.arange(0, 10, 2) # [0,2,4,6,8]
linspace = np.linspace(0, 1, 5) # 5 points from 0 to 1

27
New cards

How do you perform boolean indexing?

arr = np.array([1, 2, 3, 4, 5, 6])

# Boolean conditions
print(arr > 3)      # [False, False, False, True, True, True]
print(arr[arr > 3]) # [4,5,6]

# Multiple conditions
print(arr[(arr > 2) & (arr < 6)])   # [3,4,5]

28
New cards

How do you concatenate arrays?

arr1 = np.array([1,2,3])
arr2 = np.array([4,5,6])

# Concatenate 1D arrays
result = np.concatenate([arr1, arr2])   # [1,2,3,4,5,6]

# Stack arrays
vstack = np.vstack([arr1, arr2])   # Vertical stack
hstack = np.hstack([arr1, arr2])   # Horizontal stack

29
New cards

How do you perform matrix multiplication?

A = np.array([[1,2], [3,4]])
B = np.array([[5,6], [7,8]])

# Matrix multiplication
result1 = np.dot(A, B)
result2 = A @ B       # Python 3.5+
result3 = np.matmul(A,B)

# Element-wise multiplication
result4 = A * B

30
New cards

How do you find unique values in arrays?

arr = np.array([1,2,2,3,3,3,4])

# Unique values
unique_vals = np.unique(arr)   # [1,2,3,4]

#Unique values with counts
unique_vals, counts = np.unique(arr, return_counts=True)
print(unique_vals)             # [1,2,3,4]
print(counts)                  # [1,2,3,1]

31
New cards

How do you save and load NumPy arrays?

arr = np.array([1,2,3,4,5])

# Save a single array
np.save('my_array.npy', arr)
loaded_arr = np.load('my_array.npy')

# Save multiple arrays
np.savez('arrays.npz', a=arr, b=arr*2)
data = np.load('arrays.npz')
print(data['a'], data['b'])

32
New cards

How do you perform broadcasting?

# Broadcasting allows operations on arrays of different shapes
arr = np.array([[1,2,3], [4,5,6]])

# Add scalar (broadcasts to all elements)
result1 = arr + 10

# Add 1D array to 2D array
vec = np.array([1,2,3])
result2 = arr + vec     # Adds vec to each row

33
New cards

How do you work with random numbers?

# Set seed for reproducibility
np.random.seed(42)

# Random arrays
rand_uniform = np.random.rand(3,4)   # Uniform [0,1)
rand_normal = np.random.randn(3,4)   # Standard normal
rand_int = np.random.randint(0,10,5) # Random integers

34
New cards

How do you perform array comparison?

arr1 = np.array([1,2,3])
arr2 = np.array([1,2,4])

# Element-wise comparison
print(arr1 == arr2)    # [True, True, False]

# Array equality
print(np.array_equal(arr1, arr2))  # False

# All/any conditions
print(np.all(arr1 > 0))  # True
print(np.any(arr1 > 2))  # True

35
New cards

How do you work with NaN values in NumPy?

arr = np.array([1,2,np.nan,4,np.nan])

# Check for NaN
print(np.isnan(arr))   # [False, False, True, False, True]

# NaN-aware functions
print(np.nanmean(arr))  # Mean ignoring NaN
print(np.nansum(arr))   # Sum ignoring NaN

# Remove NaN values
clean_arr = arr[~np.isnan(arr)]

36
New cards

How do you create a pandas DataFrame?

import pandas as pd

# From dictionary
df = pd.DataFrame({
  'name': ['Alice', 'Bob', 'Charlie'], 
  'age': [25, 30, 35], 
  'city': ['NYC', 'LA', 'Chicago']
})

# From lists
df2 = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B'])

37
New cards

How do you read a csv file?

# Basic CSV reading
df = pd.read_csv('file.csv')

# With options
df = pd.read_csv('file.csv',
  index_col=0,       # Use first column as index
  sep=';',           # Different separator
  encoding='utf-8',  # Specify encoding
  na_values=['N/A']) # Custom missing values

38
New cards

How do you select data from DataFrames?

# Column selection
df['name']           # Single column (Series)
df[['name', 'age']]  # Multiple columns (DataFrame)

# Row selection 
df.loc[0]            # By label
df.iloc[0]           # By position
df.loc[0:2]          # Multiple rows

# Boolean indexing
df[df['age'] > 25]   # Filter rows

39
New cards

How do you handle missing data?

# Check for missing data
print(df.isnull().sum())     # Count missing values per column
df.info()                    # Data types and non-null counts (prints directly; returns None)

# Handle missing data
df_clean = df.dropna()          # Drop rows with NaN
df_filled = df.fillna(0)        # Fill NaN with 0
df['age'] = df['age'].fillna(df['age'].mean())  # Fill with mean (assign the result back)

40
New cards

How do you perform GroupBy operations?

# Basic groupby
grouped = df.groupby('city')
print(grouped['age'].mean())   # Mean age by city

# Multiple aggregations
result = df.groupby('city')['age'].agg(['mean','std','count'])

# Group by multiple columns
result2 = df.groupby(['city','gender'])['salary'].sum()

41
New cards

How do you filter DataFrames?

# Single condition
young = df[df['age'] < 30]

# Multiple conditions
filtered = df[(df['age'] > 25) & (df['city'] == 'NYC')]

# Using query method
result = df.query('age > 25 and city == "NYC"')

# String contains
name_filter = df[df['name'].str.contains('A')]

42
New cards

How do you add a new column?

# Simple assignment
df['age_squared'] = df['age'] ** 2

# Based on conditions
df['age_group'] = df['age'].apply(lambda x: 'young' if x < 30 else 'old')

# Multiple columns at once
df[['total', 'average']] = df[['col1', 'col2']].apply(lambda x: [x.sum(), x.mean()], axis=1, result_type='expand')

43
New cards

How do you rename columns?

# Rename specific columns
df_renamed = df.rename(columns={'old_name': 'new_name'})

# Rename all columns
df.columns = ['col1', 'col2', 'col3']

# Using str methods
df.columns = df.columns.str.lower().str.replace(' ','_')

44
New cards

How do you drop columns or rows?

# Drop columns
df_no_age = df.drop('age', axis=1)
df_subset = df.drop(['age', 'city'], axis=1)

# Drop rows
df_no_first = df.drop(0, axis=0)  # Drop first row
df_clean = df.drop([0, 1, 2])     # Drop multiple rows

# Alternative: del statement
del df['column_name']

45
New cards

How do you sort DataFrames?

# Sort by single column
df_sorted = df.sort_values('age')

# Sort by multiple columns
df_sorted = df.sort_values(['city', 'age'], ascending=[True, False])

# Sort by index
df_sorted = df.sort_index()

# In-place sorting
df.sort_values('age', inplace=True)

46
New cards

How do you merge DataFrames?

df1 = pd.DataFrame({'key': ['A', 'B', 'C'], 'value1': [1, 2, 3]})
df2 = pd.DataFrame({'key': ['A', 'B', 'D'], 'value2': [4, 5, 6]})

# Inner join (default)
inner = pd.merge(df1, df2, on='key')

# Left join
left = pd.merge(df1, df2, on='key', how='left')

# Different column names
merged = df1.merge(df2, left_on='id', right_on='user_id')

47
New cards

How do you concatenate DataFrames?

df1 = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
df2 = pd.DataFrame({'A': [5, 6], 'B': [7, 8]})

# Vertical concatenation (stack rows)
vertical = pd.concat([df1, df2])

# Horizontal concatenation (side by side)
horizontal = pd.concat([df1, df2], axis=1)

#  With keys
with_keys = pd.concat([df1, df2], keys=['first', 'second'])

48
New cards

How do you pivot DataFrames?

# Sample data
data = {'date': ['2023-01-01', '2023-01-01', '2023-01-02'],
  'variable': ['A', 'B', 'A'], 
  'value': [1, 2, 3]}
df = pd.DataFrame(data)

# Pivot table
pivoted = df.pivot(index='date', columns='variable', values='value')

# Pivot with aggregation
pivot_table = df.pivot_table(index='date', columns='variable', values='value', aggfunc='mean')

49
New cards

How do you work with datetime data?

# Convert to datetime
df['date'] = pd.to_datetime(df['date'])

# Extract date components
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['day_name'] = df['date'].dt.day_name()

# Date arithmetic
df['days_ago'] = (pd.Timestamp.now() - df['date']).dt.days

50
New cards

How do you apply functions to DataFrames?

# Apply to entire DataFrame
df_squared = df.apply(lambda x: x**2) # Numeric columns only

# Apply to specific column
df['name_length'] = df['name'].apply(len)
df['name_upper'] = df['name'].apply(str.upper)

# Apply custom function
def categorize_age(age): 
  return 'young' if age < 30 else 'old'

df['category'] = df['age'].apply(categorize_age)

51
New cards

How do you get DataFrame info and statistics?

# Basic info
df.info()              # Data types, memory usage (prints directly; returns None)
print(df.describe())   # Statistical summary
print(df.shape)        # (rows, columns)
print(df.dtypes)       # Column data types
print(df.columns)      # Column names
print(df.head())       # First 5 rows
print(df.tail())       # Last 5 rows

52
New cards

How do you handle duplicates?

# Check for duplicates
print(df.duplicated().sum())   # Count duplicates
print(df.duplicated(['name'])) # Duplicates in specific column

# Remove duplicates
df_unique = df.drop_duplicates()
df_unique = df.drop_duplicates(['name'], keep='first')

# Mark duplicates
df['is_duplicate'] = df.duplicated()

53
New cards

How do you reset index?

# Reset index (old index becomes column)
df_reset = df.reset_index()

# Reset and drop old index
df_reset = df.reset_index(drop=True)

# In-place reset
df.reset_index(drop=True, inplace=True)

54
New cards

How do you set index?

# Set single column as index
df_indexed = df.set_index('name')

# Set multiple columns as index (MultiIndex)
df_multi = df.set_index(['city', 'name'])

# In-place index setting
df.set_index('name', inplace=True)

55
New cards

How do you save DataFrames to CSV?

# Basic CSV export
df.to_csv('output.csv', index=False)

# With options
df.to_csv('output.csv',
          index=False,           # Don't include index
          sep=';',              # Different separator
          encoding='utf-8',     # Specify encoding
          na_rep='Missing')     # How to represent NaN

56
New cards

How do you work with string columns?

# String operations
df['name_upper'] = df['name'].str.upper()
df['name_lower'] = df['name'].str.lower()
df['name_length'] = df['name'].str.len()

# String contains
filtered = df[df['name'].str.contains('A', na=False)]

# String split
df[['first', 'last']] = df['full_name'].str.split(' ', expand=True)

57
New cards

How do you create cross-tabulations?

# Simple crosstab
crosstab = pd.crosstab(df['city'], df['age_group'])

# With percentages
crosstab_pct = pd.crosstab(df['city'], df['age_group'], normalize='index')

# With margins (totals)
crosstab_margins = pd.crosstab(df['city'], df['age_group'], margins=True)

58
New cards

How do you sample data?

# Random sample of n rows
sample_n = df.sample(n=5)

# Random sample of fraction
sample_frac = df.sample(frac=0.1)  # 10% of data

# Sample with replacement
sample_replace = df.sample(n=100, replace=True)

# Set seed for reproducibility
sample_seed = df.sample(n=5, random_state=42)

59
New cards

How do you replace values?

# Replace specific values
df_replaced = df.replace('old_value', 'new_value')

# Replace multiple values
df_replaced = df.replace({'A': 1, 'B': 2, 'C': 3})

# Replace in specific column
df['grade'] = df['grade'].replace({'A': 'Excellent', 'B': 'Good'})

# Replace using regex
df['text'] = df['text'].str.replace(r'\d+', 'NUMBER', regex=True)

60
New cards

How do you work with categorical data?

# Convert to categorical
df['category'] = df['category'].astype('category')

# Create dummy variables
dummies = pd.get_dummies(df['category'])

# Label encoding (manual)
df['category_code'] = df['category'].cat.codes

# One-hot encoding with prefix
dummies_prefix = pd.get_dummies(df['category'], prefix='cat')

61
New cards

How do you create a basic line plot?

import matplotlib.pyplot as plt

# Basic line plot
x = [1, 2, 3, 4, 5]
y = [2, 4, 6, 8, 10]

plt.plot(x, y)
plt.xlabel('X values')
plt.ylabel('Y values')
plt.title('My Line Plot')
plt.grid(True)
plt.show()

62
New cards

How do you create a scatter plot?

import matplotlib.pyplot as plt
import numpy as np

# Basic scatter plot
x = np.random.randn(100)
y = np.random.randn(100)
colors = np.random.rand(100)
sizes = 1000 * np.random.rand(100)

plt.scatter(x, y, c=colors, s=sizes, alpha=0.6)
plt.colorbar()
plt.show()

63
New cards

How do you create a bar chart?

import matplotlib.pyplot as plt

# Vertical bar chart
categories = ['A', 'B', 'C', 'D']
values = [23, 45, 56, 78]

plt.bar(categories, values, color='skyblue')
plt.ylabel('Values')
plt.title('Bar Chart')

# Horizontal bar chart
plt.barh(categories, values)
plt.show()

64
New cards

How do you create a histogram?

import matplotlib.pyplot as plt
import numpy as np

# Generate sample data
data = np.random.normal(100, 15, 1000)

# Create histogram
plt.hist(data, bins=30, alpha=0.7, color='green', edgecolor='black')
plt.xlabel('Values')
plt.ylabel('Frequency')
plt.title('Histogram')
plt.show()

65
New cards

How do you customize plot appearance?

import matplotlib.pyplot as plt

# Set figure size and style
plt.figure(figsize=(10, 6))
plt.style.use('seaborn-v0_8')

# Plot with customization
plt.plot(x, y, color='red', linewidth=2, linestyle='--', marker='o')
plt.title('Customized Plot', fontsize=16, fontweight='bold')
plt.xlabel('X Label', fontsize=12)
plt.ylabel('Y Label', fontsize=12)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

66
New cards

How do you create subplots?

import matplotlib.pyplot as plt

# Create subplots
fig, axes = plt.subplots(2, 2, figsize=(12, 8))

# Plot in different subplots
axes[0, 0].plot(x, y)
axes[0, 0].set_title('Line Plot')

axes[0, 1].scatter(x, y)
axes[0, 1].set_title('Scatter Plot')

axes[1, 0].bar(categories, values)
axes[1, 0].set_title('Bar Chart')

axes[1, 1].hist(data)
axes[1, 1].set_title('Histogram')

plt.tight_layout()
plt.show()

67
New cards

How do you create a heatmap with seaborn?

import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Create sample correlation matrix
data = np.random.rand(10, 10)
corr_matrix = np.corrcoef(data)

# Create heatmap
sns.heatmap(corr_matrix, 
            annot=True,          # Show values
            cmap='coolwarm',     # Color scheme
            center=0,            # Center colormap at 0
            square=True)         # Square cells
plt.title('Correlation Heatmap')
plt.show()

68
New cards

How do you create a box plot?

import seaborn as sns
import matplotlib.pyplot as plt

# Seaborn box plot
sns.boxplot(data=df, x='category', y='value')
plt.title('Box Plot by Category')
plt.xticks(rotation=45)
plt.show()

# Matplotlib box plot
grouped = df.groupby('category')
data_groups = [group['value'].values for name, group in grouped]
group_names = [name for name, group in grouped]  # Same (sorted) order as data_groups
plt.boxplot(data_groups, labels=group_names)
plt.show()

69
New cards

How do you create a violin plot?

import seaborn as sns
import matplotlib.pyplot as plt

# Basic violin plot
sns.violinplot(data=df, x='category', y='value')
plt.title('Violin Plot')
plt.show()

# Split violin plot (compare two groups)
sns.violinplot(data=df, x='category', y='value', hue='group', split=True)
plt.show()

70
New cards

How do you save plots?

import matplotlib.pyplot as plt

# Create plot
plt.plot(x, y)
plt.title('My Plot')

# Save with different formats and options
plt.savefig('plot.png', dpi=300, bbox_inches='tight')
plt.savefig('plot.pdf', bbox_inches='tight')
plt.savefig('plot.jpg', dpi=150, facecolor='white')

# Save without displaying
plt.savefig('plot.png')
plt.close()  # Close figure to free memory

71
New cards

How do you create a correlation matrix plot?

import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Calculate correlation matrix
corr = df.select_dtypes(include=[np.number]).corr()

# Plot correlation matrix
plt.figure(figsize=(10, 8))
sns.heatmap(corr, 
            annot=True, 
            cmap='RdBu_r', 
            center=0,
            square=True,
            linewidths=0.5)
plt.title('Correlation Matrix')
plt.show()

72
New cards

How do you create a pair plot?

import seaborn as sns
import matplotlib.pyplot as plt

# Pair plot of all numeric columns
sns.pairplot(df)
plt.show()

# Pair plot with grouping
sns.pairplot(df, hue='species', markers=['o', 's', 'D'])
plt.show()

# Pair plot with specific columns
sns.pairplot(df, vars=['col1', 'col2', 'col3'], hue='target')
plt.show()

73
New cards

How do you add legends to plots?

import matplotlib.pyplot as plt

# Plot multiple lines
plt.plot(x, y1, label='Line 1', color='blue')
plt.plot(x, y2, label='Line 2', color='red')
plt.plot(x, y3, label='Line 3', color='green')

# Add legend
plt.legend(loc='upper right')  # or 'best', 'lower left', etc.
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')  # Outside plot
plt.show()

74
New cards

How do you create a pie chart?

import matplotlib.pyplot as plt

# Data for pie chart
labels = ['Python', 'Java', 'JavaScript', 'C++']
sizes = [30, 25, 20, 15]
explode = (0.1, 0, 0, 0)  # Explode first slice

# Create pie chart
plt.pie(sizes, labels=labels, explode=explode, autopct='%1.1f%%', 
        shadow=True, startangle=90)
plt.axis('equal')  # Equal aspect ratio
plt.title('Programming Languages Usage')
plt.show()

75
New cards

How do you plot time series data?

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Create time series data
dates = pd.date_range('2023-01-01', periods=100, freq='D')
values = np.cumsum(np.random.randn(100))

# Plot time series
plt.figure(figsize=(12, 6))
plt.plot(dates, values)
plt.xlabel('Date')
plt.ylabel('Value')
plt.title('Time Series Plot')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

76
New cards

How do you split data for machine learning?

from sklearn.model_selection import train_test_split

# Basic train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Stratified split (maintains class distribution)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

77
New cards

How do you create a linear regression model?

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Create and train model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'MSE: {mse}, R²: {r2}')

78
New cards

How do you evaluate model performance?

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np

# Classification metrics
accuracy = accuracy_score(y_true, y_pred)
report = classification_report(y_true, y_pred)
cm = confusion_matrix(y_true, y_pred)

# Regression metrics
mse = mean_squared_error(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)
rmse = np.sqrt(mse)

79
New cards

How do you scale features?

from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler

# Standard scaling (mean=0, std=1)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Min-Max scaling (0 to 1)
min_max_scaler = MinMaxScaler()
X_minmax = min_max_scaler.fit_transform(X_train)

# Robust scaling (median-based)
robust_scaler = RobustScaler()
X_robust = robust_scaler.fit_transform(X_train)

80
New cards

How do you encode categorical variables?

from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import pandas as pd

# Label encoding (ordinal)
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# One-hot encoding
encoder = OneHotEncoder(sparse_output=False)
X_encoded = encoder.fit_transform(X_categorical)

# Using pandas
dummies = pd.get_dummies(df['category'], prefix='cat')
df_encoded = pd.concat([df, dummies], axis=1)

81
New cards

How do you perform cross-validation?

from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.linear_model import LogisticRegression

# Basic cross-validation
model = LogisticRegression()
scores = cross_val_score(model, X, y, cv=5)
print(f'CV Scores: {scores}')
print(f'Mean CV Score: {scores.mean():.3f} (+/- {scores.std() * 2:.3f})')

# Stratified K-Fold
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_val_score(model, X, y, cv=skf)

82
New cards

How do you create a classification model?

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# Random Forest
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

# Logistic Regression
lr = LogisticRegression(random_state=42)
lr.fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)

# Support Vector Machine
svm = SVC(random_state=42)
svm.fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)

83
New cards

How do you handle imbalanced datasets?

from sklearn.utils import resample
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier

# Resampling minority class
minority_upsampled = resample(minority_class, 
                             replace=True, 
                             n_samples=len(majority_class))

# SMOTE (Synthetic Minority Oversampling)
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Class weights
clf = RandomForestClassifier(class_weight='balanced')
clf.fit(X_train, y_train)

84
New cards

How do you perform feature selection?

from sklearn.feature_selection import SelectKBest, f_classif, RFE
from sklearn.ensemble import RandomForestClassifier

# Univariate feature selection
selector = SelectKBest(score_func=f_classif, k=10)
X_selected = selector.fit_transform(X, y)

# Recursive Feature Elimination
rf = RandomForestClassifier()
rfe = RFE(estimator=rf, n_features_to_select=10)
X_rfe = rfe.fit_transform(X, y)

# Feature importance from tree-based models
rf.fit(X, y)
importances = rf.feature_importances_

85
New cards

How do you create a pipeline?

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier

# Create pipeline
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('classifier', RandomForestClassifier(random_state=42))
])

# Fit and predict
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)

# Access individual steps
scaler = pipe.named_steps['scaler']
classifier = pipe.named_steps['classifier']

86
New cards

How do you tune hyperparameters?

from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier

# Grid Search
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [3, 5, 7, None],
    'min_samples_split': [2, 5, 10]
}

grid_search = GridSearchCV(RandomForestClassifier(), param_grid, cv=5)
grid_search.fit(X_train, y_train)
best_model = grid_search.best_estimator_
print(f'Best params: {grid_search.best_params_}')

87
New cards

How do you create a confusion matrix?

from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt

# Create confusion matrix
cm = confusion_matrix(y_true, y_pred)
print(cm)

# Visualize confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.title('Confusion Matrix')
plt.show()

# Classification report
print(classification_report(y_true, y_pred))

88
New cards

How do you perform clustering?

from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering
import matplotlib.pyplot as plt

# K-Means clustering
kmeans = KMeans(n_clusters=3, random_state=42)
cluster_labels = kmeans.fit_predict(X)
centers = kmeans.cluster_centers_

# DBSCAN clustering
dbscan = DBSCAN(eps=0.5, min_samples=5)
db_labels = dbscan.fit_predict(X)

# Hierarchical clustering
agg_clustering = AgglomerativeClustering(n_clusters=3)
agg_labels = agg_clustering.fit_predict(X)

# Visualize clusters
plt.scatter(X[:, 0], X[:, 1], c=cluster_labels)
plt.scatter(centers[:, 0], centers[:, 1], marker='x', s=200, c='red')
plt.show()

89
New cards

How do you reduce dimensionality?

from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

# Principal Component Analysis
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)
print(f'Explained variance ratio: {pca.explained_variance_ratio_}')

# t-SNE
tsne = TSNE(n_components=2, random_state=42)
X_tsne = tsne.fit_transform(X)

# Visualize reduced dimensions
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y)
plt.title('PCA')
plt.subplot(1, 2, 2)
plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=y)
plt.title('t-SNE')
plt.show()

90
New cards

How do you handle overfitting?

# Techniques to handle overfitting:
# Items with code show the scikit-learn entry point; items without code are
# listed as reminders only.
# NOTE(review): model, X and y are not defined in this snippet.

# 1. Cross-validation
from sklearn.model_selection import cross_val_score
scores = cross_val_score(model, X, y, cv=5)  # cv=5 requests 5-fold cross-validation

# 2. Regularization
from sklearn.linear_model import Ridge, Lasso
ridge = Ridge(alpha=1.0)  # L2 regularization
lasso = Lasso(alpha=1.0)  # L1 regularization

# 3. Early stopping (for neural networks)
# 4. Dropout (for neural networks)
# 5. Ensemble methods
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=100)  # created here but not fitted in this snippet

# 6. More training data
# 7. Feature selection
# 8. Reduce model complexity

91
New cards

How do you create lambda functions?

# Lambda functions (anonymous functions)
# A lambda is a single-expression function; the expression's value is returned.
square = lambda x: x**2
print(square(5))  # 25

# With multiple arguments
add = lambda x, y: x + y
print(add(3, 4))  # 7

# Using with built-in functions
# map()/filter() call the lambda once per item; list() collects the results.
numbers = [1, 2, 3, 4, 5]
squared = list(map(lambda x: x**2, numbers))         # [1, 4, 9, 16, 25]
evens = list(filter(lambda x: x % 2 == 0, numbers))  # [2, 4]

# In pandas
# NOTE(review): df is not defined in this snippet; presumably an existing
# DataFrame that has an 'old_col' column.
df['new_col'] = df['old_col'].apply(lambda x: x * 2)

92
New cards

How do you handle exceptions?

# Basic try-except
# The except clause runs only when the named exception is raised in the try body.
try:
    result = 10 / 0
except ZeroDivisionError:
    print('Cannot divide by zero!')

# Multiple exceptions
# Each except clause handles one failure mode of the try body; finally runs
# whether or not an exception occurred.
try:
    value = int(input('Enter number: '))  # the ValueError handler below covers non-numeric input
    result = 10 / value                   # the ZeroDivisionError handler below covers value == 0
except ValueError:
    print('Invalid input!')
except ZeroDivisionError:
    print('Cannot divide by zero!')
finally:
    print('This always executes')

# Generic exception handler
# NOTE(review): risky_operation is not defined in this snippet; it stands in
# for any call that may fail. 'as e' binds the exception so it can be reported.
try:
    risky_operation()
except Exception as e:
    print(f'Error occurred: {e}')

93
New cards

How do you read and write files?

# Reading files
# Every read call continues from the current file position. After f.read()
# the position is at end-of-file, so a following readlines()/readline() on
# the same handle would return an empty result. Each access pattern below
# therefore gets its own open() (calling f.seek(0) in between also works).
with open('file.txt', 'r') as f:
    content = f.read()          # Read entire file as one string

with open('file.txt', 'r') as f:
    lines = f.readlines()       # Read all lines into a list (newlines kept)

with open('file.txt', 'r') as f:
    first_line = f.readline()   # Read one line

# Writing files ('w' truncates an existing file)
with open('output.txt', 'w') as f:
    # write() does not append a newline; add it so the next line starts fresh
    f.write('Hello, World!\n')
    # writelines() does not add newlines either; each item carries its own
    f.writelines(['line1\n', 'line2\n'])

# Appending to files ('a' keeps existing content and writes at the end)
with open('log.txt', 'a') as f:
    f.write('New log entry\n')

# Working with CSV
import csv
# newline='' lets the csv module handle line endings itself, which keeps
# newlines embedded in quoted fields intact.
with open('data.csv', 'r', newline='') as f:
    reader = csv.reader(f)
    for row in reader:          # each row is a list of strings
        print(row)

94
New cards

How do you use map() and filter()?

# map() calls a function on every item and yields the results lazily;
# list() turns the resulting iterator into a concrete list.
numbers = [1, 2, 3, 4, 5]
square_iter = map(lambda n: n**2, numbers)
squared = list(square_iter)                  # [1, 4, 9, 16, 25]
string_iter = map(str, numbers)
strings = list(string_iter)                  # ['1', '2', '3', '4', '5']

# filter() keeps only the items for which the predicate returns a true value
even_iter = filter(lambda n: n % 2 == 0, numbers)
evens = list(even_iter)                      # [2, 4]
candidates = [-1, 0, 1, 2]
positive = list(filter(lambda n: n > 0, candidates))  # [1, 2]

# map() with several iterables passes one item from each to the function
list1 = [1, 2, 3]
list2 = [4, 5, 6]
pair_sums = map(lambda a, b: a + b, list1, list2)
sums = list(pair_sums)                       # [5, 7, 9]

95
New cards

How do you create decorators?

import functools


# Simple decorator
def my_decorator(func):
    """Wrap ``func`` so a message is printed before and after each call.

    The wrapper forwards all positional and keyword arguments and returns
    whatever ``func`` returns.
    """
    # wraps() copies __name__, __doc__, etc. from func onto the wrapper so the
    # decorated function still identifies as itself.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        print('Before function call')
        result = func(*args, **kwargs)
        print('After function call')
        return result
    return wrapper


# Using decorator
@my_decorator
def greet(name):
    """Return a greeting for ``name``."""
    return f'Hello, {name}!'


# Decorator with arguments
def repeat(times):
    """Return a decorator that calls the decorated function ``times`` times.

    The wrapped call returns the result of the last invocation, or ``None``
    when ``times`` is zero or negative (the function is then never called).
    """
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Pre-bind result so times <= 0 returns None instead of raising
            # UnboundLocalError.
            result = None
            for _ in range(times):
                result = func(*args, **kwargs)
            return result
        return wrapper
    return decorator


@repeat(3)
def say_hello():
    """Print a greeting (three times per call, via ``@repeat(3)``)."""
    print('Hello!')

96
New cards

How do you work with generators?

# Generator function
def fibonacci():
    """Yield the Fibonacci sequence (0, 1, 1, 2, 3, ...) without end."""
    current, upcoming = 0, 1
    while True:
        yield current
        current, upcoming = upcoming, current + upcoming

# Using generator
# next() resumes the generator until its next yield; take the first ten values.
fib = fibonacci()
for _ in range(10):
    value = next(fib)
    print(value)

# Generator expression
# Same syntax as a list comprehension but with parentheses; values are
# produced only when consumed (here by list()).
squares = (n**2 for n in range(10))
all_squares = list(squares)
print(all_squares)  # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

# Generator for memory efficiency
def read_large_file(file_path):
    """Yield each line of ``file_path`` with surrounding whitespace stripped.

    Lines are read one at a time, so the whole file is never held in memory.
    """
    with open(file_path, 'r') as handle:
        yield from (raw_line.strip() for raw_line in handle)

97
New cards

How do you use *args and **kwargs?

# *args collects any extra positional arguments into a tuple
def sum_all(*args):
    """Return the sum of all positional arguments (0 when none are given)."""
    total = 0
    for term in args:
        total = total + term
    return total

print(sum_all(1, 2, 3, 4))  # 10

# **kwargs collects any extra keyword arguments into a dict
def print_info(**kwargs):
    """Print every keyword argument on its own line as ``key: value``."""
    for entry in kwargs.items():
        key, value = entry
        print(f'{key}: {value}')

print_info(name='Alice', age=30, city='NYC')

# Combining both: positional extras land in args, keyword extras in kwargs
def flexible_function(*args, **kwargs):
    """Print the received positional tuple and keyword dict."""
    print(f'Args: {args}')
    print(f'Kwargs: {kwargs}')

flexible_function(1, 2, 3, name='Alice', age=30)

# Unpacking arguments: * spreads a sequence into positional arguments,
# ** spreads a dict into keyword arguments
numbers = [1, 2, 3, 4]
unpacked_total = sum_all(*numbers)  # Unpacking list
print(unpacked_total)

info = {'name': 'Bob', 'age': 25}
print_info(**info)  # Unpacking dictionary

98
New cards

How do you create classes?

# Basic class
class Person:
    """A person with a name and an age."""

    def __init__(self, name, age):
        """Store ``name`` and ``age`` on the new instance."""
        self.name, self.age = name, age

    def greet(self):
        """Return a short self-introduction."""
        return f'Hi, I am {self.name}'

    def have_birthday(self):
        """Increase the age by one and return a congratulation message."""
        self.age = self.age + 1
        return f'Happy birthday! Now {self.age}'

# Inheritance
class Student(Person):
    """A Person that additionally carries a student id."""

    def __init__(self, name, age, student_id):
        """Initialise the Person part first, then store ``student_id``."""
        super().__init__(name, age)
        self.student_id = student_id

    def study(self, subject):
        """Return a sentence saying this student is studying ``subject``."""
        return f'{self.name} is studying {subject}'

# Using classes
person, student = Person('Alice', 25), Student('Bob', 20, 'S12345')
for message in (person.greet(), student.study('Python')):
    print(message)

99
New cards

How do you work with JSON data?

import json

# Python dict to JSON string
data = {'name': 'Alice', 'age': 30, 'city': 'NYC'}
json_string = json.dumps(data, indent=2)  # indent=2 pretty-prints the output
print(json_string)

# JSON string to Python dict
json_data = '{"name": "Bob", "age": 25}'
python_dict = json.loads(json_data)
print(python_dict)

# Save to JSON file (dump writes to a file object, dumps returns a string)
with open('data.json', 'w') as f:
    json.dump(data, f, indent=2)

# Load from JSON file
with open('data.json', 'r') as f:
    loaded_data = json.load(f)
    print(loaded_data)

# Handle datetime objects
from datetime import datetime
data_with_date = {'name': 'Alice', 'timestamp': datetime.now()}


# json cannot encode datetime on its own (json.dumps raises TypeError), so
# pass a custom serializer through the default= hook.
def serialize_datetime(obj):
    """Return an ISO 8601 string for datetime objects.

    json.dumps calls this only for objects it cannot encode itself. Any
    other unsupported type raises TypeError, so unknown objects are not
    silently stringified.
    """
    if isinstance(obj, datetime):
        return obj.isoformat()
    raise TypeError(
        f'Object of type {type(obj).__name__} is not JSON serializable'
    )


json_with_date = json.dumps(data_with_date, default=serialize_datetime)
print(json_with_date)

# Convert the ISO string back into a datetime after loading
restored_timestamp = datetime.fromisoformat(
    json.loads(json_with_date)['timestamp']
)
print(restored_timestamp)

100
New cards

How do you measure execution time?

import time
from timeit import timeit

# Using time module
# perf_counter() is a high-resolution monotonic clock meant for measuring
# intervals; time.time() is wall-clock time and can jump when the system
# clock is adjusted, which would corrupt the measurement.
start_time = time.perf_counter()
# Your code here
time.sleep(1)  # Simulate work
end_time = time.perf_counter()
execution_time = end_time - start_time
print(f'Execution time: {execution_time:.2f} seconds')

# Using timeit for small code snippets
# timeit() returns the TOTAL time for all runs, so divide by the run count
# to get the average; keeping the count in one variable keeps both in sync.
runs = 10000
execution_time = timeit('sum(range(100))', number=runs)
print(f'Average time: {execution_time/runs:.6f} seconds')

# Context manager for timing
from contextlib import contextmanager

@contextmanager
def timer():
    """Print the time spent inside the ``with`` block when it exits.

    The elapsed time is reported even if the block raises; the exception
    then propagates to the caller unchanged.
    """
    # perf_counter() is monotonic, so the difference cannot go negative if
    # the system clock is adjusted while the block is running.
    start = time.perf_counter()
    try:
        yield
    finally:
        # finally: without it, an exception in the timed block would skip
        # the report entirely.
        end = time.perf_counter()
        print(f'Elapsed time: {end - start:.2f} seconds')

with timer():
    # Your code here
    sum(range(1000000))