import pandas as pd

# Sample messy dataset: each column contains exactly one missing entry (None).
data = {
    'name': ['Alice', 'Bob', 'Charlie', 'David', None],
    'score': [90, None, 85, 95, 88],
    'age': [25, 30, None, 40, 35],
}
df = pd.DataFrame(data)

# Remove rows with missing data. dropna() returns a new DataFrame, so `df`
# itself is left unchanged and still contains the incomplete rows.
df_cleaned = df.dropna()
print(df_cleaned)
# Typical target dtypes for astype(): int, float, str
#
# The sample 'age' column contains a missing value, so pandas stores it as
# float64 with NaN. astype(int) raises on such a column because NaN has no
# plain-integer representation. The nullable 'Int64' dtype converts the
# whole-number values to integers and keeps the gap as <NA>.
df['age'] = df['age'].astype('Int64')
# Keep only the first row seen for each distinct 'name' value
# (drop_duplicates defaults to keep='first'). The result is a new
# DataFrame; `df` is left unchanged.
df_no_duplicates = df.drop_duplicates(subset=['name'])
print(df_no_duplicates)
# Summary statistics for the 'score' column. pandas skips missing values by
# default (skipna=True), and std() is the sample standard deviation (ddof=1).
avg_score = df['score'].mean()
median_score = df['score'].median()
std_score = df['score'].std()

print("Average:", avg_score)
print("Median:", median_score)
print("Standard Deviation:", std_score)