-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata2.py
More file actions
27 lines (19 loc) · 1.04 KB
/
data2.py
File metadata and controls
27 lines (19 loc) · 1.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
# Column names for the data set; supplied through `names=` when loading
# "auto.csv" below.
headers = [
    "symboling", "normalized-losses", "make", "fuel-type", "aspiration",
    "num-of-doors", "body-style", "drive-wheels", "engine-location",
    "wheel-base", "length", "width", "height", "curb-weight", "engine-type",
    "num-of-cylinders", "engine-size", "fuel-system", "bore", "stroke",
    "compression-ratio", "horsepower", "peak-rpm", "city-mpg", "highway-mpg",
    "price",
]
df = pd.read_csv("auto.csv", names=headers)

# Turn the "?" placeholder into NaN so pandas' missing-data tools recognise it.
df.replace("?", np.nan, inplace=True)
# print(df.head())

# Boolean frame shaped like df: True wherever a value is missing.
missing_data = df.isnull()
# print(missing_data.head(5))

# Optional debugging aid: count missing values in each column.
# for column in missing_data.columns.values.tolist():
#     print(column)
#     print(missing_data[column].value_counts())
#     print("")

# Convert "normalized-losses" to float once; the mean is computed from this
# numeric view (presumably the raw column holds strings because of the "?"
# placeholders -- the cast is what makes the arithmetic possible).
normalized_losses = df["normalized-losses"].astype("float")
avg_norm_loss = normalized_losses.mean(axis=0)
print(f"Average of normalized-losses: {avg_norm_loss}")

# Replace NaN with the column mean and assign the result back to df.
# Calling an inplace method on df["normalized-losses"] is chained assignment:
# it operates on an intermediate object, which pandas warns about and which
# does not update df when Copy-on-Write is active. Assigning the filled float
# Series back is reliable and leaves the column with a numeric dtype.
df["normalized-losses"] = normalized_losses.fillna(avg_norm_loss)