-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata1.py
More file actions
42 lines (29 loc) · 1.4 KB
/
data1.py
File metadata and controls
42 lines (29 loc) · 1.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import pandas as pd
import numpy as np
# Exploratory script: load the car data set and print a structural summary.
# The relative path is resolved against the current working directory.
filepath = "car_data_250.csv"
df = pd.read_csv(filepath)

# ---------------------------------------------------------------------------
# Earlier exploration steps, kept disabled for reference. They are written as
# real comments rather than bare triple-quoted strings, which are expression
# statements and not comments.
# ---------------------------------------------------------------------------

# Peek at the first and last 10 rows.
# print(df.head(10))
# print(f'The last 10 rows of the dataframe {df.tail(10)}')

# Create the headers list and assign it as the column names.
# headers = ["symboling","normalized-losses","make","fuel-type","aspiration", "num-of-doors","body-style",
#            "drive-wheels","engine-location","wheel-base", "length","width","height","curb-weight","engine-type",
#            "num-of-cylinders", "engine-size","fuel-system","bore","stroke","compression-ratio","horsepower",
#            "peak-rpm","city-mpg","highway-mpg","price"]
# df.columns = headers

# Find the names of the columns.
# print(df.columns)

# Replace the '?' placeholder with NaN.
# df1 = df.replace('?', np.nan)
# print(df1.head(10))

# Remove rows with NaN in the "price" column (the entire row is dropped
# wherever "price" is NaN), then save the result.
# df = df1.dropna(subset=["price"], axis=0)
# print(df.head(10))
# df.to_csv("automobile.csv", index=False)

# Check the data type of each column of "df" with .dtypes.
# print(df.dtypes)

# Summary statistics, excluding NaN (Not a Number) values.
# print(df.describe())

# Statistical summary of all columns, including object-typed attributes.
# print(df.describe(include="all"))

# Apply .describe() to the columns 'length' and 'compression-ratio' only.
# print(df[['length', 'compression-ratio']].describe())

# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() would emit a stray "None" line after the summary.
df.info()