-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsummarize.py
More file actions
32 lines (25 loc) · 945 Bytes
/
summarize.py
File metadata and controls
32 lines (25 loc) · 945 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# import pandas as pd
# df = pd.read_csv('fnma-dataset-classified.txt', sep='|')
# ohe = []
# for _, row in df.iterrows():
#     if (not pd.isna(row['classes'])):
#         pool_classes = row['classes'].split(',')
#         int_arr = [int(p) for p in pool_classes]
#         row = []
#         for i in range(10):
#             if i in int_arr:
#                 row.append(1)
#             else:
#                 row.append(0)
#         ohe.append(row)
# cols = ['pool_1', 'pool_2', 'pool_3', 'pool_4', 'pool_5', 'pool_6', 'pool_7', 'pool_8', 'pool_9', 'pool_10']
# ohe_df = pd.DataFrame(ohe, columns=cols)
# print(ohe_df.head())
# for col in cols:
#     print(ohe_df[col].value_counts())
# # -----------------------------
# # import os
# # for filename in os.listdir(os.getcwd() + "/output"):
# #     df = pd.read_csv(os.path.join("output/" + filename), sep='|')
# #     if len(df.index) > 5:
# #         print(f"{len(df)} {filename}")