-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path2nd Module.py
More file actions
78 lines (47 loc) · 1.68 KB
/
2nd Module.py
File metadata and controls
78 lines (47 loc) · 1.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# -*- coding: utf-8 -*-
"""
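Hierarchical clustering of the SurgeBase dataset.

Loads SurgeBase.csv, min-max scales the columns at positions 3-7, builds
single, average and Ward linkages, plots dendrograms for the average and Ward
linkages, and exports the 5-cluster labels of both to an Excel file.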
"""
#Importing Packages
import pandas as pd
from sklearn import preprocessing
import matplotlib.pyplot as plt
from pandas.plotting import parallel_coordinates
# Importing data
dktc = pd.read_csv('/home/AliAzzam/Downloads/Surge Clustring/SurgeBase.csv')

# EDA
pd.set_option('display.precision', 1)  # one decimal place in printed frames
dktc.info()
# A bare `dktc.describe()` only displays in an interactive console; print it
# so the summary also appears when the file is run as a script.
print(dktc.describe())

cols = dktc.columns.tolist()
print(cols)

# Columns at positions 3..7 are the ones that get scaled and clustered below.
# NOTE(review): presumably these are the numeric columns -- confirm against
# the CSV header printed above.
cols_n = cols[3:8]
# .copy() gives an independent frame, so changes to dksub never touch dktc.
dksub = dktc[cols_n].copy()
dksub.info()
# Applying pre-processing: min-max scale every selected column so that no
# single feature dominates the Euclidean distances used for clustering.
mmscaler = preprocessing.MinMaxScaler()
dksub_mm = pd.DataFrame(
    mmscaler.fit_transform(dksub),
    columns=dksub.columns,
    index=dksub.index,  # fit_transform returns a bare array; keep row labels aligned
)
# Printed explicitly: a bare expression shows nothing when run as a script.
print(dksub_mm.describe())
# Clustering using ML: hierarchical linkage on the scaled features
from scipy.cluster.hierarchy import linkage, dendrogram, fcluster

# One linkage matrix per merge strategy, all on Euclidean distances:
# Z -> single, Zavg -> average, Zward -> Ward.
Z, Zavg, Zward = (
    linkage(dksub_mm, method=strategy, metric='euclidean')
    for strategy in ('single', 'average', 'ward')
)
# Plotting dendrogram for the average-linkage method
th_avg = 0.52  # height of the dashed cut line; also used as the colour threshold
plt.figure()   # own figure, so the two dendrograms do not overlay each other
dendrogram(Zavg, color_threshold=th_avg)
plt.axhline(y=th_avg, c='grey', lw=2, linestyle='dashed')
plt.text(150, th_avg, 't=' + str(th_avg))  # label the cut line with its height
plt.title("Dendrogram with Average Linkage Method for Clustering")

# Plotting dendrogram for Ward's method
th_ward = 4
plt.figure()
dendrogram(Zward, color_threshold=th_ward)
plt.axhline(y=th_ward, c='grey', lw=2, linestyle='dashed')
plt.text(150, th_ward, 't=' + str(th_ward))
plt.title("Dendrogram with Wards Method for Clustering")

# Render both figures; without this nothing is displayed when the file is run
# as a plain script rather than in an interactive console.
plt.show()
# Mapping clusters to datapoints
n_clusters = 5  # flat-cluster count requested from both linkages ('maxclust')
dktc['Wards'] = fcluster(Zward, n_clusters, criterion='maxclust')
dktc['Average_Linkage'] = fcluster(Zavg, n_clusters, criterion='maxclust')

# Exporting data
from pathlib import Path

# The path is relative to the current working directory. Create the folder
# first so the export does not fail with OSError after all the work is done.
out_path = Path('Surge Clustring/F5C.xlsx')
out_path.parent.mkdir(parents=True, exist_ok=True)
dktc.to_excel(out_path)