-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathModelCreation.py
More file actions
104 lines (66 loc) · 2.61 KB
/
ModelCreation.py
File metadata and controls
104 lines (66 loc) · 2.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/env python
# coding: utf-8

# ModelCreation.py
# Trains a RandomForest classifier on an Azure ML registered dataset,
# logs the accuracy metric to an Azure ML experiment run, and saves the
# training-time dummy-column layout plus the fitted model for deployment.

# In[1]:
import os

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

from azureml.core import Workspace, Dataset, Experiment

print('Accessing the workspace....')
ws = Workspace.from_config("./config")

print('Accessing the dataset....')
az_dataset = Dataset.get_by_name(ws, 'Deploydataset')

# -----------------------------------------------------
# Create/Access an experiment object
# ----------------------------------------------------
print('Accessing/Creating the experiment...')
experiment = Experiment(workspace=ws, name='Webservice-exp001')

# -----------------------------------------------------
# Run an experiment using start_logging method
# -----------------------------------------------------
print('Start Experiment using Start Logging method...')
new_run = experiment.start_logging()

# --------------------------------------------------------
# Do your stuff here
# --------------------------------------------------------
# Load the data from the registered dataset into pandas
print('Loading the dataset to pandas dataframe...')
df = az_dataset.to_pandas_dataframe()

# Create X and Y Variables: every column but the last is a feature,
# the last column is the label.
X = df.iloc[:, :-1]
Y = df.iloc[:, -1:]

# One-hot encode the categorical feature columns
X = pd.get_dummies(X)

# Remember the exact encoded column set so the scoring script can
# re-align incoming data to the training-time layout.
train_enc_cols = X.columns

# Transform the categorical label column to a single indicator column
# (last dummy column is taken as the positive class).
Y = pd.get_dummies(Y)
Y = Y.iloc[:, -1]

# Split Data - stratified so train and test keep the class balance
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.3, random_state=1234, stratify=Y)

# Build the Random Forest model (fixed seed for reproducibility)
rfc = RandomForestClassifier(random_state=1234)

# Fit the data to the Random Forest object - Train Model
trained_model = rfc.fit(X_train, Y_train)

# Predict the outcome using Test data - Score Model
Y_predict = rfc.predict(X_test)

# Get the probability score of the positive class - Scored Probabilities
Y_prob = rfc.predict_proba(X_test)[:, 1]

# Get Confusion matrix and the accuracy/score - Evaluate
cm = confusion_matrix(Y_test, Y_predict)
score = rfc.score(X_test, Y_test)

# Always log the primary metric
new_run.log("accuracy", score)

# In[2]:
# -------------------------------------------------------
# Save all the transformations and models
# -------------------------------------------------------
# Fix: joblib.dump raises FileNotFoundError if './outputs' does not
# already exist (e.g. on a fresh local checkout) — create it first.
os.makedirs('./outputs', exist_ok=True)

model_file = './outputs/models.pkl'
# Persist the encoder column index together with the trained model so
# deployment can reconstruct the feature layout.
joblib.dump(value=[train_enc_cols, trained_model],
            filename=model_file)

# Complete the run
new_run.complete()

# Get the Run IDs from the experiment
list(experiment.get_runs())

# In[ ]: