Skip to content

Commit c536d89

Browse files
committed
This ch3 env builds and works for all the cases tested now.
1 parent c11a9d0 commit c536d89

2 files changed

Lines changed: 9 additions & 0 deletions

File tree

Chapter03/pipelines/sklearn_pipeline.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
from sklearn.impute import SimpleImputer
44
from sklearn.preprocessing import StandardScaler, OneHotEncoder
55
from sklearn.linear_model import LogisticRegression
6+
from sklearn.model_selection import train_test_split
7+
import pandas as pd
68

79
numeric_features = ['age', 'balance']
810
numeric_transformer = Pipeline(steps=[
@@ -22,5 +24,10 @@
2224
clf_pipeline = Pipeline(steps=[('preprocessor', preprocessor),
2325
('classifier', LogisticRegression())])
2426

27+
df = pd.read_csv('../../Chapter01/classifying/bank_data/bank.csv', delimiter=';', decimal=',')
28+
X, y = df.drop('y', axis=1), df['y'].apply(lambda x: 1 if x == 'yes' else 0)
29+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)
30+
# You need to get
2531
clf_pipeline.fit(X_train, y_train)
2632

33+
print(clf_pipeline.predict(X_test))

Chapter03/pipelines/sparkmllib_pipeline.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,5 @@
6868
# Define the entire pipeline and fit on the train data and transform on the test data
6969
clfPipeline = Pipeline().setStages(stages).fit(trainingData)
7070
clfPipeline.transform(testData)
71+
72+
# DataFrame.show() prints the rows to stdout itself and returns None, so
# wrapping it in print() would also emit a stray "None" line afterwards.
clfPipeline.transform(testData).show()

0 commit comments

Comments
 (0)