# xgb_load.py
#
# Load a saved XGBoost model, predict on the processed test set, write the
# predictions to a submission CSV, and plot the model's feature importances.
import operator
import matplotlib.pyplot as plt
__author__ = 'osboxes'
import xgboost as xgb
import pandas as pd
import numpy as np
from datetime import datetime
# Restore the trained booster straight from the model file saved on disk.
gbm = xgb.Booster(model_file='saved_models/xgb_001.model')

# Read the processed test set. The first CSV column becomes the index and
# parse_dates=[4] asks pandas to parse the column at position 4 as dates.
test = pd.read_csv(
    'test_processed.csv',
    sep=',',
    index_col=0,
    parse_dates=[4],
)
# Select features.
# The order of this list fixes the column order of the matrix that is built
# from it below, so keep it stable.
# Currently not selected: 'Date', 'Sales', and the per-month promo flags
# ('JanPromo', 'FebPromo', 'MarPromo', 'AprPromo', 'MayPromo', 'JunPromo',
# 'JulPromo', 'SeptPromo', 'OctPromo', 'NovPromo', 'DecPromo').
features = [
    # store / calendar basics
    'Store',
    'DayOfWeek',
    'Promo',
    'StateHoliday',
    'SchoolHoliday',
    'StoreType',
    'Assortment',
    # competition
    'CompetitionDistance',
    'CompetitionOpenSinceMonth',
    'CompetitionOpenSinceYear',
    # long-running promotion
    'Promo2',
    'Promo2SinceWeek',
    'Promo2SinceYear',
    # date parts
    'Year',
    'Month',
    'Day',
    # derived deltas
    'CompetitionOpenDeltaMonths',
    'PromoOpenDeltaMonths',
]
# Pull the selected columns out as a plain array, in the order given by
# `features`.
feature_matrix = test[features].values
print("Make predictions on the test set")
raw_predictions = gbm.predict(xgb.DMatrix(feature_matrix))

# Make Submission
# expm1 is applied to the raw model output before it is written as 'Sales';
# presumably the model was fitted on log1p(Sales) -- confirm against the
# training script.
predicted_sales = np.expm1(raw_predictions)
submission = pd.DataFrame(
    {
        "Id": test['Id'],
        'Sales': predicted_sales,
    }
)
submission.to_csv("xgboost_submission.csv", index=False)
# XGB feature importances
# Based on https://www.kaggle.com/mmueller/liberty-mutual-group-property-inspection-prediction/xgb-feature-importance-python/code

# Sort the (feature, score) pairs from the booster in ascending score order.
score_by_feature = gbm.get_fscore()
ranked_scores = sorted(score_by_feature.items(), key=operator.itemgetter(1))
importance_df = pd.DataFrame(ranked_scores, columns=['feature', 'fscore'])

# Rescale the scores so that they sum to 1.
total_fscore = importance_df['fscore'].sum()
importance_df['fscore'] = importance_df['fscore'] / total_fscore

# Horizontal bar chart, one bar per feature.
importance_ax = importance_df.plot(
    kind='barh',
    x='feature',
    y='fscore',
    legend=False,
    figsize=(6, 10),
)
importance_ax.set_title('XGBoost Feature Importance')
importance_ax.set_xlabel('relative importance')
importance_ax.get_figure().savefig(
    'feature_importance_xgb.png',
    bbox_inches='tight',
    pad_inches=1,
)