analysis.py
"""Drivers for the feature-selection benchmark experiments.

Runs robustness (Jaccard index), precision and classification-accuracy
benchmarks over one or more data sets for a given list of feature selectors,
optionally saving the results to disk.
"""
from experiments import DataSetExperiment
from benchmarks import MeasureBenchmark, AccuracyBenchmark
from sklearn.neighbors import KNeighborsClassifier
from sklearn_utilities import SVC_Grid
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegressionCV
import robustness_measure
import goodness_measure
from feature_selector import DummyFeatureSelector

# Classifiers used by default when a caller does not supply their own.
default_classifiers = [
    KNeighborsClassifier(3),
    SVC_Grid(kernel="linear"),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    LogisticRegressionCV(penalty='l1', solver='liblinear')
]


def run(data_sets, feature_selectors, jaccard_percentage=0.01, classifiers=None,
        measures=None, save=True, prefix=""):
    """Run robustness and accuracy benchmarks on each data set.

    When save=True, results are stored as "jc<p>_robustness" and
    "jc<p>_accuracy", where <p> is the Jaccard percentage in tenths of a
    percent (jaccard_percentage=0.01 gives jc10). A non-empty prefix is
    prepended with an underscore.
    """
    if isinstance(data_sets, str):
        data_sets = [data_sets]
    if classifiers is None:
        classifiers = default_classifiers
    if measures is None:
        measures = [
            robustness_measure.JaccardIndex(percentage=jaccard_percentage)
        ]
    if len(prefix) > 0:
        prefix += "_"

    # Stability of the selected feature subsets across runs.
    robustness_exp = DataSetExperiment(
        MeasureBenchmark(measures),
        feature_selectors
    )
    # Classification accuracy when only the top-ranked features are kept.
    accuracy_exp = DataSetExperiment(
        AccuracyBenchmark(classifiers, percentage_of_features=jaccard_percentage),
        feature_selectors
    )

    jcp = int(jaccard_percentage * 1e3)

    robustness_exp.run(data_sets)
    if save:
        robustness_exp.save_results(prefix + "jc{}_robustness".format(jcp))

    accuracy_exp.run(data_sets)
    if save:
        accuracy_exp.save_results(prefix + "jc{}_accuracy".format(jcp))


def artificial(feature_selectors, jaccard_percentage=0.01, save=True, classifiers=None):
    """Run robustness, precision and accuracy benchmarks on the artificial data set."""
    if classifiers is None:
        classifiers = default_classifiers

    robustness_exp = DataSetExperiment(
        MeasureBenchmark([
            robustness_measure.JaccardIndex(percentage=jaccard_percentage)
        ]),
        feature_selectors
    )
    # Precision on the artificial data set, evaluated at the top
    # n = 100, 200, ..., 1000 ranked features.
    precision_exp = DataSetExperiment(
        MeasureBenchmark(
            [goodness_measure.Precision("artificial", n) for n in range(100, 1001, 100)]
            + [goodness_measure.XPrecision("artificial")]
        ),
        feature_selectors
    )
    accuracy_exp = DataSetExperiment(
        AccuracyBenchmark(classifiers, percentage_of_features=jaccard_percentage),
        feature_selectors
    )

    jcp = int(jaccard_percentage * 1e3)

    robustness_exp.run("artificial")
    if save:
        robustness_exp.save_results("artificial_jc{}_robustness".format(jcp))

    precision_exp.run("artificial")
    if save:
        precision_exp.save_results("artificial_jc{}_precision".format(jcp))

    accuracy_exp.run("artificial")
    if save:
        accuracy_exp.save_results("artificial_jc{}_accuracy".format(jcp))


def accuracy_with_all_features(data_sets, classifiers=None):
    """Baseline accuracy with no feature selection applied."""
    if classifiers is None:
        classifiers = default_classifiers
    if isinstance(data_sets, str):
        data_sets = [data_sets]

    # DummyFeatureSelector performs no selection, and percentage_of_features=100
    # keeps all features, so this measures the classifiers on the full data.
    accuracy_exp = DataSetExperiment(
        AccuracyBenchmark(classifiers, percentage_of_features=100),
        DummyFeatureSelector()
    )
    accuracy_exp.run(data_sets)
    accuracy_exp.save_results("all_features")
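

# A minimal usage sketch, not part of the original module. It assumes the
# "artificial" data set is available to DataSetExperiment (as artificial()
# above implies); a real experiment would presumably pass concrete selectors
# from feature_selector instead of DummyFeatureSelector.
if __name__ == "__main__":
    selectors = [DummyFeatureSelector()]

    # Robustness, precision and accuracy at the top 1% of features;
    # with jaccard_percentage=0.01, jcp = int(0.01 * 1e3) = 10, so this
    # writes artificial_jc10_robustness, _precision and _accuracy.
    artificial(selectors, jaccard_percentage=0.01)

    # Baseline: accuracy with every feature retained.
    accuracy_with_all_features("artificial")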