Skip to content

Commit 2708ecd

Browse files
authored
Merge pull request #20 from ParallelGSReg/development
Add options
2 parents 47bcbc0 + bd3d0ea commit 2708ecd

15 files changed

Lines changed: 147 additions & 48 deletions

File tree

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "ModelSelection"
22
uuid = "51b78214-913b-40cf-9250-c64eb33811f7"
33
authors = ["Demian Panigo <panigo@gmail.com>", "Adán Mauri Ungaro <adan.mauri@gmail.com>", "Nicolás Monzón <nicomzn4@gmail.com>", "Valentin Mari <valentinmari@hotmail.com>"]
4-
version = "1.3.0"
4+
version = "1.3.1"
55

66
[deps]
77
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"

TODO.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
# TBD
77
- Validation in every module
8-
- Documentation in AllSubsetRegression
8+
- Pending documentation in AllSubsetRegression
99
- Major refactoring
1010
- General documentation
11+
- Notify progress in modules based on the average time each one takes

src/AllSubsetRegression/core.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ function all_subset_regression(
5959
modelavg::Bool = MODELAVG_DEFAULT,
6060
residualtest::Bool = RESIDUALTEST_DEFAULT,
6161
orderresults::Bool = ORDERRESULTS_DEFAULT,
62+
notify = nothing
6263
)
6364
validate_test(ttest = ttest, ztest = ztest)
6465

@@ -71,6 +72,7 @@ function all_subset_regression(
7172
modelavg = modelavg,
7273
residualtest = residualtest,
7374
orderresults = orderresults,
75+
notify = notify ,
7476
)
7577
elseif estimator == :logit
7678
AllSubsetRegression.logit!(
@@ -81,6 +83,7 @@ function all_subset_regression(
8183
modelavg = modelavg,
8284
residualtest = residualtest,
8385
orderresults = orderresults,
86+
notify = notify ,
8487
)
8588
else
8689
throw(ArgumentError(INVALID_ESTIMATOR))

src/AllSubsetRegression/estimators/logit.jl

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ function logit(
4949
modelavg::Bool = MODELAVG_DEFAULT,
5050
residualtest::Bool = RESIDUALTEST_DEFAULT,
5151
orderresults::Bool = ORDERRESULTS_DEFAULT,
52+
notify = nothing,
5253
)
5354
return logit!(
5455
ModelSelection.copy_modelselectiondata(data),
@@ -58,6 +59,7 @@ function logit(
5859
modelavg = modelavg,
5960
residualtest = residualtest,
6061
orderresults = orderresults,
62+
notify = notify ,
6163
)
6264
end
6365

@@ -110,7 +112,9 @@ function logit!(
110112
modelavg::Bool = MODELAVG_DEFAULT,
111113
residualtest::Bool = RESIDUALTEST_DEFAULT,
112114
orderresults::Bool = ORDERRESULTS_DEFAULT,
115+
notify = nothing,
113116
)
117+
ModelSelection.notification(notify, "Performing All Subset Regression", Dict(:estimator => :logit, :progress => 0))
114118
validate_criteria(criteria, AVAILABLE_LOGIT_CRITERIA)
115119
result = create_result(
116120
data,
@@ -121,7 +125,7 @@ function logit!(
121125
orderresults,
122126
ztest = ztest,
123127
)
124-
logit_execute!(data, result)
128+
logit_execute!(data, result, notify = notify )
125129
ModelSelection.addresult!(data, result)
126130
data = addextras!(data, result)
127131
return data
@@ -153,7 +157,9 @@ explanatory variables in the given `ModelSelectionData`. This function mutates t
153157
logit_execute!(model_selection_data, all_subset_regression_result)
154158
```
155159
"""
156-
function logit_execute!(data::ModelSelectionData, result::AllSubsetRegressionResult)
160+
function logit_execute!(data::ModelSelectionData, result::AllSubsetRegressionResult; notify = nothing)
161+
ModelSelection.notification(notify, "Performing All Subset Regression", Dict(:estimator => :logit, :progress => 5))
162+
157163
if !data.removemissings
158164
data = ModelSelection.filter_data_by_empty_values!(data)
159165
end
@@ -189,6 +195,7 @@ function logit_execute!(data::ModelSelectionData, result::AllSubsetRegressionRes
189195
fullexpvars_without_outsample_subset =
190196
hcat(expvars_without_outsample_subset, fixedvariables_without_outsample_subset)
191197
end
198+
ModelSelection.notification(notify, "Performing All Subset Regression", Dict(:estimator => :logit, :progress => 15))
192199

193200
gum_model = GLM.fit(
194201
GeneralizedLinearModel,
@@ -200,6 +207,7 @@ function logit_execute!(data::ModelSelectionData, result::AllSubsetRegressionRes
200207
)
201208
start_coef = coeftable(gum_model).cols[1]
202209

210+
ModelSelection.notification(notify, "Performing All Subset Regression", Dict(:estimator => :logit, :progress => 25))
203211
if nprocs() == nworkers()
204212
for order = 1:num_operations
205213
# TODO: Split in multiple lines
@@ -287,6 +295,7 @@ function logit_execute!(data::ModelSelectionData, result::AllSubsetRegressionRes
287295
end
288296
end
289297
end
298+
ModelSelection.notification(notify, "Performing All Subset Regression", Dict(:estimator => :logit, :progress => 75))
290299

291300
result.data = Array(result_data)
292301

@@ -351,6 +360,7 @@ function logit_execute!(data::ModelSelectionData, result::AllSubsetRegressionRes
351360
end
352361

353362
result.nobs = result.bestresult_data[datanames_index[:nobs]]
363+
ModelSelection.notification(notify, "Performing All Subset Regression", Dict(:estimator => :logit, :progress => 100))
354364

355365
return result
356366
end

src/AllSubsetRegression/estimators/ols.jl

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ function ols(
4949
modelavg::Bool = MODELAVG_DEFAULT,
5050
residualtest::Bool = RESIDUALTEST_DEFAULT,
5151
orderresults::Bool = ORDERRESULTS_DEFAULT,
52+
notify = nothing,
5253
)
5354
return ols!(
5455
ModelSelection.copy_modelselectiondata(data),
@@ -58,6 +59,7 @@ function ols(
5859
modelavg = modelavg,
5960
residualtest = residualtest,
6061
orderresults = orderresults,
62+
notify = notify ,
6163
)
6264
end
6365

@@ -112,7 +114,9 @@ function ols!(
112114
modelavg::Bool = MODELAVG_DEFAULT,
113115
residualtest::Bool = RESIDUALTEST_DEFAULT,
114116
orderresults::Bool = ORDERRESULTS_DEFAULT,
117+
notify = nothing
115118
)
119+
ModelSelection.notification(notify, "Performing All Subset Regression", Dict(:estimator => :ols, :progress => 0))
116120
validate_criteria(criteria, AVAILABLE_OLS_CRITERIA)
117121
result = create_result(
118122
data,
@@ -123,7 +127,7 @@ function ols!(
123127
orderresults,
124128
ttest = ttest,
125129
)
126-
ols_execute!(data, result)
130+
ols_execute!(data, result, notify=notify)
127131
ModelSelection.addresult!(data, result)
128132
data = addextras!(data, result)
129133
return data
@@ -155,7 +159,9 @@ explanatory variables in the given `ModelSelectionData`. This function mutates t
155159
ols_execute!(model_selection_data, all_subset_regression_result)
156160
```
157161
"""
158-
function ols_execute!(data::ModelSelectionData, result::AllSubsetRegressionResult)
162+
function ols_execute!(data::ModelSelectionData, result::AllSubsetRegressionResult; notify = nothing)
163+
ModelSelection.notification(notify, "Performing All Subset Regression", Dict(:estimator => :ols, :progress => 5))
164+
159165
if !data.removemissings
160166
data = ModelSelection.filter_data_by_empty_values!(data)
161167
end
@@ -174,6 +180,7 @@ function ols_execute!(data::ModelSelectionData, result::AllSubsetRegressionResul
174180
result_data =
175181
fill!(SharedArray{data.datatype}(num_operations, size(result.datanames, 1)), NaN)
176182
datanames_index = ModelSelection.create_datanames_index(result.datanames)
183+
ModelSelection.notification(notify, "Performing All Subset Regression", Dict(:estimator => :ols, :progress => 25))
177184
if nprocs() == nworkers()
178185
for order = 1:num_operations
179186
ols_execute_row!(
@@ -289,7 +296,7 @@ function ols_execute!(data::ModelSelectionData, result::AllSubsetRegressionResul
289296
) ./ std(result.data[:, datanames_index[criteria]])
290297
)
291298
end
292-
299+
ModelSelection.notification(notify, "Performing All Subset Regression", Dict(:estimator => :ols, :progress => 75))
293300
if result.modelavg
294301
delta =
295302
maximum(result.data[:, datanames_index[:order]]) .-
@@ -352,6 +359,7 @@ function ols_execute!(data::ModelSelectionData, result::AllSubsetRegressionResul
352359
result.bestresult_data[datanames_index[:nobs]] =
353360
Int64(round(result.bestresult_data[datanames_index[:nobs]]))
354361
result.nobs = result.bestresult_data[datanames_index[:nobs]]
362+
ModelSelection.notification(notify, "Performing All Subset Regression", Dict(:estimator => :ols, :progress => 100))
355363
return result
356364
end
357365

src/CrossValidation/core.jl

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,19 +32,21 @@ function kfoldcrossvalidation!(
3232
previousresult::ModelSelection.ModelSelectionData,
3333
data::ModelSelection.ModelSelectionData,
3434
k::Int,
35-
s::Float64,
35+
s::Float64;
36+
notify = nothing,
3637
)
37-
kfoldcrossvalidation(previousresult, data, k, s)
38+
kfoldcrossvalidation(previousresult, data, k, s; notify = notify )
3839
end
3940

4041

4142
function kfoldcrossvalidation(
4243
previousresult::ModelSelection.ModelSelectionData,
4344
data::ModelSelection.ModelSelectionData,
4445
k::Int,
45-
s::Float64,
46+
s::Float64;
47+
notify = nothing,
4648
)
47-
49+
ModelSelection.notification(notify, "Performing Cross validation", Dict(:progress => 0))
4850
#db = randperm(data.nobs)
4951
db = collect(1:data.nobs)
5052
folds = split_database(db, k)
@@ -64,6 +66,10 @@ function kfoldcrossvalidation(
6466

6567
bestmodels = []
6668

69+
progress = 0
70+
step = floor(Int64, 50 / k)
71+
ModelSelection.notification(notify, "Performing Cross validation", Dict(:progress => progress))
72+
6773
for obs in LOOCV(k)
6874
dataset = collect(Iterators.flatten(folds[obs]))
6975
testset = setdiff(1:data.nobs, dataset)
@@ -107,8 +113,12 @@ function kfoldcrossvalidation(
107113
:datanames => backup.results[1].datanames,
108114
),
109115
)
116+
progress = progress + step
117+
ModelSelection.notification(notify, "Performing Cross validation", Dict(:progress => progress))
110118
end
111119

120+
ModelSelection.notification(notify, "Performing Cross validation", Dict(:progress => 50))
121+
112122
datanames = unique(Iterators.flatten(model[:datanames] for model in bestmodels))
113123

114124
data = Array{Any,2}(zeros(size(bestmodels, 1), size(datanames, 1)))
@@ -145,6 +155,7 @@ function kfoldcrossvalidation(
145155
previousresult = ModelSelection.addresult!(previousresult, result)
146156

147157
addextras(previousresult, result)
158+
ModelSelection.notification(notify, "Performing Cross validation", Dict(:progress => 100))
148159

149160
return previousresult
150161
end

src/FeatureExtraction/core.jl

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ function featureextraction(
66
fe_lag::Union{Array{Pair{Symbol,Int64}},Array{Pair{String,Int64}},Nothing} = nothing,
77
interaction::Union{Nothing,Array,Dict} = nothing,
88
removemissings::Bool = REMOVEMISSINGS_DEFAULT,
9+
notify = nothing,
910
)
1011
return featureextraction!(
1112
ModelSelection.copy_modelselectiondata(data),
@@ -15,6 +16,7 @@ function featureextraction(
1516
fe_lag = fe_lag,
1617
interaction = interaction,
1718
removemissings = removemissings,
19+
notify = notify ,
1820
)
1921
end
2022

@@ -26,6 +28,7 @@ function featureextraction!(
2628
fe_lag::Union{Array{Pair{Symbol,Int64}},Array{Pair{String,Int64}},Nothing} = nothing,
2729
interaction::Union{Nothing,Array,Dict} = nothing,
2830
removemissings::Bool = REMOVEMISSINGS_DEFAULT,
31+
notify = nothing,
2932
)
3033
data = execute!(
3134
data,
@@ -35,6 +38,7 @@ function featureextraction!(
3538
fe_lag = fe_lag,
3639
interaction = interaction,
3740
removemissings = removemissings,
41+
notify = notify ,
3842
)
3943

4044
data = addextras(data, fe_sqr, fe_log, fe_inv, fe_lag, interaction, removemissings)
@@ -50,7 +54,9 @@ function execute!(
5054
fe_lag::Union{Array{Pair{Symbol,Int64}},Array{Pair{String,Int64}},Nothing} = nothing,
5155
interaction::Union{Nothing,Array,Dict} = nothing,
5256
removemissings::Bool = REMOVEMISSINGS_DEFAULT,
57+
notify = nothing,
5358
)
59+
ModelSelection.notification(notify, "Performing Feature extraction", Dict(:progress => 0))
5460
if data.intercept
5561
ModelSelection.remove_intercept!(data)
5662
end
@@ -87,6 +93,6 @@ function execute!(
8793
end
8894

8995
data = ModelSelection.convert_data!(data)
90-
96+
ModelSelection.notification(notify, "Performing Feature extraction", Dict(:progress => 100))
9197
return data
9298
end

src/ModelSelection/core/gsr.jl

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ function gsr(
3434
kfoldcrossvalidation::Bool = CrossValidation.KFOLDCROSSVALIDATION_DEFAULT,
3535
numfolds::Int = CrossValidation.NUMFOLDS_DEFAULT,
3636
testsetshare::Union{Float32,Float64} = CrossValidation.TESTSETSHARE_DEFAULT,
37-
notify = NOTIFY_DEFAULT,
37+
notify = nothing,
3838
)
3939
gsr(
4040
estimator,
@@ -104,12 +104,10 @@ function gsr(
104104
kfoldcrossvalidation::Bool = CrossValidation.KFOLDCROSSVALIDATION_DEFAULT,
105105
numfolds::Int = CrossValidation.NUMFOLDS_DEFAULT,
106106
testsetshare::Union{Float32,Float64} = CrossValidation.TESTSETSHARE_DEFAULT,
107-
notify = NOTIFY_DEFAULT,
107+
notify = nothing,
108108
)
109109
removemissings = fe_lag === nothing
110110

111-
# TODO: Move notification to every module
112-
notification(notify, "Processing parameters")
113111
data = Preprocessing.input(
114112
equation,
115113
data = data,
@@ -122,11 +120,10 @@ function gsr(
122120
seasonaladjustment = seasonaladjustment,
123121
removeoutliers = removeoutliers,
124122
removemissings = removemissings,
123+
notify = notify,
125124
)
126125

127126
if featureextraction_enabled(fe_sqr, fe_log, fe_inv, fe_lag, interaction)
128-
# TODO: Move notification to every module
129-
notification(notify, "Performing feature extraction")
130127
data = FeatureExtraction.featureextraction!(
131128
data,
132129
fe_sqr = fe_sqr,
@@ -135,15 +132,14 @@ function gsr(
135132
fe_inv = fe_inv,
136133
interaction = interaction,
137134
removemissings = true,
135+
notify = notify ,
138136
)
139137
end
140138

141139
original_data = copy_modelselectiondata(data)
142140

143141
if preliminaryselection_enabled(preliminaryselection)
144-
# TODO: Move notification to every module
145-
notification(notify, "Performing preliminary selection")
146-
data = PreliminarySelection.preliminary_selection!(preliminaryselection, data)
142+
data = PreliminarySelection.preliminary_selection!(preliminaryselection, data, notify = notify )
147143
original_data.extras = data.extras
148144
end
149145

@@ -157,17 +153,43 @@ function gsr(
157153
modelavg = modelavg,
158154
residualtest = residualtest,
159155
orderresults = orderresults,
156+
notify = notify ,
160157
)
161158

162159
original_data.extras = data.extras
163160

164161
if crossvalidation_enabled(kfoldcrossvalidation)
165-
# TODO: Move notification to every module
166-
notification(notify, "Performing cross validation")
167-
CrossValidation.kfoldcrossvalidation!(data, original_data, numfolds, testsetshare)
162+
CrossValidation.kfoldcrossvalidation!(data, original_data, numfolds, testsetshare, notify = notify)
168163
end
169164

170165
data.original_data = original_data
171166

167+
data.options[:estimator] = estimator
168+
data.options[:equation] = equation
169+
data.options[:datanames] = datanames
170+
data.options[:method] = method
171+
data.options[:intercept] = intercept
172+
data.options[:panel] = panel
173+
data.options[:time] = time
174+
data.options[:seasonaladjustment] = seasonaladjustment
175+
data.options[:removeoutliers] = removeoutliers
176+
data.options[:fe_sqr] = fe_sqr
177+
data.options[:fe_log] = fe_log
178+
data.options[:fe_inv] = fe_inv
179+
data.options[:fe_lag] = fe_lag
180+
data.options[:interaction] = interaction
181+
data.options[:preliminaryselection] = preliminaryselection
182+
data.options[:fixedvariables] = fixedvariables
183+
data.options[:outsample] = outsample
184+
data.options[:criteria] = criteria
185+
data.options[:ttest] = ttest
186+
data.options[:ztest] = ztest
187+
data.options[:modelavg] = modelavg
188+
data.options[:residualtest] = residualtest
189+
data.options[:orderresults] = orderresults
190+
data.options[:kfoldcrossvalidation] = kfoldcrossvalidation
191+
data.options[:numfolds] = numfolds
192+
data.options[:testsetshare] = testsetshare
193+
172194
return data
173195
end

0 commit comments

Comments
 (0)