From c8aa61ceb9d9a05eee229eaaeb049a9188996f97 Mon Sep 17 00:00:00 2001 From: CompatHelper Julia Date: Sat, 13 Dec 2025 02:01:05 +0000 Subject: [PATCH 1/4] CompatHelper: bump compat for MLJScikitLearnInterface in [extras] to 0.7, (keep existing compat) --- Project.toml | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/Project.toml b/Project.toml index 0ae8b54..a512977 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "FeatureSelection" uuid = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6" -authors = ["Anthony D. Blaom ", "Samuel Okon ", "Samuel Okon Date: Mon, 6 Apr 2026 11:53:31 +1200 Subject: [PATCH 2/4] fix bug to close #34 --- src/models/rfe.jl | 3 ++- test/models/rfe.jl | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/models/rfe.jl b/src/models/rfe.jl index 10a49fe..c87f1d4 100644 --- a/src/models/rfe.jl +++ b/src/models/rfe.jl @@ -22,7 +22,7 @@ const ERR_FEATURES_SEEN = ArgumentError( const MODEL_TYPES = [ :ProbabilisticRecursiveFeatureElimination, :DeterministicRecursiveFeatureElimination ] -const SUPER_TYPES = [:Deterministic, :Probabilistic] +const SUPER_TYPES = [:Probabilistic, :Deterministic] const MODELTYPE_GIVEN_SUPERTYPES = zip(MODEL_TYPES, SUPER_TYPES) for (ModelType, ModelSuperType) in MODELTYPE_GIVEN_SUPERTYPES @@ -181,6 +181,7 @@ function RecursiveFeatureElimination( # which is rare. 
throw(ERR_MODEL_TYPE) end + message = MMI.clean!(selector) isempty(message) || @warn(message) return selector diff --git a/test/models/rfe.jl b/test/models/rfe.jl index fb635c9..c9251d3 100644 --- a/test/models/rfe.jl +++ b/test/models/rfe.jl @@ -51,7 +51,9 @@ const DTM = DummyTestModels selector = RecursiveFeatureElimination(model=rf) selector2 = RecursiveFeatureElimination(model=rf2) @test selector isa FeatureSelection.DeterministicRecursiveFeatureElimination + @test selector isa MLJBase.Deterministic @test selector2 isa FeatureSelection.ProbabilisticRecursiveFeatureElimination + @test selector2 isa MLJBase.Probabilistic @test MLJBase.constructor(selector) == RecursiveFeatureElimination # Fit models From 8840a556e70b169ffac2878bc5e40adce2e47152 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Mon, 6 Apr 2026 11:54:14 +1200 Subject: [PATCH 3/4] bump 0.2.5 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 0ae8b54..2d29afb 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "FeatureSelection" uuid = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6" authors = ["Anthony D. 
Blaom ", "Samuel Okon Date: Mon, 6 Apr 2026 12:55:09 +1200 Subject: [PATCH 4/4] re-organize code to resolve #33 oops --- src/FeatureSelection.jl | 1 + src/models/featureselector.jl | 80 +-------------------------------- src/type_docstrings.jl | 81 ++++++++++++++++++++++++++++++++++ test/models/featureselector.jl | 1 + test/models/rfe.jl | 6 ++- 5 files changed, 88 insertions(+), 81 deletions(-) create mode 100644 src/type_docstrings.jl diff --git a/src/FeatureSelection.jl b/src/FeatureSelection.jl index a2e8a54..17b9083 100644 --- a/src/FeatureSelection.jl +++ b/src/FeatureSelection.jl @@ -10,5 +10,6 @@ const MMI = MLJModelInterface include("models/featureselector.jl") include("models/rfe.jl") include("shared.jl") +include("type_docstrings.jl") end # module diff --git a/src/models/featureselector.jl b/src/models/featureselector.jl index 470de9b..9590cc9 100644 --- a/src/models/featureselector.jl +++ b/src/models/featureselector.jl @@ -87,82 +87,4 @@ MMI.metadata_model( load_path = "FeatureSelection.FeatureSelector" ) -## Docstring -""" -$(MMI.doc_header(FeatureSelector)) - -Use this model to select features (columns) of a table, usually as -part of a model `Pipeline`. - - -# Training data - -In MLJ or MLJBase, bind an instance `model` to data with - - mach = machine(model, X) - -where - -- `X`: any table of input features, where "table" is in the sense of Tables.jl - -Train the machine using `fit!(mach, rows=...)`. - - -# Hyper-parameters - -- `features`: one of the following, with the behavior indicated: - - - `[]` (empty, the default): filter out all features (columns) which - were not encountered in training - - - non-empty vector of feature names (symbols): keep only the - specified features (`ignore=false`) or keep only unspecified - features (`ignore=true`) - - - function or other callable: keep a feature if the callable returns - `true` on its name. 
For example, specifying - `FeatureSelector(features = name -> name in [:x1, :x3], ignore = - true)` has the same effect as `FeatureSelector(features = [:x1, - :x3], ignore = true)`, namely to select all features, with the - exception of `:x1` and `:x3`. - -- `ignore`: whether to ignore or keep specified `features`, as - explained above - - -# Operations - -- `transform(mach, Xnew)`: select features from the table `Xnew` as - specified by the model, taking features seen during training into - account, if relevant - - -# Fitted parameters - -The fields of `fitted_params(mach)` are: - -- `features_to_keep`: the features that will be selected - - -# Example - -``` -using MLJ - -X = (ordinal1 = [1, 2, 3], - ordinal2 = coerce(["x", "y", "x"], OrderedFactor), - ordinal3 = [10.0, 20.0, 30.0], - ordinal4 = [-20.0, -30.0, -40.0], - nominal = coerce(["Your father", "he", "is"], Multiclass)); - -selector = FeatureSelector(features=[:ordinal3, ], ignore=true); - -julia> transform(fit!(machine(selector, X)), X) -(ordinal1 = [1, 2, 3], - ordinal2 = CategoricalValue{Symbol,UInt32}["x", "y", "x"], - ordinal4 = [-20.0, -30.0, -40.0], - nominal = CategoricalValue{String,UInt32}["Your father", "he", "is"],) - -``` -""" -FeatureSelector +# docstring is in "src/type_docstrings.jl" diff --git a/src/type_docstrings.jl b/src/type_docstrings.jl new file mode 100644 index 0000000..7bed37b --- /dev/null +++ b/src/type_docstrings.jl @@ -0,0 +1,81 @@ +# This file cannot be included before types and all metadata are defined + +## Docstring +""" +$(MMI.doc_header(FeatureSelector)) + +Use this model to select features (columns) of a table, usually as +part of a model `Pipeline`. + + +# Training data + +In MLJ or MLJBase, bind an instance `model` to data with + + mach = machine(model, X) + +where + +- `X`: any table of input features, where "table" is in the sense of Tables.jl + +Train the machine using `fit!(mach, rows=...)`. 
+ + +# Hyper-parameters + +- `features`: one of the following, with the behavior indicated: + + - `[]` (empty, the default): filter out all features (columns) which + were not encountered in training + + - non-empty vector of feature names (symbols): keep only the + specified features (`ignore=false`) or keep only unspecified + features (`ignore=true`) + + - function or other callable: keep a feature if the callable returns + `true` on its name. For example, specifying + `FeatureSelector(features = name -> name in [:x1, :x3], ignore = + true)` has the same effect as `FeatureSelector(features = [:x1, + :x3], ignore = true)`, namely to select all features, with the + exception of `:x1` and `:x3`. + +- `ignore`: whether to ignore or keep specified `features`, as + explained above + + +# Operations + +- `transform(mach, Xnew)`: select features from the table `Xnew` as + specified by the model, taking features seen during training into + account, if relevant + + +# Fitted parameters + +The fields of `fitted_params(mach)` are: + +- `features_to_keep`: the features that will be selected + + +# Example + +``` +using MLJ + +X = (ordinal1 = [1, 2, 3], + ordinal2 = coerce(["x", "y", "x"], OrderedFactor), + ordinal3 = [10.0, 20.0, 30.0], + ordinal4 = [-20.0, -30.0, -40.0], + nominal = coerce(["Your father", "he", "is"], Multiclass)); + +selector = FeatureSelector(features=[:ordinal3, ], ignore=true); + +julia> transform(fit!(machine(selector, X)), X) +(ordinal1 = [1, 2, 3], + ordinal2 = CategoricalValue{Symbol,UInt32}["x", "y", "x"], + ordinal4 = [-20.0, -30.0, -40.0], + nominal = CategoricalValue{String,UInt32}["Your father", "he", "is"],) + +``` +""" +FeatureSelector diff --git a/test/models/featureselector.jl b/test/models/featureselector.jl index c38dc82..e89dd28 100644 --- a/test/models/featureselector.jl +++ b/test/models/featureselector.jl @@ -62,6 +62,7 @@ # Test model Metadata @test MLJBase.input_scitype(selector) == MLJBase.Table @test 
MLJBase.output_scitype(selector) == MLJBase.Table + @test MLJBase.package_name(selector) == "FeatureSelection" end # To be added with FeatureSelectorRule X = (n1=["a", "b", "a"], n2=["g", "g", "g"], n3=[7, 8, 9], diff --git a/test/models/rfe.jl b/test/models/rfe.jl index c9251d3..de95962 100644 --- a/test/models/rfe.jl +++ b/test/models/rfe.jl @@ -106,8 +106,10 @@ const DTM = DummyTestModels # Traits @test MLJBase.package_name(selector) == "FeatureSelection" @test MLJBase.load_path(selector) == "FeatureSelection.RecursiveFeatureElimination" - @test MLJBase.iteration_parameter(selector) == FeatureSelection.prepend(:model, MLJBase.iteration_parameter(selector.model)) - @test MLJBase.training_losses(selector, rpt) == MLJBase.training_losses(selector.model, rpt.model_report) + @test MLJBase.iteration_parameter(selector) == + FeatureSelection.prepend(:model, MLJBase.iteration_parameter(selector.model)) + @test MLJBase.training_losses(selector, rpt) == + MLJBase.training_losses(selector.model, rpt.model_report) end @testset "Compare results for RFE with scikit-learn" begin