From 89c847935ff00e1ba4d9af01d9a49aeb537ac813 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Mon, 6 Apr 2026 12:55:09 +1200 Subject: [PATCH] re-organize code to resolve #33 oops --- src/FeatureSelection.jl | 1 + src/models/featureselector.jl | 80 +-------------------------------- src/type_docstrings.jl | 81 ++++++++++++++++++++++++++++++++++ test/models/featureselector.jl | 1 + test/models/rfe.jl | 6 ++- 5 files changed, 88 insertions(+), 81 deletions(-) create mode 100644 src/type_docstrings.jl diff --git a/src/FeatureSelection.jl b/src/FeatureSelection.jl index a2e8a54..17b9083 100644 --- a/src/FeatureSelection.jl +++ b/src/FeatureSelection.jl @@ -10,5 +10,6 @@ const MMI = MLJModelInterface include("models/featureselector.jl") include("models/rfe.jl") include("shared.jl") +include("type_docstrings.jl") end # module diff --git a/src/models/featureselector.jl b/src/models/featureselector.jl index 470de9b..9590cc9 100644 --- a/src/models/featureselector.jl +++ b/src/models/featureselector.jl @@ -87,82 +87,4 @@ MMI.metadata_model( load_path = "FeatureSelection.FeatureSelector" ) -## Docstring -""" -$(MMI.doc_header(FeatureSelector)) - -Use this model to select features (columns) of a table, usually as -part of a model `Pipeline`. - - -# Training data - -In MLJ or MLJBase, bind an instance `model` to data with - - mach = machine(model, X) - -where - -- `X`: any table of input features, where "table" is in the sense of Tables.jl - -Train the machine using `fit!(mach, rows=...)`. - - -# Hyper-parameters - -- `features`: one of the following, with the behavior indicated: - - - `[]` (empty, the default): filter out all features (columns) which - were not encountered in training - - - non-empty vector of feature names (symbols): keep only the - specified features (`ignore=false`) or keep only unspecified - features (`ignore=true`) - - - function or other callable: keep a feature if the callable returns - `true` on its name. 
For example, specifying - `FeatureSelector(features = name -> name in [:x1, :x3], ignore = - true)` has the same effect as `FeatureSelector(features = [:x1, - :x3], ignore = true)`, namely to select all features, with the - exception of `:x1` and `:x3`. - -- `ignore`: whether to ignore or keep specified `features`, as - explained above - - -# Operations - -- `transform(mach, Xnew)`: select features from the table `Xnew` as - specified by the model, taking features seen during training into - account, if relevant - - -# Fitted parameters - -The fields of `fitted_params(mach)` are: - -- `features_to_keep`: the features that will be selected - - -# Example - -``` -using MLJ - -X = (ordinal1 = [1, 2, 3], - ordinal2 = coerce(["x", "y", "x"], OrderedFactor), - ordinal3 = [10.0, 20.0, 30.0], - ordinal4 = [-20.0, -30.0, -40.0], - nominal = coerce(["Your father", "he", "is"], Multiclass)); - -selector = FeatureSelector(features=[:ordinal3, ], ignore=true); - -julia> transform(fit!(machine(selector, X)), X) -(ordinal1 = [1, 2, 3], - ordinal2 = CategoricalValue{Symbol,UInt32}["x", "y", "x"], - ordinal4 = [-20.0, -30.0, -40.0], - nominal = CategoricalValue{String,UInt32}["Your father", "he", "is"],) - -``` -""" -FeatureSelector +# docstring is in "src/type_docstrings.jl" diff --git a/src/type_docstrings.jl b/src/type_docstrings.jl new file mode 100644 index 0000000..7bed37b --- /dev/null +++ b/src/type_docstrings.jl @@ -0,0 +1,81 @@ +# This file cannot be included before types and all metadata are defined + +## Docstring +""" +$(MMI.doc_header(FeatureSelector)) + +Use this model to select features (columns) of a table, usually as +part of a model `Pipeline`. + + +# Training data + +In MLJ or MLJBase, bind an instance `model` to data with + + mach = machine(model, X) + +where + +- `X`: any table of input features, where "table" is in the sense of Tables.jl + +Train the machine using `fit!(mach, rows=...)`.
+ + +# Hyper-parameters + +- `features`: one of the following, with the behavior indicated: + + - `[]` (empty, the default): filter out all features (columns) which + were not encountered in training + + - non-empty vector of feature names (symbols): keep only the + specified features (`ignore=false`) or keep only unspecified + features (`ignore=true`) + + - function or other callable: keep a feature if the callable returns + `true` on its name. For example, specifying + `FeatureSelector(features = name -> name in [:x1, :x3], ignore = + true)` has the same effect as `FeatureSelector(features = [:x1, + :x3], ignore = true)`, namely to select all features, with the + exception of `:x1` and `:x3`. + +- `ignore`: whether to ignore or keep specified `features`, as + explained above + + +# Operations + +- `transform(mach, Xnew)`: select features from the table `Xnew` as + specified by the model, taking features seen during training into + account, if relevant + + +# Fitted parameters + +The fields of `fitted_params(mach)` are: + +- `features_to_keep`: the features that will be selected + + +# Example + +``` +using MLJ + +X = (ordinal1 = [1, 2, 3], + ordinal2 = coerce(["x", "y", "x"], OrderedFactor), + ordinal3 = [10.0, 20.0, 30.0], + ordinal4 = [-20.0, -30.0, -40.0], + nominal = coerce(["Your father", "he", "is"], Multiclass)); + +selector = FeatureSelector(features=[:ordinal3, ], ignore=true); + +julia> transform(fit!(machine(selector, X)), X) +(ordinal1 = [1, 2, 3], + ordinal2 = CategoricalValue{Symbol,UInt32}["x", "y", "x"], + ordinal4 = [-20.0, -30.0, -40.0], + nominal = CategoricalValue{String,UInt32}["Your father", "he", "is"],) + +``` +""" +FeatureSelector diff --git a/test/models/featureselector.jl b/test/models/featureselector.jl index c38dc82..e89dd28 100644 --- a/test/models/featureselector.jl +++ b/test/models/featureselector.jl @@ -62,6 +62,7 @@ # Test model Metadata @test MLJBase.input_scitype(selector) == MLJBase.Table @test 
MLJBase.output_scitype(selector) == MLJBase.Table + @test MLJBase.package_name(selector) == "FeatureSelection" end # To be added with FeatureSelectorRule X = (n1=["a", "b", "a"], n2=["g", "g", "g"], n3=[7, 8, 9], diff --git a/test/models/rfe.jl b/test/models/rfe.jl index c9251d3..de95962 100644 --- a/test/models/rfe.jl +++ b/test/models/rfe.jl @@ -106,8 +106,10 @@ const DTM = DummyTestModels # Traits @test MLJBase.package_name(selector) == "FeatureSelection" @test MLJBase.load_path(selector) == "FeatureSelection.RecursiveFeatureElimination" - @test MLJBase.iteration_parameter(selector) == FeatureSelection.prepend(:model, MLJBase.iteration_parameter(selector.model)) - @test MLJBase.training_losses(selector, rpt) == MLJBase.training_losses(selector.model, rpt.model_report) + @test MLJBase.iteration_parameter(selector) == + FeatureSelection.prepend(:model, MLJBase.iteration_parameter(selector.model)) + @test MLJBase.training_losses(selector, rpt) == + MLJBase.training_losses(selector.model, rpt.model_report) end @testset "Compare results for RFE with scikit-learn" begin