Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 6 additions & 16 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "FeatureSelection"
uuid = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6"
authors = ["Anthony D. Blaom <anthony.blaom@gmail.com>", "Samuel Okon <okonsamuel50@gmail.com>"]
version = "0.2.4"
version = "0.2.5"

[deps]
MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea"
Expand All @@ -11,38 +11,28 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
[compat]
Aqua = "0.8"
Distributions = "0.25"
julia = "1.10"
MLJBase = "1.4"
MLJTuning = "0.8"
MLJDecisionTreeInterface = "0.4"
MLJScikitLearnInterface = "0.6"
MLJModelInterface = "1.10"
MLJScikitLearnInterface = "0.6, 0.7"
MLJTuning = "0.8"
ScientificTypesBase = "3"
StableRNGs = "1"
StatisticalMeasures = "0.1, 0.2, 0.3"
Tables = "1.2"
Test = "1.6"
julia = "1.10"

[extras]
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
MLJTuning = "03970b2e-30c4-11ea-3135-d1576263f10f"
MLJDecisionTreeInterface = "c6f25543-311c-4c74-83dc-3ea6d1015661"
MLJScikitLearnInterface = "5ae90465-5518-4432-b9d2-8a1def2f0cab"
MLJTuning = "03970b2e-30c4-11ea-3135-d1576263f10f"
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
StatisticalMeasures = "a19d573c-0a75-4610-95b3-7071388c7541"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = [
"Aqua",
"Distributions",
"MLJBase",
"MLJTuning",
"MLJDecisionTreeInterface",
"MLJScikitLearnInterface",
"StableRNGs",
"StatisticalMeasures",
"Test"
]
test = ["Aqua", "Distributions", "MLJBase", "MLJTuning", "MLJDecisionTreeInterface", "MLJScikitLearnInterface", "StableRNGs", "StatisticalMeasures", "Test"]
1 change: 1 addition & 0 deletions src/FeatureSelection.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@ const MMI = MLJModelInterface
include("models/featureselector.jl")
include("models/rfe.jl")
include("shared.jl")
include("type_docstrings.jl")

end # module
80 changes: 1 addition & 79 deletions src/models/featureselector.jl
Original file line number Diff line number Diff line change
Expand Up @@ -87,82 +87,4 @@ MMI.metadata_model(
load_path = "FeatureSelection.FeatureSelector"
)

## Docstring
"""
$(MMI.doc_header(FeatureSelector))

Use this model to select features (columns) of a table, usually as
part of a model `Pipeline`.


# Training data

In MLJ or MLJBase, bind an instance `model` to data with

mach = machine(model, X)

where

- `X`: any table of input features, where "table" is in the sense of Tables.jl

Train the machine using `fit!(mach, rows=...)`.


# Hyper-parameters

- `features`: one of the following, with the behavior indicated:

- `[]` (empty, the default): filter out all features (columns) which
were not encountered in training

- non-empty vector of feature names (symbols): keep only the
specified features (`ignore=false`) or keep only unspecified
features (`ignore=true`)

- function or other callable: keep a feature if the callable returns
`true` on its name. For example, specifying
`FeatureSelector(features = name -> name in [:x1, :x3], ignore =
true)` has the same effect as `FeatureSelector(features = [:x1,
:x3], ignore = true)`, namely to select all features, with the
exception of `:x1` and `:x3`.

- `ignore`: whether to ignore or keep specified `features`, as
explained above


# Operations

- `transform(mach, Xnew)`: select features from the table `Xnew` as
specified by the model, taking features seen during training into
account, if relevant


# Fitted parameters

The fields of `fitted_params(mach)` are:

- `features_to_keep`: the features that will be selected


# Example

```
using MLJ

X = (ordinal1 = [1, 2, 3],
ordinal2 = coerce(["x", "y", "x"], OrderedFactor),
ordinal3 = [10.0, 20.0, 30.0],
ordinal4 = [-20.0, -30.0, -40.0],
nominal = coerce(["Your father", "he", "is"], Multiclass));

selector = FeatureSelector(features=[:ordinal3, ], ignore=true);

julia> transform(fit!(machine(selector, X)), X)
(ordinal1 = [1, 2, 3],
ordinal2 = CategoricalValue{String,UInt32}["x", "y", "x"],
ordinal4 = [-20.0, -30.0, -40.0],
nominal = CategoricalValue{String,UInt32}["Your father", "he", "is"],)

```
"""
FeatureSelector
# docstring is in "src/type_docstrings.jl"
3 changes: 2 additions & 1 deletion src/models/rfe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ const ERR_FEATURES_SEEN = ArgumentError(
const MODEL_TYPES = [
:ProbabilisticRecursiveFeatureElimination, :DeterministicRecursiveFeatureElimination
]
const SUPER_TYPES = [:Deterministic, :Probabilistic]
const SUPER_TYPES = [:Probabilistic, :Deterministic]
const MODELTYPE_GIVEN_SUPERTYPES = zip(MODEL_TYPES, SUPER_TYPES)

for (ModelType, ModelSuperType) in MODELTYPE_GIVEN_SUPERTYPES
Expand Down Expand Up @@ -181,6 +181,7 @@ function RecursiveFeatureElimination(
# which is rare.
throw(ERR_MODEL_TYPE)
end

message = MMI.clean!(selector)
isempty(message) || @warn(message)
return selector
Expand Down
81 changes: 81 additions & 0 deletions src/type_docstrings.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# This file cannot be included before the types and all metadata are defined

## Docstring
"""
$(MMI.doc_header(FeatureSelector))

Use this model to select features (columns) of a table, usually as
part of a model `Pipeline`.


# Training data

In MLJ or MLJBase, bind an instance `model` to data with

mach = machine(model, X)

where

- `X`: any table of input features, where "table" is in the sense of Tables.jl

Train the machine using `fit!(mach, rows=...)`.


# Hyper-parameters

- `features`: one of the following, with the behavior indicated:

- `[]` (empty, the default): filter out all features (columns) which
were not encountered in training

- non-empty vector of feature names (symbols): keep only the
specified features (`ignore=false`) or keep only unspecified
features (`ignore=true`)

- function or other callable: keep a feature if the callable returns
`true` on its name. For example, specifying
`FeatureSelector(features = name -> name in [:x1, :x3], ignore =
true)` has the same effect as `FeatureSelector(features = [:x1,
:x3], ignore = true)`, namely to select all features, with the
exception of `:x1` and `:x3`.

- `ignore`: whether to ignore or keep specified `features`, as
explained above


# Operations

- `transform(mach, Xnew)`: select features from the table `Xnew` as
specified by the model, taking features seen during training into
account, if relevant


# Fitted parameters

The fields of `fitted_params(mach)` are:

- `features_to_keep`: the features that will be selected


# Example

```
using MLJ

X = (ordinal1 = [1, 2, 3],
ordinal2 = coerce(["x", "y", "x"], OrderedFactor),
ordinal3 = [10.0, 20.0, 30.0],
ordinal4 = [-20.0, -30.0, -40.0],
nominal = coerce(["Your father", "he", "is"], Multiclass));

selector = FeatureSelector(features=[:ordinal3, ], ignore=true);

julia> transform(fit!(machine(selector, X)), X)
(ordinal1 = [1, 2, 3],
ordinal2 = CategoricalValue{String,UInt32}["x", "y", "x"],
ordinal4 = [-20.0, -30.0, -40.0],
nominal = CategoricalValue{String,UInt32}["Your father", "he", "is"],)

```
"""
FeatureSelector
1 change: 1 addition & 0 deletions test/models/featureselector.jl
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
# Test model Metadata
@test MLJBase.input_scitype(selector) == MLJBase.Table
@test MLJBase.output_scitype(selector) == MLJBase.Table
@test MLJBase.package_name(selector) == "FeatureSelection"
end

# To be added with FeatureSelectorRule X = (n1=["a", "b", "a"], n2=["g", "g", "g"], n3=[7, 8, 9],
Expand Down
8 changes: 6 additions & 2 deletions test/models/rfe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,9 @@ const DTM = DummyTestModels
selector = RecursiveFeatureElimination(model=rf)
selector2 = RecursiveFeatureElimination(model=rf2)
@test selector isa FeatureSelection.DeterministicRecursiveFeatureElimination
@test selector isa MLJBase.Deterministic
@test selector2 isa FeatureSelection.ProbabilisticRecursiveFeatureElimination
@test selector2 isa MLJBase.Probabilistic
@test MLJBase.constructor(selector) == RecursiveFeatureElimination

# Fit models
Expand Down Expand Up @@ -104,8 +106,10 @@ const DTM = DummyTestModels
# Traits
@test MLJBase.package_name(selector) == "FeatureSelection"
@test MLJBase.load_path(selector) == "FeatureSelection.RecursiveFeatureElimination"
@test MLJBase.iteration_parameter(selector) == FeatureSelection.prepend(:model, MLJBase.iteration_parameter(selector.model))
@test MLJBase.training_losses(selector, rpt) == MLJBase.training_losses(selector.model, rpt.model_report)
@test MLJBase.iteration_parameter(selector) ==
FeatureSelection.prepend(:model, MLJBase.iteration_parameter(selector.model))
@test MLJBase.training_losses(selector, rpt) ==
MLJBase.training_losses(selector.model, rpt.model_report)
end

@testset "Compare results for RFE with scikit-learn" begin
Expand Down
Loading