From ec12d6313f4cfbfa5cfb5d5b37ce06a341e85e31 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Mon, 6 Apr 2026 14:43:44 +1200 Subject: [PATCH 1/2] bump 0.18.6 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 15a1237..335380e 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MLJModels" uuid = "d491faf4-2d78-11e9-2867-c94bc002c0b7" authors = ["Anthony D. Blaom "] -version = "0.18.5" +version = "0.18.6" [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" From db3c02a91ef87ff1bd966829c779f53b8996c937 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Mon, 6 Apr 2026 14:44:16 +1200 Subject: [PATCH 2/2] update Model Registry --- src/registry/Metadata.toml | 2325 ++++++++++++++++++------------------ src/registry/Project.toml | 1 + 2 files changed, 1182 insertions(+), 1144 deletions(-) diff --git a/src/registry/Metadata.toml b/src/registry/Metadata.toml index a612721..82e0b6c 100644 --- a/src/registry/Metadata.toml +++ b/src/registry/Metadata.toml @@ -1,6 +1,6 @@ [BetaML.RandomForestRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Function\", \"Float64\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -34,10 +34,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.GaussianMixtureImputer] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Vector{Float64}\", \"Union{Type, 
Vector{<:BetaML.GMM.AbstractMixture}}\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -71,10 +71,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Continuous}}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.RandomForestClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Function\", \"Float64\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -108,10 +108,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.RandomForestImputer] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Int64}\", \"Vector{Int64}\", \"Union{Nothing, Function}\", \"Int64\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -145,10 +145,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = 
"`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Known}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.PerceptronClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Nothing, Matrix{Float64}}\", \"Union{Nothing, Vector{Float64}}\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -182,10 +182,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}, AbstractMatrix{<:ScientificTypesBase.Infinite}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.AutoEncoder] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Float64, Int64}\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Int64\", \"Int64\", \"BetaML.Nn.OptimisationAlgorithm\", \"Bool\", \"BetaML.Api.AutoTuneMethod\", \"String\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -219,10 +219,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, 
ScientificTypesBase.Count}}}`" ":transform_scitype" = "`AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.DecisionTreeRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Function\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -256,10 +256,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.PegasosClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Nothing, Matrix{Float64}}\", \"Union{Nothing, Vector{Float64}}\", \"Function\", \"Float64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -293,47 +293,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}, AbstractMatrix{<:ScientificTypesBase.Infinite}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[BetaML.NeuralNetworkRegressor] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Int64\", \"Int64\", \"BetaML.Nn.OptimisationAlgorithm\", \"Bool\", \"String\", \"Function\", \"Random.AbstractRNG\")`" -":package_uuid" = 
"024491cd-cc6b-443e-8034-08ea7eb7db2b" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" -":package_license" = "MIT" -":prediction_type" = ":deterministic" -":load_path" = "BetaML.Bmlj.NeuralNetworkRegressor" -":hyperparameters" = "`(:layers, :loss, :dloss, :epochs, :batch_size, :opt_alg, :shuffle, :descr, :cb, :rng)`" -":is_pure_julia" = "`true`" -":human_name" = "neural network regressor" -":is_supervised" = "`true`" -":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nmutable struct NeuralNetworkRegressor <: MLJModelInterface.Deterministic\n```\n\nA simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for regression of a single dimensional target.\n\n# Parameters:\n\n * `layers`: Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers\n * `loss`: Loss (cost) function [def: `BetaML.squared_cost`]. Should always assume y and ŷ as matrices, even if the regression task is 1-D\n\n !!! warning\n If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`.\n\n * `dloss`: Derivative of the loss function [def: `BetaML.dsquared_cost`, i.e. use the derivative of the squared cost]. Use `nothing` for autodiff.\n * `epochs`: Number of epochs, i.e. 
passages trough the whole training sample [def: `200`]\n * `batch_size`: Size of each individual batch [def: `16`]\n * `opt_alg`: The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers\n * `shuffle`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n * `descr`: An optional title and/or description for this model\n * `cb`: A call back function to provide information during training [def: `fitting_info`]\n * `rng`: Random Number Generator (see [`FIXEDSEED`](@ref)) [deafult: `Random.GLOBAL_RNG`]\n\n# Notes:\n\n * data must be numerical\n * the label should be be a *n-records* vector.\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_boston;\n\njulia> modelType = @load NeuralNetworkRegressor pkg = \"BetaML\" verbosity=0\nBetaML.Nn.NeuralNetworkRegressor\n\njulia> layers = [BetaML.DenseLayer(12,20,f=BetaML.relu),BetaML.DenseLayer(20,20,f=BetaML.relu),BetaML.DenseLayer(20,1,f=BetaML.relu)];\n\njulia> model = modelType(layers=layers,opt_alg=BetaML.ADAM());\nNeuralNetworkRegressor(\n layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.23249759178069676 -0.4125090172711131 … 0.41401934928739 -0.33017881111237535; -0.27912169279319965 0.270551221249931 … 0.19258414323473344 0.1703002982374256; … ; 0.31186742456482447 0.14776438287394805 … 0.3624993442655036 0.1438885872964824; 0.24363744610286758 -0.3221033024934767 … 0.14886090419299408 0.038411663101909355], [-0.42360286004241765, -0.34355377040029594, 0.11510963232946697, 0.29078650404397893, -0.04940236502546075, 0.05142849152316714, -0.177685375947775, 0.3857630523957018, -0.25454667127064756, -0.1726731848206195, 0.29832456225553444, -0.21138505291162835, -0.15763643112604903, -0.08477044513587562, -0.38436681165349196, 0.20538016429104916, -0.25008157754468335, 0.268681800562054, 0.10600581996650865, 0.4262194464325672], BetaML.Utils.relu, BetaML.Utils.drelu), 
BetaML.Nn.DenseLayer([-0.08534180387478185 0.19659398307677617 … -0.3413633217504578 -0.0484925247381256; 0.0024419192794883915 -0.14614102508129 … -0.21912059923003044 0.2680725396694708; … ; 0.25151545823147886 -0.27532269951606037 … 0.20739970895058063 0.2891938885916349; -0.1699020711688904 -0.1350423717084296 … 0.16947589410758873 0.3629006047373296], [0.2158116357688406, -0.3255582642532289, -0.057314442103850394, 0.29029696770539953, 0.24994080694366455, 0.3624239027782297, -0.30674318230919984, -0.3854738338935017, 0.10809721838554087, 0.16073511121016176, -0.005923262068960489, 0.3157147976348795, -0.10938918304264739, -0.24521229198853187, -0.307167732178712, 0.0808907777008302, -0.014577497150872254, -0.0011287181458157214, 0.07522282588658086, 0.043366500526073104], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.021367697115938555 -0.28326652172347155 … 0.05346175368370165 -0.26037328415871647], [-0.2313659199724562], BetaML.Utils.relu, BetaML.Utils.drelu)], \n loss = BetaML.Utils.squared_cost, \n dloss = BetaML.Utils.dsquared_cost, \n epochs = 100, \n batch_size = 32, \n opt_alg = BetaML.Nn.ADAM(BetaML.Nn.var\"#90#93\"(), 1.0, 0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]), \n shuffle = true, \n descr = \"\", \n cb = BetaML.Nn.fitting_info, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, y);\n\njulia> fit!(mach);\n\njulia> ŷ = predict(mach, X);\n\njulia> hcat(y,ŷ)\n506×2 Matrix{Float64}:\n 24.0 30.7726\n 21.6 28.0811\n 34.7 31.3194\n ⋮ \n 23.9 30.9032\n 22.0 29.49\n 11.9 27.2438\n```\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/sylvaticus/BetaML.jl" -":package_name" = "BetaML" -":name" = "NeuralNetworkRegressor" -":target_in_fit" = "`true`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [":fit", ":predict"] -":deep_properties" = "`()`" -":predict_scitype" = 
"`AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" -":target_scitype" = "`AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" [BetaML.KMeansClusterer] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Function\", \"String\", \"Union{Nothing, Matrix{Float64}}\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -367,10 +330,47 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":constructor" = "`nothing`" + +[BetaML.NeuralNetworkRegressor] ":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Int64\", \"Int64\", \"BetaML.Nn.OptimisationAlgorithm\", \"Bool\", \"String\", \"Function\", \"Random.AbstractRNG\")`" +":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, 
ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "MIT" +":prediction_type" = ":deterministic" +":load_path" = "BetaML.Bmlj.NeuralNetworkRegressor" +":hyperparameters" = "`(:layers, :loss, :dloss, :epochs, :batch_size, :opt_alg, :shuffle, :descr, :cb, :rng)`" +":is_pure_julia" = "`true`" +":human_name" = "neural network regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nmutable struct NeuralNetworkRegressor <: MLJModelInterface.Deterministic\n```\n\nA simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for regression of a single dimensional target.\n\n# Parameters:\n\n * `layers`: Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers\n * `loss`: Loss (cost) function [def: `BetaML.squared_cost`]. Should always assume y and ŷ as matrices, even if the regression task is 1-D\n\n !!! warning\n If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`.\n\n * `dloss`: Derivative of the loss function [def: `BetaML.dsquared_cost`, i.e. use the derivative of the squared cost]. Use `nothing` for autodiff.\n * `epochs`: Number of epochs, i.e. passages trough the whole training sample [def: `200`]\n * `batch_size`: Size of each individual batch [def: `16`]\n * `opt_alg`: The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. 
See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers\n * `shuffle`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n * `descr`: An optional title and/or description for this model\n * `cb`: A call back function to provide information during training [def: `fitting_info`]\n * `rng`: Random Number Generator (see [`FIXEDSEED`](@ref)) [deafult: `Random.GLOBAL_RNG`]\n\n# Notes:\n\n * data must be numerical\n * the label should be be a *n-records* vector.\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_boston;\n\njulia> modelType = @load NeuralNetworkRegressor pkg = \"BetaML\" verbosity=0\nBetaML.Nn.NeuralNetworkRegressor\n\njulia> layers = [BetaML.DenseLayer(12,20,f=BetaML.relu),BetaML.DenseLayer(20,20,f=BetaML.relu),BetaML.DenseLayer(20,1,f=BetaML.relu)];\n\njulia> model = modelType(layers=layers,opt_alg=BetaML.ADAM());\nNeuralNetworkRegressor(\n layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.23249759178069676 -0.4125090172711131 … 0.41401934928739 -0.33017881111237535; -0.27912169279319965 0.270551221249931 … 0.19258414323473344 0.1703002982374256; … ; 0.31186742456482447 0.14776438287394805 … 0.3624993442655036 0.1438885872964824; 0.24363744610286758 -0.3221033024934767 … 0.14886090419299408 0.038411663101909355], [-0.42360286004241765, -0.34355377040029594, 0.11510963232946697, 0.29078650404397893, -0.04940236502546075, 0.05142849152316714, -0.177685375947775, 0.3857630523957018, -0.25454667127064756, -0.1726731848206195, 0.29832456225553444, -0.21138505291162835, -0.15763643112604903, -0.08477044513587562, -0.38436681165349196, 0.20538016429104916, -0.25008157754468335, 0.268681800562054, 0.10600581996650865, 0.4262194464325672], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.08534180387478185 0.19659398307677617 … -0.3413633217504578 -0.0484925247381256; 0.0024419192794883915 -0.14614102508129 … -0.21912059923003044 0.2680725396694708; … ; 0.25151545823147886 
-0.27532269951606037 … 0.20739970895058063 0.2891938885916349; -0.1699020711688904 -0.1350423717084296 … 0.16947589410758873 0.3629006047373296], [0.2158116357688406, -0.3255582642532289, -0.057314442103850394, 0.29029696770539953, 0.24994080694366455, 0.3624239027782297, -0.30674318230919984, -0.3854738338935017, 0.10809721838554087, 0.16073511121016176, -0.005923262068960489, 0.3157147976348795, -0.10938918304264739, -0.24521229198853187, -0.307167732178712, 0.0808907777008302, -0.014577497150872254, -0.0011287181458157214, 0.07522282588658086, 0.043366500526073104], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.021367697115938555 -0.28326652172347155 … 0.05346175368370165 -0.26037328415871647], [-0.2313659199724562], BetaML.Utils.relu, BetaML.Utils.drelu)], \n loss = BetaML.Utils.squared_cost, \n dloss = BetaML.Utils.dsquared_cost, \n epochs = 100, \n batch_size = 32, \n opt_alg = BetaML.Nn.ADAM(BetaML.Nn.var\"#90#93\"(), 1.0, 0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]), \n shuffle = true, \n descr = \"\", \n cb = BetaML.Nn.fitting_info, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, y);\n\njulia> fit!(mach);\n\njulia> ŷ = predict(mach, X);\n\njulia> hcat(y,ŷ)\n506×2 Matrix{Float64}:\n 24.0 30.7726\n 21.6 28.0811\n 34.7 31.3194\n ⋮ \n 23.9 30.9032\n 22.0 29.49\n 11.9 27.2438\n```\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/sylvaticus/BetaML.jl" +":package_name" = "BetaML" +":name" = "NeuralNetworkRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":fit", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" +":target_scitype" = "`AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" +":supports_training_losses" = "`false`" 
+":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":constructor" = "`nothing`" [BetaML.MultitargetGaussianMixtureRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Vector{Float64}\", \"Union{Type, Vector{<:BetaML.GMM.AbstractMixture}}\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Int64\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -404,10 +404,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Infinite}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Infinite}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.GaussianMixtureRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Vector{Float64}\", \"Union{Type, Vector{<:BetaML.GMM.AbstractMixture}}\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Int64\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -441,10 +441,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Infinite}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Infinite}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" 
-":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.MultitargetNeuralNetworkRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Int64\", \"Int64\", \"BetaML.Nn.OptimisationAlgorithm\", \"Bool\", \"String\", \"Function\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -478,10 +478,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.DecisionTreeClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Function\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -515,10 +515,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.GeneralImputer] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{String, Vector{Int64}}\", \"Any\", \"Union{Bool, Vector{Bool}}\", \"Union{Function, Vector{Function}}\", \"Union{Function, Vector{Function}}\", \"Int64\", 
\"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -552,10 +552,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Known}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.NeuralNetworkClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Int64\", \"Int64\", \"BetaML.Nn.OptimisationAlgorithm\", \"Bool\", \"String\", \"Function\", \"Union{Nothing, Vector}\", \"String\", \"Any\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -589,10 +589,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.SimpleImputer] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Function\", \"Union{Nothing, Int64}\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing)`" @@ -626,10 +626,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = 
"`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Continuous}}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.GaussianMixtureClusterer] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"AbstractVector{Float64}\", \"Union{Type, Vector{<:BetaML.GMM.AbstractMixture}}\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Int64\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -663,10 +663,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Continuous}}}`" ":transform_scitype" = "`AbstractArray{<:ScientificTypesBase.Multiclass}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.KernelPerceptronClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Function\", \"Int64\", \"Union{Nothing, Vector{Vector{Int64}}}\", \"Bool\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -700,10 +700,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}, AbstractMatrix{<:ScientificTypesBase.Infinite}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.KMedoidsClusterer] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = 
"`(\"Int64\", \"Function\", \"String\", \"Union{Nothing, Matrix{Float64}}\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -737,104 +737,289 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" -[MLJEnsembles.EnsembleModel] -":constructor" = "`EnsembleModel`" -":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Vector{Float64}\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Int64\", \"ComputationalResources.AbstractResource\", \"Any\")`" -":package_uuid" = "50ed68f4-41fd-4504-931a-ed422449fee0" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +[MLJFlux.EntityEmbedder] +":is_wrapper" = "`true`" +":hyperparameter_types" = "`(\"Union{MLJFlux.MLJFluxDeterministic, MLJFlux.MLJFluxProbabilistic}\",)`" +":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" +":hyperparameter_ranges" = "`(nothing,)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "unknown" -":prediction_type" = ":probabilistic" -":load_path" = "MLJEnsembles.EnsembleModel" -":hyperparameters" = "`(:model, :atomic_weights, :bagging_fraction, :rng, :n, :acceleration, :out_of_bag_measure)`" -":is_pure_julia" = "`false`" -":human_name" = "probabilistic ensemble model" -":is_supervised" = "`true`" +":prediction_type" = ":unknown" +":load_path" = 
"MLJFlux.EntityEmbedder" +":hyperparameters" = "`(:model,)`" +":is_pure_julia" = "`true`" +":human_name" = "entity embedder" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nEnsembleModel(model,\n atomic_weights=Float64[],\n bagging_fraction=0.8,\n n=100,\n rng=GLOBAL_RNG,\n acceleration=CPU1(),\n out_of_bag_measure=[])\n```\n\nCreate a model for training an ensemble of `n` clones of `model`, with optional bagging. Ensembling is useful if `fit!(machine(atom, data...))` does not create identical models on repeated calls (ie, is a stochastic model, such as a decision tree with randomized node selection criteria), or if `bagging_fraction` is set to a value less than 1.0, or both.\n\nHere the atomic `model` must support targets with scitype `AbstractVector{<:Finite}` (single-target classifiers) or `AbstractVector{<:Continuous}` (single-target regressors).\n\nIf `rng` is an integer, then `MersenneTwister(rng)` is the random number generator used for bagging. Otherwise some `AbstractRNG` object is expected.\n\nThe atomic predictions are optionally weighted according to the vector `atomic_weights` (to allow for external optimization) except in the case that `model` is a `Deterministic` classifier, in which case `atomic_weights` are ignored.\n\nThe ensemble model is `Deterministic` or `Probabilistic`, according to the corresponding supertype of `atom`. In the case of deterministic classifiers (`target_scitype(atom) <: Abstract{<:Finite}`), the predictions are majority votes, and for regressors (`target_scitype(atom)<: AbstractVector{<:Continuous}`) they are ordinary averages. 
Probabilistic predictions are obtained by averaging the atomic probability distribution/mass functions; in particular, for regressors, the ensemble prediction on each input pattern has the type `MixtureModel{VF,VS,D}` from the Distributions.jl package, where `D` is the type of predicted distribution for `atom`.\n\nSpecify `acceleration=CPUProcesses()` for distributed computing, or `CPUThreads()` for multithreading.\n\nIf a single measure or non-empty vector of measures is specified by `out_of_bag_measure`, then out-of-bag estimates of performance are written to the training report (call `report` on the trained machine wrapping the ensemble model).\n\n*Important:* If per-observation or class weights `w` (not to be confused with atomic weights) are specified when constructing a machine for the ensemble model, as in `mach = machine(ensemble_model, X, y, w)`, then `w` is used by any measures specified in `out_of_bag_measure` that support them.\n""" +":docstring" = """```julia\nEntityEmbedder(; model=supervised_mljflux_model)\n```\n\nWrapper for a MLJFlux supervised model, to convert it to a transformer. Such transformers are still presented a target variable in training, but they behave as transformers in MLJ pipelines. They are entity embedding transformers, in the sense of the article, \"Entity Embeddings of Categorical Variables\" by Cheng Guo, Felix Berkhahn.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```julia\nmach = machine(embed_model, X, y)\n```\n\nHere:\n\n * `embed_model` is an instance of `EntityEmbedder`, which wraps a supervised MLJFlux model, `model`, which must be an instance of one of these: `MLJFlux.NeuralNetworkClassifier`, `NeuralNetworkBinaryClassifier`, `MLJFlux.NeuralNetworkRegressor`,`MLJFlux.MultitargetNeuralNetworkRegressor`.\n * `X` is any table of input features supported by the model being wrapped. Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. 
Use `schema(X)` to check scitypes.\n * `y` is the target, which can be any `AbstractVector` supported by the model being wrapped.\n\nTrain the machine using `fit!(mach)`.\n\n# Examples\n\nIn the following example we wrap a `NeuralNetworkClassifier` as an `EntityEmbedder`, so that it can be used to supply continuously encoded features to a nearest neighbor model, which does not support categorical features.\n\n## Simple Example\n\n```julia\nusing MLJ\n\n# Setup some data\nN = 400\nX = (\n a = rand(Float32, N),\n b = categorical(rand(\"abcde\", N)),\n c = categorical(rand(\"ABCDEFGHIJ\", N), ordered = true),\n)\n\ny = categorical(rand(\"YN\", N));\n\n# Initiate model\nEntityEmbedder = @load EntityEmbedder pkg=MLJFlux\n\n# Flux model to do learn the entity embeddings:\nNeuralNetworkClassifier = @load NeuralNetworkClassifier pkg=MLJFlux\n\n# Instantiate the models:\nclf = NeuralNetworkClassifier(embedding_dims=Dict(:b => 2, :c => 3))\nemb = EntityEmbedder(clf)\n\n# Train and transform the data using the embedder:\nmach = machine(emb, X, y)\nfit!(mach)\nXnew = transform(mach, X)\n\n# Compare schemas before and after transformation\nschema(X)\nschema(Xnew)\n```\n\n## Using with Downstream Models (Pipeline)\n\n```julia\nusing MLJ\n\n# Setup some data\nN = 400\nX = (\n a = rand(Float32, N),\n b = categorical(rand(\"abcde\", N)),\n c = categorical(rand(\"ABCDEFGHIJ\", N), ordered = true),\n)\n\ny = categorical(rand(\"YN\", N));\n\n# Initiate model\nEntityEmbedder = @load EntityEmbedder pkg=MLJFlux\n\n# Flux model to do learn the entity embeddings:\nNeuralNetworkClassifier = @load NeuralNetworkClassifier pkg=MLJFlux\n\n# Other supervised model type, requiring `Continuous` features:\nKNNClassifier = @load KNNClassifier pkg=NearestNeighborModels\n\n# Instantiate the models:\nclf = NeuralNetworkClassifier(embedding_dims=Dict(:b => 2, :c => 3))\nemb = EntityEmbedder(clf)\n\n# Now construct the pipeline:\npipe = emb |> KNNClassifier()\n\n# And train it to make predictions:\nmach 
= machine(pipe, X, y)\nfit!(mach)\npredict(mach, X)[1:3]\n```\n\nIt is to be emphasized that the `NeuralNertworkClassifier` is only being used to learn entity embeddings, not to make predictions, which here are made by `KNNClassifier()`.\n\nSee also [`NeuralNetworkClassifier`, `NeuralNetworkRegressor`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaAI/MLJEnsembles.jl" -":package_name" = "MLJEnsembles" -":name" = "EnsembleModel" +":package_url" = "https://github.com/FluxML/MLJFlux.jl" +":package_name" = "MLJFlux" +":name" = "EntityEmbedder" ":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [] +":implemented_methods" = [":fit", ":fitted_params", ":training_losses", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" -":supports_training_losses" = "`false`" +":supports_training_losses" = "`true`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`true`" - -[CatBoost.CatBoostRegressor] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Int64\", \"String\", \"String\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Int64\", \"Union{Nothing, Int64}\", \"Int64\", \"Union{Nothing, Int64}\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Bool\", \"Union{Nothing, Float64}\", \"Union{Nothing, Int64}\", \"Float64\", \"Union{Nothing, String, PythonCall.Py}\", \"Float64\", \"Float64\", \"Union{Nothing, Int64}\", \"Float64\", \"Int64\", 
\"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"String\", \"String\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"Int64\", \"Int64\", \"String\", \"Union{Nothing, PythonCall.Py}\", \"Float64\", \"Union{Nothing, Float64}\", \"String\", \"Bool\", \"Float64\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, PythonCall.Py}\")`" -":package_uuid" = "e2e10f9a-a85d-4fa9-b6b2-639a32100a12" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" + +[MLJFlux.MultitargetNeuralNetworkRegressor] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" +":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, 
AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "MIT" ":prediction_type" = ":deterministic" -":load_path" = "CatBoost.MLJCatBoostInterface.CatBoostRegressor" -":hyperparameters" = "`(:iterations, :learning_rate, :depth, :l2_leaf_reg, :model_size_reg, :rsm, :loss_function, :border_count, :feature_border_type, :per_float_feature_quantization, :input_borders, :output_borders, :fold_permutation_block, :nan_mode, :counter_calc_method, :leaf_estimation_iterations, :leaf_estimation_method, :thread_count, :random_seed, :metric_period, :ctr_leaf_count_limit, :store_all_simple_ctr, :max_ctr_complexity, :has_time, :allow_const_label, :target_border, :one_hot_max_size, :random_strength, :custom_metric, :bagging_temperature, :fold_len_multiplier, :used_ram_limit, :gpu_ram_part, :pinned_memory_size, :allow_writing_files, :approx_on_full_history, :boosting_type, :simple_ctr, :combinations_ctr, :per_feature_ctr, :ctr_target_border_count, :task_type, :devices, :bootstrap_type, :subsample, :sampling_frequency, :sampling_unit, :gpu_cat_features_storage, :data_partition, :early_stopping_rounds, :grow_policy, :min_data_in_leaf, :max_leaves, :leaf_estimation_backtracking, :feature_weights, :penalties_coefficient, :model_shrink_rate, :model_shrink_mode, :langevin, :diffusion_temperature, :posterior_sampling, :boost_from_average, 
:text_processing)`" -":is_pure_julia" = "`false`" -":human_name" = "CatBoost regressor" +":load_path" = "MLJFlux.MultitargetNeuralNetworkRegressor" +":hyperparameters" = "`(:builder, :optimiser, :loss, :epochs, :batch_size, :lambda, :alpha, :rng, :optimiser_changes_trigger_retraining, :acceleration, :embedding_dims)`" +":is_pure_julia" = "`true`" +":human_name" = "multitarget neural network regressor" ":is_supervised" = "`true`" -":iteration_parameter" = ":iterations" -":docstring" = """```julia\nCatBoostRegressor\n```\n\nA model type for constructing a CatBoost regressor, based on [CatBoost.jl](https://github.com/JuliaAI/CatBoost.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nCatBoostRegressor = @load CatBoostRegressor pkg=CatBoost\n```\n\nDo `model = CatBoostRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `CatBoostRegressor(iterations=...)`.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, `Finite`, `Textual`; check column scitypes with `schema(X)`. 
`Textual` columns will be passed to catboost as `text_features`, `Multiclass` columns will be passed to catboost as `cat_features`, and `OrderedFactor` columns will be converted to integers.\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\nMore details on the catboost hyperparameters, here are the Python docs: https://catboost.ai/en/docs/concepts/python-reference_catboostclassifier#parameters\n\n# Operations\n\n * `predict(mach, Xnew)`: probabilistic predictions of the target given new features `Xnew` having the same scitype as `X` above.\n\n# Accessor functions\n\n * `feature_importances(mach)`: return vector of feature importances, in the form of `feature::Symbol => importance::Real` pairs\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `model`: The Python CatBoostRegressor model\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `feature_importances`: Vector{Pair{Symbol, Float64}} of feature importances\n\n# Examples\n\n```julia\nusing CatBoost.MLJCatBoostInterface\nusing MLJ\n\nX = (\n duration = [1.5, 4.1, 5.0, 6.7], \n n_phone_calls = [4, 5, 6, 7], \n department = coerce([\"acc\", \"ops\", \"acc\", \"ops\"], Multiclass), \n)\ny = [2.0, 4.0, 6.0, 7.0]\n\nmodel = CatBoostRegressor(iterations=5)\nmach = machine(model, X, y)\nfit!(mach)\npreds = predict(mach, X)\n```\n\nSee also [catboost](https://github.com/catboost/catboost) and the unwrapped model type [`CatBoost.CatBoostRegressor`](@ref).\n""" +":iteration_parameter" = ":epochs" +":docstring" = """```julia\nMultitargetNeuralNetworkRegressor\n```\n\nA model type for constructing a multitarget neural network regressor, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nMultitargetNeuralNetworkRegressor = 
@load MultitargetNeuralNetworkRegressor pkg=MLJFlux\n```\n\nDo `model = MultitargetNeuralNetworkRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetNeuralNetworkRegressor(builder=...)`.\n\n`MultitargetNeuralNetworkRegressor` is for training a data-dependent Flux.jl neural network to predict a multi-valued `Continuous` target, represented as a table, given a table of `Continuous` features. Users provide a recipe for constructing the network, based on properties of the data that is encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\nIn addition to features with `Continuous` scientific element type, this model supports categorical features in the input table. If present, such features are embedded into dense vectors by the use of an additional `EntityEmbedderLayer` layer after the input, as described in Entity Embeddings of Categorical Variables by Cheng Guo, Felix Berkhahn arXiv, 2016.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` provides input features and is either: (i) a `Matrix` with `Continuous` element scitype (typically `Float32`); or (ii) a table of input features (eg, a `DataFrame`) whose columns have `Continuous`, `Multiclass` or `OrderedFactor` element scitype; check column scitypes with `schema(X)`. If any `Multiclass` or `OrderedFactor` features appear, the constructed network will use an `EntityEmbedderLayer` layer to transform them into dense vectors. If `X` is a `Matrix`, it is assumed that columns correspond to features and rows corresponding to observations.\n\n * `y` is the target, which can be any table or matrix of output targets whose element scitype is `Continuous`; check column scitypes with `schema(y)`. 
If `y` is a `Matrix`, it is assumed to have columns corresponding to variables and rows corresponding to observations.\n\n# Hyper-parameters\n\n * `builder=MLJFlux.Linear(σ=Flux.relu)`: An MLJFlux builder that constructs a neural network. Possible `builders` include: `Linear`, `Short`, and `MLP`. See MLJFlux documentation for more on builders, and the example below for using the `@builder` convenience macro.\n * `optimiser::Optimisers.Adam()`: An Optimisers.jl optimiser. The optimiser performs the updating of the weights of the network. To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of `10` between `1` and `1e-7`.\n * `loss=Flux.mse`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). For a regression task, natural loss functions are:\n\n * `Flux.mse`\n * `Flux.mae`\n * `Flux.msle`\n * `Flux.huber_loss`\n\n Currently MLJ measures are not supported as loss functions here.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete the training dataset.\n * `batch_size::int=1`: the batch size to be used for training, representing the number of samples per update of the network weights. Typically, batch size is between `8` and `512`. Increassing batch size may accelerate training if `acceleration=CUDALibs()` and a GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`. Note the history reports unpenalized losses.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. 
A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training. The default is `Random.default_rng()`.\n * `optimizer_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. For Training on GPU, use `CUDALibs()`.\n * `embedding_dims`: a `Dict` whose keys are names of categorical features, given as symbols, and whose values are numbers representing the desired dimensionality of the entity embeddings of such features: an integer value of `7`, say, sets the embedding dimensionality to `7`; a float value of `0.5`, say, sets the embedding dimensionality to `ceil(0.5 * c)`, where `c` is the number of feature levels. Unspecified feature dimensionality defaults to `min(c - 1, 10)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same scitype as `X` above. Predictions are deterministic.\n * `transform(mach, Xnew)`: Assuming `Xnew` has the same schema as `X`, transform the categorical features of `Xnew` into dense `Continuous` vectors using the `MLJFlux.EntityEmbedderLayer` layer present in the network. Does nothing in case the model was trained on an input `X` that lacks categorical features.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalised if `lambda != 0`) in historical order, of length `epochs + 1`. 
The first element is the pre-training loss.\n\n# Examples\n\nIn this example we apply a multi-target regression model to synthetic data:\n\n```julia\nusing MLJ\nimport MLJFlux\nusing Flux\nimport Optimisers\n```\n\nFirst, we generate some synthetic data (needs MLJBase 0.20.16 or higher):\n\n```julia\nX, y = make_regression(100, 9; n_targets = 2) # both tables\nschema(y)\nschema(X)\n```\n\nSplitting off a test set:\n\n```julia\n(X, Xtest), (y, ytest) = partition((X, y), 0.7, multi=true);\n```\n\nNext, we can define a `builder`, making use of a convenience macro to do so. In the following `@builder` call, `n_in` is a proxy for the number input features and `n_out` the number of target variables (both known at `fit!` time), while `rng` is a proxy for a RNG (which will be passed from the `rng` field of `model` defined below).\n\n```julia\nbuilder = MLJFlux.@builder begin\n init=Flux.glorot_uniform(rng)\n Chain(\n Dense(n_in, 64, relu, init=init),\n Dense(64, 32, relu, init=init),\n Dense(32, n_out, init=init),\n )\nend\n```\n\nInstantiating the regression model:\n\n```julia\nMultitargetNeuralNetworkRegressor = @load MultitargetNeuralNetworkRegressor\nmodel = MultitargetNeuralNetworkRegressor(builder=builder, rng=123, epochs=20)\n```\n\nWe will arrange for standardization of the the target by wrapping our model in `TransformedTargetModel`, and standardization of the features by inserting the wrapped model in a pipeline:\n\n```julia\npipe = Standardizer |> TransformedTargetModel(model, transformer=Standardizer)\n```\n\nIf we fit with a high verbosity (>1), we will see the losses during training. 
We can also see the losses in the output of `report(mach)`\n\n```julia\nmach = machine(pipe, X, y)\nfit!(mach, verbosity=2)\n\n# first element initial loss, 2:end per epoch training losses\nreport(mach).transformed_target_model_deterministic.model.training_losses\n```\n\nFor experimenting with learning rate, see the [`NeuralNetworkRegressor`](@ref) example.\n\n```julia\npipe.transformed_target_model_deterministic.model.optimiser = Optimisers.Adam(0.0001)\n```\n\nWith the learning rate fixed, we can now compute a CV estimate of the performance (using all data bound to `mach`) and compare this with performance on the test set:\n\n```julia\n\n# CV estimate, based on `(X, y)`:\nevaluate!(mach, resampling=CV(nfolds=5), measure=multitarget_l2)\n\n# loss for `(Xtest, test)`:\nfit!(mach) # trains on all data `(X, y)`\nyhat = predict(mach, Xtest)\nmultitarget_l2(yhat, ytest)\n```\n\nSee also [`NeuralNetworkRegressor`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaAI/CatBoost.jl" -":package_name" = "CatBoost" -":name" = "CatBoostRegressor" +":package_url" = "https://github.com/alan-turing-institute/MLJFlux.jl" +":package_name" = "MLJFlux" +":name" = "MultitargetNeuralNetworkRegressor" ":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":reformat", ":selectrows", ":update"] -":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":supports_training_losses" = "`false`" +":implemented_methods" = [":predict"] +":deep_properties" = "`(:optimiser, :builder)`" +":predict_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":target_scitype" = 
"`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":supports_training_losses" = "`true`" ":supports_weights" = "`false`" -":reports_feature_importances" = "`true`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[CatBoost.CatBoostClassifier] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Int64\", \"String\", \"String\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Int64\", \"Union{Nothing, Int64}\", \"Int64\", \"Union{Nothing, Int64}\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Bool\", \"Union{Nothing, Float64}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Int64}\", \"Float64\", \"Float64\", \"Float64\", \"Union{Nothing, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"String\", \"String\", 
\"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"Int64\", \"Int64\", \"String\", \"Union{Nothing, PythonCall.Py}\", \"Float64\", \"Union{Nothing, Float64}\", \"String\", \"Bool\", \"Float64\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, PythonCall.Py}\")`" -":package_uuid" = "e2e10f9a-a85d-4fa9-b6b2-639a32100a12" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" + +[MLJFlux.NeuralNetworkClassifier] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" +":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, 
AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" ":prediction_type" = ":probabilistic" -":load_path" = "CatBoost.MLJCatBoostInterface.CatBoostClassifier" -":hyperparameters" = "`(:iterations, :learning_rate, :depth, :l2_leaf_reg, :model_size_reg, :rsm, :loss_function, :border_count, :feature_border_type, :per_float_feature_quantization, :input_borders, :output_borders, :fold_permutation_block, :nan_mode, :counter_calc_method, :leaf_estimation_iterations, :leaf_estimation_method, :thread_count, :random_seed, :metric_period, :ctr_leaf_count_limit, :store_all_simple_ctr, :max_ctr_complexity, :has_time, :allow_const_label, :target_border, :class_weights, :auto_class_weights, :one_hot_max_size, :random_strength, :bagging_temperature, :fold_len_multiplier, :used_ram_limit, :gpu_ram_part, :pinned_memory_size, :allow_writing_files, :approx_on_full_history, :boosting_type, :simple_ctr, :combinations_ctr, :per_feature_ctr, :task_type, :devices, :bootstrap_type, :subsample, :sampling_frequency, :sampling_unit, :gpu_cat_features_storage, :data_partition, :early_stopping_rounds, :grow_policy, :min_data_in_leaf, :max_leaves, :leaf_estimation_backtracking, :feature_weights, :penalties_coefficient, :model_shrink_rate, :model_shrink_mode, :langevin, :diffusion_temperature, :posterior_sampling, :boost_from_average, :text_processing)`" -":is_pure_julia" = "`false`" -":human_name" = "CatBoost classifier" +":load_path" = "MLJFlux.NeuralNetworkClassifier" +":hyperparameters" = "`(:builder, :finaliser, :optimiser, :loss, :epochs, :batch_size, :lambda, :alpha, :rng, :optimiser_changes_trigger_retraining, :acceleration, :embedding_dims)`" +":is_pure_julia" = "`true`" +":human_name" = "neural network classifier" ":is_supervised" = "`true`" 
-":iteration_parameter" = ":iterations" -":docstring" = """```julia\nCatBoostClassifier\n```\n\nA model type for constructing a CatBoost classifier, based on [CatBoost.jl](https://github.com/JuliaAI/CatBoost.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nCatBoostClassifier = @load CatBoostClassifier pkg=CatBoost\n```\n\nDo `model = CatBoostClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `CatBoostClassifier(iterations=...)`.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, `Finite`, `Textual`; check column scitypes with `schema(X)`. `Textual` columns will be passed to catboost as `text_features`, `Multiclass` columns will be passed to catboost as `cat_features`, and `OrderedFactor` columns will be converted to integers.\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `Finite`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\nMore details on the catboost hyperparameters, here are the Python docs: https://catboost.ai/en/docs/concepts/python-reference_catboostclassifier#parameters\n\n# Operations\n\n * `predict(mach, Xnew)`: probabilistic predictions of the target given new features `Xnew` having the same scitype as `X` above.\n * `predict_mode(mach, Xnew)`: returns the mode of each of the prediction above.\n\n# Accessor functions\n\n * `feature_importances(mach)`: return vector of feature importances, in the form of `feature::Symbol => importance::Real` pairs\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `model`: The Python CatBoostClassifier model\n\n# Report\n\nThe fields of 
`report(mach)` are:\n\n * `feature_importances`: Vector{Pair{Symbol, Float64}} of feature importances\n\n# Examples\n\n```julia\nusing CatBoost.MLJCatBoostInterface\nusing MLJ\n\nX = (\n duration = [1.5, 4.1, 5.0, 6.7], \n n_phone_calls = [4, 5, 6, 7], \n department = coerce([\"acc\", \"ops\", \"acc\", \"ops\"], Multiclass), \n)\ny = coerce([0, 0, 1, 1], Multiclass)\n\nmodel = CatBoostClassifier(iterations=5)\nmach = machine(model, X, y)\nfit!(mach)\nprobs = predict(mach, X)\npreds = predict_mode(mach, X)\n```\n\nSee also [catboost](https://github.com/catboost/catboost) and the unwrapped model type [`CatBoost.CatBoostClassifier`](@ref).\n""" +":iteration_parameter" = ":epochs" +":docstring" = """```julia\nNeuralNetworkClassifier\n```\n\nA model type for constructing a neural network classifier, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nNeuralNetworkClassifier = @load NeuralNetworkClassifier pkg=MLJFlux\n```\n\nDo `model = NeuralNetworkClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `NeuralNetworkClassifier(builder=...)`.\n\n`NeuralNetworkClassifier` is for training a data-dependent Flux.jl neural network for making probabilistic predictions of a `Multiclass` or `OrderedFactor` target, given a table of `Continuous` features. Users provide a recipe for constructing the network, based on properties of the data that is encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\nIn addition to features with `Continuous` scientific element type, this model supports categorical features in the input table. 
If present, such features are embedded into dense vectors by the use of an additional `EntityEmbedderLayer` layer after the input, as described in Entity Embeddings of Categorical Variables by Cheng Guo, Felix Berkhahn arXiv, 2016.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` provides input features and is either: (i) a `Matrix` with `Continuous` element scitype (typically `Float32`); or (ii) a table of input features (eg, a `DataFrame`) whose columns have `Continuous`, `Multiclass` or `OrderedFactor` element scitype; check column scitypes with `schema(X)`. If any `Multiclass` or `OrderedFactor` features appear, the constructed network will use an `EntityEmbedderLayer` layer to transform them into dense vectors. If `X` is a `Matrix`, it is assumed that columns correspond to features and rows corresponding to observations.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Multiclass` or `OrderedFactor`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `builder=MLJFlux.Short()`: An MLJFlux builder that constructs a neural network. Possible `builders` include: `MLJFlux.Linear`, `MLJFlux.Short`, and `MLJFlux.MLP`. See MLJFlux.jl documentation for examples of user-defined builders. See also `finaliser` below.\n * `optimiser::Optimisers.Adam()`: An Optimisers.jl optimiser. The optimiser performs the updating of the weights of the network. To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of `10` between `1` and `1e-7`.\n * `loss=Flux.crossentropy`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). 
For a classification task, the most natural loss functions are:\n\n * `Flux.crossentropy`: Standard multiclass classification loss, also known as the log loss.\n * `Flux.logitcrossentropy`: Mathematically equal to crossentropy, but numerically more stable than finalising the outputs with `softmax` and then calculating crossentropy. You will need to specify `finaliser=identity` to remove MLJFlux's default softmax finaliser, and understand that the output of `predict` is then unnormalized (no longer probabilistic).\n * `Flux.tversky_loss`: Used with imbalanced data to give more weight to false negatives.\n * `Flux.focal_loss`: Used with highly imbalanced data. Weights harder examples more than easier examples.\n\n Currently MLJ measures are not supported values of `loss`.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete training dataset.\n * `batch_size::Int=1`: the batch size to be used for training, representing the number of samples per update of the network weights. Typically, batch size is between `8` and `512`. Increasing batch size may accelerate training if `acceleration=CUDALibs()` and a GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`. Note the history reports unpenalized losses.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training. The default is `Random.default_rng()`.\n * `optimiser_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed.
If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. For Training on GPU, use `CUDALibs()`.\n * `finaliser=Flux.softmax`: The final activation function of the neural network (applied after the network defined by `builder`). Defaults to `Flux.softmax`.\n * `embedding_dims`: a `Dict` whose keys are names of categorical features, given as symbols, and whose values are numbers representing the desired dimensionality of the entity embeddings of such features: an integer value of `7`, say, sets the embedding dimensionality to `7`; a float value of `0.5`, say, sets the embedding dimensionality to `ceil(0.5 * c)`, where `c` is the number of feature levels. Unspecified feature dimensionality defaults to `min(c - 1, 10)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n * `transform(mach, Xnew)`: Assuming `Xnew` has the same schema as `X`, transform the categorical features of `Xnew` into dense `Continuous` vectors using the `MLJFlux.EntityEmbedderLayer` layer present in the network. Does nothing in case the model was trained on an input `X` that lacks categorical features.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network. This includes the final layer specified by `finaliser` (eg, `softmax`).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalised if `lambda != 0`) in historical order, of length `epochs + 1`. 
The first element is the pre-training loss.\n\n# Examples\n\nIn this example we build a classification model using the Iris dataset. This is a very basic example, using a default builder and no standardization. For a more advanced illustration, see [`NeuralNetworkRegressor`](@ref) or [`ImageClassifier`](@ref), and examples in the MLJFlux.jl documentation.\n\n```julia\nusing MLJ\nusing Flux\nimport RDatasets\nimport Optimisers\n```\n\nFirst, we can load the data:\n\n```julia\niris = RDatasets.dataset(\"datasets\", \"iris\");\ny, X = unpack(iris, ==(:Species), rng=123); # a vector and a table\nNeuralNetworkClassifier = @load NeuralNetworkClassifier pkg=MLJFlux\nclf = NeuralNetworkClassifier()\n```\n\nNext, we can train the model:\n\n```julia\nmach = machine(clf, X, y)\nfit!(mach)\n```\n\nWe can train the model in an incremental fashion, altering the learning rate as we go, provided `optimizer_changes_trigger_retraining` is `false` (the default). Here, we also change the number of (total) iterations:\n\n```julia\nclf.optimiser = Optimisers.Adam(clf.optimiser.eta * 2)\nclf.epochs = clf.epochs + 5\n\nfit!(mach, verbosity=2) # trains 5 more epochs\n```\n\nWe can inspect the mean training loss using the `cross_entropy` function:\n\n```julia\ntraining_loss = cross_entropy(predict(mach, X), y)\n```\n\nAnd we can access the Flux chain (model) using `fitted_params`:\n\n```julia\nchain = fitted_params(mach).chain\n```\n\nFinally, we can see how the out-of-sample performance changes over time, using MLJ's `learning_curve` function:\n\n```julia\nr = range(clf, :epochs, lower=1, upper=200, scale=:log10)\ncurve = learning_curve(clf, X, y,\n range=r,\n resampling=Holdout(fraction_train=0.7),\n measure=cross_entropy)\nusing Plots\nplot(curve.parameter_values,\n curve.measurements,\n xlab=curve.parameter_name,\n xscale=curve.parameter_scale,\n ylab = \"Cross Entropy\")\n\n```\n\nSee also [`ImageClassifier`](@ref), [`NeuralNetworkBinaryClassifier`](@ref).\n""" 
":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaAI/CatBoost.jl" -":package_name" = "CatBoost" +":package_url" = "https://github.com/alan-turing-institute/MLJFlux.jl" +":package_name" = "MLJFlux" +":name" = "NeuralNetworkClassifier" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":predict"] +":deep_properties" = "`(:optimiser, :builder)`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":supports_training_losses" = "`true`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":constructor" = "`nothing`" + +[MLJFlux.ImageClassifier] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\")`" +":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Image}, AbstractVector{<:ScientificTypesBase.Multiclass}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "MIT" +":prediction_type" = ":probabilistic" +":load_path" = "MLJFlux.ImageClassifier" +":hyperparameters" = "`(:builder, :finaliser, :optimiser, :loss, :epochs, :batch_size, 
:lambda, :alpha, :rng, :optimiser_changes_trigger_retraining, :acceleration)`" +":is_pure_julia" = "`true`" +":human_name" = "image classifier" +":is_supervised" = "`true`" +":iteration_parameter" = ":epochs" +":docstring" = """```julia\nImageClassifier\n```\n\nA model type for constructing a image classifier, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nImageClassifier = @load ImageClassifier pkg=MLJFlux\n```\n\nDo `model = ImageClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ImageClassifier(builder=...)`.\n\n`ImageClassifier` classifies images using a neural network adapted to the type of images provided (color or gray scale). Predictions are probabilistic. Users provide a recipe for constructing the network, based on properties of the image encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any `AbstractVector` of images with `ColorImage` or `GrayImage` scitype; check the scitype with `scitype(X)` and refer to ScientificTypes.jl documentation on coercing typical image formats into an appropriate type.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Multiclass`; check the scitype with `scitype(y)`.\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `builder`: An MLJFlux builder that constructs the neural network. The fallback builds a depth-16 VGG architecture adapted to the image size and number of target classes, with no batch normalization; see the Metalhead.jl documentation for details. See the example below for a user-specified builder. A convenience macro `@builder` is also available. 
See also `finaliser` below.\n * `optimiser::Optimisers.Adam()`: An Optimisers.jl optimiser. The optimiser performs the updating of the weights of the network. To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of `10` between `1` and `1e-7`.\n * `loss=Flux.crossentropy`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). For a classification task, the most natural loss functions are:\n\n * `Flux.crossentropy`: Standard multiclass classification loss, also known as the log loss.\n * `Flux.logitcrossentropy`: Mathematically equal to crossentropy, but numerically more stable than finalising the outputs with `softmax` and then calculating crossentropy. You will need to specify `finaliser=identity` to remove MLJFlux's default softmax finaliser, and understand that the output of `predict` is then unnormalized (no longer probabilistic).\n * `Flux.tversky_loss`: Used with imbalanced data to give more weight to false negatives.\n * `Flux.focal_loss`: Used with highly imbalanced data. Weights harder examples more than easier examples.\n\n Currently MLJ measures are not supported values of `loss`.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete training dataset.\n * `batch_size::Int=1`: the batch size to be used for training, representing the number of samples per update of the network weights. Typically, batch size is between `8` and `512`. Increasing batch size may accelerate training if `acceleration=CUDALibs()` and a GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`.
Note the history reports unpenalized losses.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training. The default is `Random.default_rng()`.\n * `optimiser_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. For training on GPU, use `CUDALibs()`.\n * `finaliser=Flux.softmax`: The final activation function of the neural network (applied after the network defined by `builder`). Defaults to `Flux.softmax`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network. This includes the final layer specified by `finaliser` (eg, `softmax`).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalised if `lambda != 0`) in historical order, of length `epochs + 1`.
The first element is the pre-training loss.\n\n# Examples\n\nIn this example we use MLJFlux and a custom builder to classify the MNIST image dataset.\n\n```julia\nusing MLJ\nusing Flux\nimport MLJFlux\nimport Optimisers\nimport MLJIteration # for `skip` control\n```\n\nFirst we want to download the MNIST dataset, and unpack into images and labels:\n\n```julia\nimport MLDatasets: MNIST\ndata = MNIST(split=:train)\nimages, labels = data.features, data.targets\n```\n\nIn MLJ, integers cannot be used for encoding categorical data, so we must coerce them into the `Multiclass` scitype:\n\n```julia\nlabels = coerce(labels, Multiclass);\n```\n\nAbove `images` is a single array but MLJFlux requires the images to be a vector of individual image arrays:\n\n```julia\nimages = coerce(images, GrayImage);\nimages[1]\n```\n\nWe start by defining a suitable `builder` object. This is a recipe for building the neural network. Our builder will work for images of any (constant) size, whether they be color or black and white (ie, single or multi-channel). The architecture always consists of six alternating convolution and max-pool layers, and a final dense layer; the filter size and the number of channels after each convolution layer is customizable.\n\n```julia\nimport MLJFlux\n\nstruct MyConvBuilder\n filter_size::Int\n channels1::Int\n channels2::Int\n channels3::Int\nend\n\nmake2d(x::AbstractArray) = reshape(x, :, size(x)[end])\n\nfunction MLJFlux.build(b::MyConvBuilder, rng, n_in, n_out, n_channels)\n k, c1, c2, c3 = b.filter_size, b.channels1, b.channels2, b.channels3\n mod(k, 2) == 1 || error(\"`filter_size` must be odd. 
\")\n p = div(k - 1, 2) # padding to preserve image size\n init = Flux.glorot_uniform(rng)\n front = Chain(\n Conv((k, k), n_channels => c1, pad=(p, p), relu, init=init),\n MaxPool((2, 2)),\n Conv((k, k), c1 => c2, pad=(p, p), relu, init=init),\n MaxPool((2, 2)),\n Conv((k, k), c2 => c3, pad=(p, p), relu, init=init),\n MaxPool((2 ,2)),\n make2d)\n d = Flux.outputsize(front, (n_in..., n_channels, 1)) |> first\n return Chain(front, Dense(d, n_out, init=init))\nend\n```\n\nIt is important to note that in our `build` function, there is no final `softmax`. This is applied by default in all MLJFlux classifiers (override this using the `finaliser` hyperparameter).\n\nNow that our builder is defined, we can instantiate the actual MLJFlux model. If you have a GPU, you can substitute in `acceleration=CUDALibs()` below to speed up training.\n\n```julia\nImageClassifier = @load ImageClassifier pkg=MLJFlux\nclf = ImageClassifier(builder=MyConvBuilder(3, 16, 32, 32),\n batch_size=50,\n epochs=10,\n rng=123)\n```\n\nYou can add Flux options such as `optimiser` and `loss` in the snippet above. 
Currently, `loss` must be a flux-compatible loss, and not an MLJ measure.\n\nNext, we can bind the model with the data in a machine, and train using the first 500 images:\n\n```julia\nmach = machine(clf, images, labels);\nfit!(mach, rows=1:500, verbosity=2);\nreport(mach)\nchain = fitted_params(mach)\nFlux.params(chain)[2]\n```\n\nWe can tack on 20 more epochs by modifying the `epochs` field, and iteratively fit some more:\n\n```julia\nclf.epochs = clf.epochs + 20\nfit!(mach, rows=1:500, verbosity=2);\n```\n\nWe can also make predictions and calculate an out-of-sample loss estimate, using any MLJ measure (loss/score):\n\n```julia\npredicted_labels = predict(mach, rows=501:1000);\ncross_entropy(predicted_labels, labels[501:1000])\n```\n\nThe preceding `fit!`/`predict`/evaluate workflow can be alternatively executed as follows:\n\n```julia\nevaluate!(mach,\n resampling=Holdout(fraction_train=0.5),\n measure=cross_entropy,\n rows=1:1000,\n verbosity=0)\n```\n\nSee also [`NeuralNetworkClassifier`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/alan-turing-institute/MLJFlux.jl" +":package_name" = "MLJFlux" +":name" = "ImageClassifier" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":predict"] +":deep_properties" = "`(:optimiser, :builder)`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Multiclass}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Multiclass}`" +":supports_training_losses" = "`true`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Image}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":constructor" = "`nothing`" + +[MLJFlux.NeuralNetworkBinaryClassifier] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Any\", \"Int64\", 
\"Int64\", \"Float64\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" +":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Binary}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "MIT" +":prediction_type" = ":probabilistic" +":load_path" = "MLJFlux.NeuralNetworkBinaryClassifier" +":hyperparameters" = "`(:builder, :finaliser, :optimiser, :loss, :epochs, :batch_size, :lambda, :alpha, :rng, :optimiser_changes_trigger_retraining, :acceleration, :embedding_dims)`" +":is_pure_julia" = "`true`" +":human_name" = "neural network binary classifier" +":is_supervised" = "`true`" +":iteration_parameter" = ":epochs" +":docstring" = """```julia\nNeuralNetworkBinaryClassifier\n```\n\nA model type for constructing a neural network binary classifier, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nNeuralNetworkBinaryClassifier = @load NeuralNetworkBinaryClassifier pkg=MLJFlux\n```\n\nDo `model = NeuralNetworkBinaryClassifier()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `NeuralNetworkBinaryClassifier(builder=...)`.\n\n`NeuralNetworkBinaryClassifier` is for training a data-dependent Flux.jl neural network for making probabilistic predictions of a binary (`Multiclass{2}` or `OrderedFactor{2}`) target, given a table of `Continuous` features. Users provide a recipe for constructing the network, based on properties of the data that is encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\nIn addition to features with `Continuous` scientific element type, this model supports categorical features in the input table. If present, such features are embedded into dense vectors by the use of an additional `EntityEmbedderLayer` layer after the input, as described in Entity Embeddings of Categorical Variables by Cheng Guo, Felix Berkhahn arXiv, 2016.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` provides input features and is either: (i) a `Matrix` with `Continuous` element scitype (typically `Float32`); or (ii) a table of input features (eg, a `DataFrame`) whose columns have `Continuous`, `Multiclass` or `OrderedFactor` element scitype; check column scitypes with `schema(X)`. If any `Multiclass` or `OrderedFactor` features appear, the constructed network will use an `EntityEmbedderLayer` layer to transform them into dense vectors. If `X` is a `Matrix`, it is assumed that columns correspond to features and rows corresponding to observations.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Multiclass{2}` or `OrderedFactor{2}`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `builder=MLJFlux.Short()`: An MLJFlux builder that constructs a neural network. 
Possible `builders` include: `MLJFlux.Linear`, `MLJFlux.Short`, and `MLJFlux.MLP`. See MLJFlux.jl documentation for examples of user-defined builders. See also `finaliser` below.\n * `optimiser::Flux.Adam()`: A `Flux.Optimise` optimiser. The optimiser performs the updating of the weights of the network. For further reference, see [the Flux optimiser documentation](https://fluxml.ai/Flux.jl/stable/training/optimisers/). To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of `10` between `1` and `1e-7`.\n * `loss=Flux.binarycrossentropy`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). For a classification task, the most natural loss functions are:\n\n * `Flux.binarycrossentropy`: Standard binary classification loss, also known as the log loss.\n * `Flux.logitbinarycrossentropy`: Mathematically equal to crossentropy, but numerically more stable than finalising the outputs with `σ` and then calculating crossentropy. You will need to specify `finaliser=identity` to remove MLJFlux's default sigmoid finaliser, and understand that the output of `predict` is then unnormalized (no longer probabilistic).\n * `Flux.tversky_loss`: Used with imbalanced data to give more weight to false negatives.\n * `Flux.binary_focal_loss`: Used with highly imbalanced data. Weights harder examples more than easier examples.\n\n Currently MLJ measures are not supported values of `loss`.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete training dataset.\n * `batch_size::Int=1`: the batch size to be used for training, representing the number of samples per update of the network weights. Typically, batch size is between `8` and `512`.
Increasing batch size may accelerate training if `acceleration=CUDALibs()` and a GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training.\n * `optimiser_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. For training on GPU, use `CUDALibs()`.\n * `finaliser=Flux.σ`: The final activation function of the neural network (applied after the network defined by `builder`). Defaults to `Flux.σ`.\n * `embedding_dims`: a `Dict` whose keys are names of categorical features, given as symbols, and whose values are numbers representing the desired dimensionality of the entity embeddings of such features: an integer value of `7`, say, sets the embedding dimensionality to `7`; a float value of `0.5`, say, sets the embedding dimensionality to `ceil(0.5 * c)`, where `c` is the number of feature levels. Unspecified feature dimensionality defaults to `min(c - 1, 10)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n * `transform(mach, Xnew)`: Assuming `Xnew` has the same schema as `X`, transform the categorical features of `Xnew` into dense `Continuous` vectors using the `MLJFlux.EntityEmbedderLayer` layer present in the network.
Does nothing in case the model was trained on an input `X` that lacks categorical features.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network. This includes the final layer specified by `finaliser` (eg, `softmax`).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalised if `lambda != 0`) in historical order, of length `epochs + 1`. The first element is the pre-training loss.\n\n# Examples\n\nIn this example we build a classification model using the `mtcars` dataset. This is a very basic example, using a default builder and no standardization. For a more advanced illustration, see [`NeuralNetworkRegressor`](@ref) or [`ImageClassifier`](@ref), and examples in the MLJFlux.jl documentation.\n\n```julia\nusing MLJ, Flux\nimport Optimisers\nimport RDatasets\n```\n\nFirst, we can load the data:\n\n```julia\nmtcars = RDatasets.dataset(\"datasets\", \"mtcars\");\ny, X = unpack(mtcars, ==(:VS), in([:MPG, :Cyl, :Disp, :HP, :WT, :QSec]));\n```\n\nNote that `y` is a vector and `X` a table.\n\n```julia\ny = categorical(y) # classifier takes categorical input\nX_f32 = Float32.(X) # To match floating point type of the neural network layers\nNeuralNetworkBinaryClassifier = @load NeuralNetworkBinaryClassifier pkg=MLJFlux\nbclf = NeuralNetworkBinaryClassifier()\n```\n\nNext, we can train the model:\n\n```julia\nmach = machine(bclf, X_f32, y)\nfit!(mach)\n```\n\nWe can train the model in an incremental fashion, altering the learning rate as we go, provided `optimiser_changes_trigger_retraining` is `false` (the default).
Here, we also change the number of (total) iterations:\n\n```julia-repl\njulia> bclf.optimiser\nAdam(0.001, (0.9, 0.999), 1.0e-8)\n```\n\n```julia\nbclf.optimiser = Optimisers.Adam(eta = bclf.optimiser.eta * 2)\nbclf.epochs = bclf.epochs + 5\n\nfit!(mach, verbosity=2) # trains 5 more epochs\n```\n\nWe can inspect the mean training loss using the `cross_entropy` function:\n\n```julia\ntraining_loss = cross_entropy(predict(mach, X_f32), y)\n```\n\nAnd we can access the Flux chain (model) using `fitted_params`:\n\n```julia\nchain = fitted_params(mach).chain\n```\n\nFinally, we can see how the out-of-sample performance changes over time, using MLJ's `learning_curve` function:\n\n```julia\nr = range(bclf, :epochs, lower=1, upper=200, scale=:log10)\ncurve = learning_curve(\n bclf,\n X_f32,\n y,\n range=r,\n resampling=Holdout(fraction_train=0.7),\n measure=cross_entropy,\n)\nusing Plots\nplot(\n curve.parameter_values,\n curve.measurements,\n xlab=curve.parameter_name,\n xscale=curve.parameter_scale,\n ylab = \"Cross Entropy\",\n)\n\n```\n\nSee also [`ImageClassifier`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/alan-turing-institute/MLJFlux.jl" +":package_name" = "MLJFlux" +":name" = "NeuralNetworkBinaryClassifier" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":predict"] +":deep_properties" = "`(:optimiser, :builder)`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Binary}`" +":supports_training_losses" = "`true`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" 
+":constructor" = "`nothing`" + +[MLJFlux.NeuralNetworkRegressor] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" +":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "MIT" +":prediction_type" = ":deterministic" +":load_path" = "MLJFlux.NeuralNetworkRegressor" +":hyperparameters" = "`(:builder, :optimiser, :loss, :epochs, :batch_size, :lambda, :alpha, :rng, :optimiser_changes_trigger_retraining, :acceleration, :embedding_dims)`" +":is_pure_julia" = "`true`" +":human_name" = "neural network regressor" +":is_supervised" = "`true`" +":iteration_parameter" = ":epochs" +":docstring" = """```julia\nNeuralNetworkRegressor\n```\n\nA model type for constructing a neural network regressor, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nNeuralNetworkRegressor = @load NeuralNetworkRegressor pkg=MLJFlux\n```\n\nDo `model = NeuralNetworkRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `NeuralNetworkRegressor(builder=...)`.\n\n`NeuralNetworkRegressor` is for training a data-dependent Flux.jl neural network to predict a `Continuous` target, given a table of `Continuous` features. Users provide a recipe for constructing the network, based on properties of the data that is encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\nIn addition to features with `Continuous` scientific element type, this model supports categorical features in the input table. If present, such features are embedded into dense vectors by the use of an additional `EntityEmbedderLayer` layer after the input, as described in Entity Embeddings of Categorical Variables by Cheng Guo, Felix Berkhahn arXiv, 2016.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` provides input features and is either: (i) a `Matrix` with `Continuous` element scitype (typically `Float32`); or (ii) a table of input features (eg, a `DataFrame`) whose columns have `Continuous`, `Multiclass` or `OrderedFactor` element scitype; check column scitypes with `schema(X)`. If any `Multiclass` or `OrderedFactor` features appear, the constructed network will use an `EntityEmbedderLayer` layer to transform them into dense vectors. If `X` is a `Matrix`, it is assumed that columns correspond to features and rows correspond to observations.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `builder=MLJFlux.Linear(σ=Flux.relu)`: An MLJFlux builder that constructs a neural network. Possible `builders` include: `MLJFlux.Linear`, `MLJFlux.Short`, and `MLJFlux.MLP`. 
See MLJFlux documentation for more on builders, and the example below for using the `@builder` convenience macro.\n * `optimiser=Optimisers.Adam()`: An Optimisers.jl optimiser. The optimiser performs the updating of the weights of the network. To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of `10` between `1` and `1e-7`.\n * `loss=Flux.mse`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). For a regression task, natural loss functions are:\n\n * `Flux.mse`\n * `Flux.mae`\n * `Flux.msle`\n * `Flux.huber_loss`\n\n Currently MLJ measures are not supported as loss functions here.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete training dataset.\n * `batch_size::Int=1`: The batch size to be used for training, representing the number of samples per update of the network weights. Typically, batch size is between `8` and `512`. Increasing batch size may accelerate training if `acceleration=CUDALibs()` and a GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`. Note the history reports unpenalized losses.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training. The default is `Random.default_rng()`.\n * `optimiser_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. 
If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. For Training on GPU, use `CUDALibs()`.\n * `embedding_dims`: a `Dict` whose keys are names of categorical features, given as symbols, and whose values are numbers representing the desired dimensionality of the entity embeddings of such features: an integer value of `7`, say, sets the embedding dimensionality to `7`; a float value of `0.5`, say, sets the embedding dimensionality to `ceil(0.5 * c)`, where `c` is the number of feature levels. Unspecified feature dimensionality defaults to `min(c - 1, 10)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n * `transform(mach, Xnew)`: Assuming `Xnew` has the same schema as `X`, transform the categorical features of `Xnew` into dense `Continuous` vectors using the `MLJFlux.EntityEmbedderLayer` layer present in the network. Does nothing in case the model was trained on an input `X` that lacks categorical features.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalized if `lambda != 0`) in historical order, of length `epochs + 1`. 
The first element is the pre-training loss.\n\n# Examples\n\nIn this example we build a regression model for the Boston house price dataset.\n\n```julia\nusing MLJ\nimport MLJFlux\nusing Flux\nimport Optimisers\n```\n\nFirst, we load in the data: The `:MEDV` column becomes the target vector `y`, and all remaining columns go into a table `X`, with the exception of `:CHAS`:\n\n```julia\ndata = OpenML.load(531); # Loads from https://www.openml.org/d/531\ny, X = unpack(data, ==(:MEDV), !=(:CHAS); rng=123);\n\nscitype(y)\nschema(X)\n```\n\nSince MLJFlux models do not handle ordered factors, we'll treat `:RAD` as `Continuous`:\n\n```julia\nX = coerce(X, :RAD=>Continuous)\n```\n\nSplitting off a test set:\n\n```julia\n(X, Xtest), (y, ytest) = partition((X, y), 0.7, multi=true);\n```\n\nNext, we can define a `builder`, making use of a convenience macro to do so. In the following `@builder` call, `n_in` is a proxy for the number of input features (which will be known at `fit!` time) and `rng` is a proxy for an RNG (which will be passed from the `rng` field of `model` defined below). We also have the parameter `n_out` which is the number of output features. 
As we are doing single target regression, the value passed will always be `1`, but the builder we define will also work for [`MultitargetNeuralNetworkRegressor`](@ref).\n\n```julia\nbuilder = MLJFlux.@builder begin\n init=Flux.glorot_uniform(rng)\n Chain(\n Dense(n_in, 64, relu, init=init),\n Dense(64, 32, relu, init=init),\n Dense(32, n_out, init=init),\n )\nend\n```\n\nInstantiating a model:\n\n```julia\nNeuralNetworkRegressor = @load NeuralNetworkRegressor pkg=MLJFlux\nmodel = NeuralNetworkRegressor(\n builder=builder,\n rng=123,\n epochs=20\n)\n```\n\nWe arrange for standardization of the target by wrapping our model in `TransformedTargetModel`, and standardization of the features by inserting the wrapped model in a pipeline:\n\n```julia\npipe = Standardizer |> TransformedTargetModel(model, transformer=Standardizer)\n```\n\nIf we fit with a high verbosity (>1), we will see the losses during training. We can also see the losses in the output of `report(mach)`.\n\n```julia\nmach = machine(pipe, X, y)\nfit!(mach, verbosity=2)\n\n# first element initial loss, 2:end per epoch training losses\nreport(mach).transformed_target_model_deterministic.model.training_losses\n```\n\n## Experimenting with learning rate\n\nWe can visually compare how the learning rate affects the predictions:\n\n```julia\nusing Plots\n\nrates = [5e-5, 1e-4, 0.005, 0.001, 0.05]\nplt=plot()\n\nforeach(rates) do η\n pipe.transformed_target_model_deterministic.model.optimiser = Optimisers.Adam(η)\n fit!(mach, force=true, verbosity=0)\n losses =\n report(mach).transformed_target_model_deterministic.model.training_losses[3:end]\n plot!(1:length(losses), losses, label=η)\nend\n\nplt\n\npipe.transformed_target_model_deterministic.model.optimiser = Optimisers.Adam(0.0001)\n```\n\nWith the learning rate fixed, we compute a CV estimate of the performance (using all data bound to `mach`) and compare this with performance on the test set:\n\n```julia\n# CV estimate, based on `(X, 
y)`:\nevaluate!(mach, resampling=CV(nfolds=5), measure=l2)\n\n# loss for `(Xtest, ytest)`:\nfit!(mach) # train on `(X, y)`\nyhat = predict(mach, Xtest)\nl2(yhat, ytest)\n```\n\nThese losses, for the pipeline model, refer to the target on the original, unstandardized, scale.\n\nFor implementing stopping criteria and other iteration controls, refer to examples linked from the MLJFlux documentation.\n\nSee also [`MultitargetNeuralNetworkRegressor`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/alan-turing-institute/MLJFlux.jl" +":package_name" = "MLJFlux" +":name" = "NeuralNetworkRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":predict"] +":deep_properties" = "`(:optimiser, :builder)`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":supports_training_losses" = "`true`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":constructor" = "`nothing`" + +[CatBoost.CatBoostRegressor] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Int64\", \"String\", \"String\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Int64\", \"Union{Nothing, Int64}\", \"Int64\", \"Union{Nothing, Int64}\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Bool\", \"Union{Nothing, Float64}\", 
\"Union{Nothing, Int64}\", \"Float64\", \"Union{Nothing, String, PythonCall.Py}\", \"Float64\", \"Float64\", \"Union{Nothing, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"String\", \"String\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"Int64\", \"Int64\", \"String\", \"Union{Nothing, PythonCall.Py}\", \"Float64\", \"Union{Nothing, Float64}\", \"String\", \"Bool\", \"Float64\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, PythonCall.Py}\")`" +":package_uuid" = "e2e10f9a-a85d-4fa9-b6b2-639a32100a12" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = 
"MIT" +":prediction_type" = ":deterministic" +":load_path" = "CatBoost.MLJCatBoostInterface.CatBoostRegressor" +":hyperparameters" = "`(:iterations, :learning_rate, :depth, :l2_leaf_reg, :model_size_reg, :rsm, :loss_function, :border_count, :feature_border_type, :per_float_feature_quantization, :input_borders, :output_borders, :fold_permutation_block, :nan_mode, :counter_calc_method, :leaf_estimation_iterations, :leaf_estimation_method, :thread_count, :random_seed, :metric_period, :ctr_leaf_count_limit, :store_all_simple_ctr, :max_ctr_complexity, :has_time, :allow_const_label, :target_border, :one_hot_max_size, :random_strength, :custom_metric, :bagging_temperature, :fold_len_multiplier, :used_ram_limit, :gpu_ram_part, :pinned_memory_size, :allow_writing_files, :approx_on_full_history, :boosting_type, :simple_ctr, :combinations_ctr, :per_feature_ctr, :ctr_target_border_count, :task_type, :devices, :bootstrap_type, :subsample, :sampling_frequency, :sampling_unit, :gpu_cat_features_storage, :data_partition, :early_stopping_rounds, :grow_policy, :min_data_in_leaf, :max_leaves, :leaf_estimation_backtracking, :feature_weights, :penalties_coefficient, :model_shrink_rate, :model_shrink_mode, :langevin, :diffusion_temperature, :posterior_sampling, :boost_from_average, :text_processing)`" +":is_pure_julia" = "`false`" +":human_name" = "CatBoost regressor" +":is_supervised" = "`true`" +":iteration_parameter" = ":iterations" +":docstring" = """```julia\nCatBoostRegressor\n```\n\nA model type for constructing a CatBoost regressor, based on [CatBoost.jl](https://github.com/JuliaAI/CatBoost.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nCatBoostRegressor = @load CatBoostRegressor pkg=CatBoost\n```\n\nDo `model = CatBoostRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `CatBoostRegressor(iterations=...)`.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, `Finite`, `Textual`; check column scitypes with `schema(X)`. `Textual` columns will be passed to catboost as `text_features`, `Multiclass` columns will be passed to catboost as `cat_features`, and `OrderedFactor` columns will be converted to integers.\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\nFor more details on the catboost hyperparameters, see the Python docs: https://catboost.ai/en/docs/concepts/python-reference_catboostclassifier#parameters\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same scitype as `X` above.\n\n# Accessor functions\n\n * `feature_importances(mach)`: return vector of feature importances, in the form of `feature::Symbol => importance::Real` pairs\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `model`: The Python CatBoostRegressor model\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `feature_importances`: Vector{Pair{Symbol, Float64}} of feature importances\n\n# Examples\n\n```julia\nusing CatBoost.MLJCatBoostInterface\nusing MLJ\n\nX = (\n duration = [1.5, 4.1, 5.0, 6.7], \n n_phone_calls = [4, 5, 6, 7], \n department = coerce([\"acc\", \"ops\", \"acc\", \"ops\"], Multiclass), \n)\ny = [2.0, 4.0, 6.0, 7.0]\n\nmodel = CatBoostRegressor(iterations=5)\nmach = machine(model, X, y)\nfit!(mach)\npreds = predict(mach, X)\n```\n\nSee also [catboost](https://github.com/catboost/catboost) and the 
unwrapped model type [`CatBoost.CatBoostRegressor`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/CatBoost.jl" +":package_name" = "CatBoost" +":name" = "CatBoostRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":reformat", ":selectrows", ":update"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`true`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" + +[CatBoost.CatBoostClassifier] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Int64\", \"String\", \"String\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Int64\", \"Union{Nothing, Int64}\", \"Int64\", \"Union{Nothing, Int64}\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Bool\", \"Union{Nothing, Float64}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Int64}\", \"Float64\", \"Float64\", \"Float64\", \"Union{Nothing, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, String}\", 
\"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"String\", \"String\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"Int64\", \"Int64\", \"String\", \"Union{Nothing, PythonCall.Py}\", \"Float64\", \"Union{Nothing, Float64}\", \"String\", \"Bool\", \"Float64\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, PythonCall.Py}\")`" +":package_uuid" = "e2e10f9a-a85d-4fa9-b6b2-639a32100a12" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "MIT" +":prediction_type" = ":probabilistic" +":load_path" = "CatBoost.MLJCatBoostInterface.CatBoostClassifier" +":hyperparameters" = "`(:iterations, :learning_rate, :depth, :l2_leaf_reg, :model_size_reg, :rsm, :loss_function, :border_count, 
:feature_border_type, :per_float_feature_quantization, :input_borders, :output_borders, :fold_permutation_block, :nan_mode, :counter_calc_method, :leaf_estimation_iterations, :leaf_estimation_method, :thread_count, :random_seed, :metric_period, :ctr_leaf_count_limit, :store_all_simple_ctr, :max_ctr_complexity, :has_time, :allow_const_label, :target_border, :class_weights, :auto_class_weights, :one_hot_max_size, :random_strength, :bagging_temperature, :fold_len_multiplier, :used_ram_limit, :gpu_ram_part, :pinned_memory_size, :allow_writing_files, :approx_on_full_history, :boosting_type, :simple_ctr, :combinations_ctr, :per_feature_ctr, :task_type, :devices, :bootstrap_type, :subsample, :sampling_frequency, :sampling_unit, :gpu_cat_features_storage, :data_partition, :early_stopping_rounds, :grow_policy, :min_data_in_leaf, :max_leaves, :leaf_estimation_backtracking, :feature_weights, :penalties_coefficient, :model_shrink_rate, :model_shrink_mode, :langevin, :diffusion_temperature, :posterior_sampling, :boost_from_average, :text_processing)`" +":is_pure_julia" = "`false`" +":human_name" = "CatBoost classifier" +":is_supervised" = "`true`" +":iteration_parameter" = ":iterations" +":docstring" = """```julia\nCatBoostClassifier\n```\n\nA model type for constructing a CatBoost classifier, based on [CatBoost.jl](https://github.com/JuliaAI/CatBoost.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nCatBoostClassifier = @load CatBoostClassifier pkg=CatBoost\n```\n\nDo `model = CatBoostClassifier()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `CatBoostClassifier(iterations=...)`.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, `Finite`, `Textual`; check column scitypes with `schema(X)`. `Textual` columns will be passed to catboost as `text_features`, `Multiclass` columns will be passed to catboost as `cat_features`, and `OrderedFactor` columns will be converted to integers.\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `Finite`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\nFor more details on the catboost hyperparameters, see the Python docs: https://catboost.ai/en/docs/concepts/python-reference_catboostclassifier#parameters\n\n# Operations\n\n * `predict(mach, Xnew)`: probabilistic predictions of the target given new features `Xnew` having the same scitype as `X` above.\n * `predict_mode(mach, Xnew)`: returns the mode of each of the predictions above.\n\n# Accessor functions\n\n * `feature_importances(mach)`: return vector of feature importances, in the form of `feature::Symbol => importance::Real` pairs\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `model`: The Python CatBoostClassifier model\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `feature_importances`: Vector{Pair{Symbol, Float64}} of feature importances\n\n# Examples\n\n```julia\nusing CatBoost.MLJCatBoostInterface\nusing MLJ\n\nX = (\n duration = [1.5, 4.1, 5.0, 6.7], \n n_phone_calls = [4, 5, 6, 7], \n department = coerce([\"acc\", \"ops\", \"acc\", \"ops\"], Multiclass), \n)\ny = coerce([0, 0, 1, 1], Multiclass)\n\nmodel = CatBoostClassifier(iterations=5)\nmach = machine(model, X, y)\nfit!(mach)\nprobs = 
predict(mach, X)\npreds = predict_mode(mach, X)\n```\n\nSee also [catboost](https://github.com/catboost/catboost) and the unwrapped model type [`CatBoost.CatBoostClassifier`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/CatBoost.jl" +":package_name" = "CatBoost" ":name" = "CatBoostClassifier" ":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" @@ -851,7 +1036,7 @@ ":is_wrapper" = "`false`" [NearestNeighborModels.KNNClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Distances.Metric\", \"Int64\", \"Bool\", \"NearestNeighborModels.KNNKernel\")`" ":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -885,10 +1070,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [NearestNeighborModels.MultitargetKNNClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Distances.Metric\", \"Int64\", \"Bool\", \"NearestNeighborModels.KNNKernel\", \"Type{<:Union{AbstractDict{<:AbstractString, <:AbstractVector}, AbstractDict{Symbol, <:AbstractVector}, NamedTuple{names, T} where {N, names, T<:NTuple{N, AbstractVector}}}}\")`" ":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -922,10 +1107,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" 
[NearestNeighborModels.MultitargetKNNRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Distances.Metric\", \"Int64\", \"Bool\", \"NearestNeighborModels.KNNKernel\")`" ":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -959,10 +1144,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [NearestNeighborModels.KNNRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Distances.Metric\", \"Int64\", \"Bool\", \"NearestNeighborModels.KNNKernel\")`" ":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -996,10 +1181,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJXGBoostInterface.XGBoostCount] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"String\", \"Union{Bool, Int64}\", \"Float64\", \"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Float64\", \"Float64\", \"Union{Nothing, String}\", \"Union{Bool, Int64}\", \"String\", \"String\", \"Int64\", \"Int64\", \"String\", \"String\", \"String\", \"Float64\", \"Union{Bool, Int64}\", \"Float64\", \"String\", \"Int64\", \"Float64\", \"Any\", \"Float64\", \"Int64\", \"Any\", \"Int64\", \"String\", \"Union{Nothing, Int64}\", 
\"Bool\", \"Vector{String}\", \"Union{Nothing, String}\")`" ":package_uuid" = "009559a3-9522-5dbb-924b-0b6ed2b22bb9" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1033,10 +1218,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJXGBoostInterface.XGBoostRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"String\", \"Union{Bool, Int64}\", \"Float64\", \"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Float64\", \"Float64\", \"Union{Nothing, String}\", \"Union{Bool, Int64}\", \"String\", \"String\", \"Int64\", \"Int64\", \"String\", \"String\", \"String\", \"Float64\", \"Union{Bool, Int64}\", \"Float64\", \"String\", \"Int64\", \"Float64\", \"Any\", \"Float64\", \"Int64\", \"Any\", \"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Bool\", \"Vector{String}\", \"Union{Nothing, String}\")`" ":package_uuid" = "009559a3-9522-5dbb-924b-0b6ed2b22bb9" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1070,10 +1255,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJXGBoostInterface.XGBoostClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"String\", \"Union{Bool, Int64}\", \"Float64\", \"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Float64\", \"Float64\", \"Union{Nothing, String}\", \"Union{Bool, Int64}\", \"String\", \"String\", \"Int64\", \"Int64\", \"String\", \"String\", \"String\", \"Float64\", \"Union{Bool, Int64}\", \"Float64\", \"String\", \"Int64\", \"Float64\", \"Any\", \"Float64\", \"Int64\", \"Any\", \"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Bool\", \"Vector{String}\", \"Union{Nothing, String}\")`" ":package_uuid" = "009559a3-9522-5dbb-924b-0b6ed2b22bb9" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1107,10 +1292,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.ProbabilisticSGDClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", 
\"String\", \"Float64\", \"Float64\", \"Bool\", \"Int64\", \"Union{Nothing, Float64}\", \"Bool\", \"Int64\", \"Float64\", \"Union{Nothing, Int64}\", \"Any\", \"String\", \"Float64\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Any\", \"Bool\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1144,10 +1329,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.RidgeCVClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"AbstractArray{Float64}\", \"Bool\", \"Any\", \"Int64\", \"Any\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1181,10 +1366,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LogisticClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"Bool\", \"Float64\", \"Float64\", \"Bool\", \"Float64\", \"Any\", \"Any\", \"String\", \"Int64\", \"String\", \"Int64\", \"Bool\", \"Union{Nothing, Int64}\", \"Union{Nothing, Float64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing)`" @@ -1218,10 +1403,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.RandomForestRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Union{Nothing, Float64, Int64, String}\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\", \"Bool\", \"Float64\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Dict, Vector}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1255,10 +1440,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Continuous}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.ElasticNetCVRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Float64, Vector{Float64}}\", \"Float64\", \"Int64\", \"Any\", \"Bool\", \"Union{Bool, String, AbstractMatrix}\", \"Int64\", \"Float64\", \"Any\", \"Bool\", \"Union{Bool, Int64}\", \"Union{Nothing, Int64}\", \"Bool\", \"Any\", \"String\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1292,10 +1477,10 @@ 
":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.PerceptronClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Nothing, String}\", \"Float64\", \"Bool\", \"Int64\", \"Union{Nothing, Float64}\", \"Bool\", \"Int64\", \"Float64\", \"Union{Nothing, Int64}\", \"Any\", \"Bool\", \"Float64\", \"Int64\", \"Any\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1329,10 +1514,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.MultiTaskLassoRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Any\", \"String\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1366,10 +1551,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LinearRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Union{Nothing, Int64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" 
":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -1403,10 +1588,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.HDBSCAN] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, Int64}\", \"Float64\", \"Union{Nothing, Int64}\", \"String\", \"Float64\", \"String\", \"Int64\", \"String\", \"Bool\", \"Union{Nothing, String}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1440,10 +1625,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.DBSCAN] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"String\", \"String\", \"Int64\", \"Union{Nothing, Float64}\", \"Union{Nothing, Int64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1477,10 +1662,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.RidgeRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Float64, Vector{Float64}}\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"String\", 
\"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1514,10 +1699,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LassoLarsICRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"Bool\", \"Union{Bool, Int64}\", \"Union{Bool, String, AbstractMatrix}\", \"Int64\", \"Float64\", \"Bool\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1551,10 +1736,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.ARDRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Bool\", \"Float64\", \"Bool\", \"Bool\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1588,10 +1773,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SVMNuRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = 
"`(\"Float64\", \"Float64\", \"Union{Function, String}\", \"Int64\", \"Union{Float64, String}\", \"Float64\", \"Any\", \"Float64\", \"Int64\", \"Int64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1625,10 +1810,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.RidgeClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Float64\", \"Any\", \"String\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1662,10 +1847,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SGDRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"String\", \"Float64\", \"Float64\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Union{Bool, Int64}\", \"Float64\", \"Any\", \"String\", \"Float64\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Bool\", \"Union{Bool, Int64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1699,10 +1884,10 @@ ":reports_feature_importances" = "`true`" 
":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.ComplementNBClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Union{Nothing, AbstractVector}\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" @@ -1736,10 +1921,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.HuberRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Float64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1773,10 +1958,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SVMNuClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Union{Function, String}\", \"Int64\", \"Union{Float64, String}\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Int64\", \"String\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1810,10 +1995,10 @@ ":reports_feature_importances" = "`false`" 
":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.GradientBoostingClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"Float64\", \"Int64\", \"Float64\", \"String\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Int64\", \"Float64\", \"Any\", \"Any\", \"Union{Nothing, Float64, Int64, String}\", \"Int64\", \"Union{Nothing, Int64}\", \"Bool\", \"Float64\", \"Union{Nothing, Int64}\", \"Float64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1847,10 +2032,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.GaussianProcessRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Union{Float64, AbstractArray}\", \"Any\", \"Int64\", \"Bool\", \"Bool\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1884,10 +2069,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SVMLinearRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = 
"`(\"Float64\", \"Float64\", \"Float64\", \"String\", \"Bool\", \"Float64\", \"Bool\", \"Any\", \"Int64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1921,10 +2106,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LarsRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Bool\", \"Union{Bool, Int64}\", \"Union{Bool, String, AbstractMatrix}\", \"Int64\", \"Float64\", \"Bool\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1958,10 +2143,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.MeanShift] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Nothing, Float64}\", \"Union{Nothing, AbstractArray}\", \"Bool\", \"Int64\", \"Bool\", \"Union{Nothing, Int64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1995,10 +2180,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.HistGradientBoostingClassifier] -":constructor" = "`nothing`" 
+":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"Float64\", \"Int64\", \"Union{Nothing, Int64}\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Vector}\", \"Union{Nothing, Dict, Vector}\", \"Any\", \"Bool\", \"Union{Bool, String}\", \"String\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Int64}\", \"Float64\", \"Any\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2032,10 +2217,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.AdaBoostRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Int64\", \"Float64\", \"String\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -2069,10 +2254,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.AffinityPropagation] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"Int64\", \"Bool\", \"Any\", \"String\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2106,10 +2291,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = 
"`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.MultiTaskLassoCVRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"Any\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Any\", \"Union{Bool, Int64}\", \"Union{Nothing, Int64}\", \"Any\", \"String\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2143,10 +2328,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.OrthogonalMatchingPursuitRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Nothing, Int64}\", \"Union{Nothing, Float64}\", \"Bool\", \"Union{Bool, String, AbstractMatrix}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" @@ -2180,10 +2365,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.BernoulliNBClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Union{Nothing, Float64}\", \"Bool\", \"Union{Nothing, AbstractVector}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" @@ -2217,10 +2402,10 
@@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.PassiveAggressiveClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Bool\", \"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Any\", \"Bool\", \"Any\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2254,10 +2439,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.RidgeCVRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, String}\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2291,10 +2476,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SVMRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Function, String}\", \"Int64\", \"Union{Float64, String}\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Any\", \"Int64\", \"Int64\")`" 
":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2328,10 +2513,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.GaussianNBClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Nothing, AbstractVector{Float64}}\", \"Float64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing)`" @@ -2365,10 +2550,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.ExtraTreesClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Union{Nothing, Float64, Int64, String}\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\", \"Bool\", \"Any\", \"Float64\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Dict, Vector}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2402,10 +2587,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = 
"`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.KMeans] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Union{Int64, String}\", \"Int64\", \"Float64\", \"Int64\", \"Any\", \"Bool\", \"String\", \"Union{String, AbstractArray}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2439,10 +2624,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.MultiTaskElasticNetCVRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Float64, Vector{Float64}}\", \"Float64\", \"Int64\", \"Any\", \"Bool\", \"Int64\", \"Float64\", \"Any\", \"Bool\", \"Union{Bool, Int64}\", \"Union{Nothing, Int64}\", \"Any\", \"String\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2476,10 +2661,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LassoLarsCVRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Bool\", \"Union{Bool, Int64}\", \"Int64\", \"Union{Bool, String, AbstractMatrix}\", \"Any\", \"Int64\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Any\")`" ":package_uuid" = 
"3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2513,10 +2698,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.OrthogonalMatchingPursuitCVRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Union{Nothing, Int64}\", \"Union{Bool, Int64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2550,10 +2735,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.AdaBoostClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Int64\", \"Float64\", \"String\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -2587,10 +2772,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.PassiveAggressiveRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Bool\", \"Union{Bool, Int64}\", \"String\", \"Float64\", 
\"Any\", \"Bool\", \"Union{Bool, Int64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2624,10 +2809,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.BayesianRidgeRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2661,10 +2846,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.GaussianProcessClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Any\", \"Int64\", \"Bool\", \"Any\", \"Int64\", \"Bool\", \"String\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2698,10 +2883,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.BaggingClassifier] -":constructor" = "`nothing`" 
+":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Int64\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2735,10 +2920,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.OPTICS] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Float64, Int64}\", \"Float64\", \"String\", \"Int64\", \"String\", \"Union{Nothing, Float64}\", \"Float64\", \"Bool\", \"Union{Nothing, Float64, Int64}\", \"String\", \"Int64\", \"Union{Nothing, Int64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2772,10 +2957,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.RANSACRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Union{Float64, Int64}\", \"Union{Nothing, Float64}\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Function, String}\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing)`" @@ -2809,10 +2994,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.KNeighborsRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Union{Function, String}\", \"String\", \"Int64\", \"Int64\", \"Any\", \"Any\", \"Union{Nothing, Int64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2846,10 +3031,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.HistGradientBoostingRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"Union{Nothing, Float64}\", \"Float64\", \"Int64\", \"Union{Nothing, Int64}\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Vector}\", \"Union{Nothing, Dict, Vector}\", \"Any\", \"Bool\", \"Union{Bool, String}\", \"String\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Int64}\", \"Float64\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2883,10 +3068,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" 
= "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.MiniBatchKMeans] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Any\", \"Float64\", \"Int64\", \"Union{Nothing, Int64}\", \"Union{Int64, String}\", \"Union{String, AbstractArray}\", \"Float64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2920,10 +3105,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LassoCVRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"Any\", \"Bool\", \"Union{Bool, String, AbstractMatrix}\", \"Int64\", \"Float64\", \"Bool\", \"Any\", \"Union{Bool, Int64}\", \"Union{Nothing, Int64}\", \"Bool\", \"Any\", \"String\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2957,10 +3142,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.DummyRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"Any\", \"Float64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" 
@@ -2994,10 +3179,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.BisectingKMeans] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Any\", \"Bool\", \"String\", \"Union{String, AbstractArray}\", \"String\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3031,10 +3216,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LassoLarsRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Union{Bool, Int64}\", \"Union{Bool, String, AbstractMatrix}\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3068,10 +3253,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LarsCVRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Bool\", \"Union{Bool, Int64}\", \"Int64\", \"Union{Bool, String, AbstractMatrix}\", \"Any\", 
\"Int64\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3105,10 +3290,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.KNeighborsClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Union{Function, String}\", \"String\", \"Int64\", \"Int64\", \"Any\", \"Any\", \"Union{Nothing, Int64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3142,10 +3327,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SVMLinearClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"String\", \"Bool\", \"Float64\", \"Float64\", \"String\", \"Bool\", \"Float64\", \"Any\", \"Int64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3179,10 +3364,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.FeatureAgglomeration] -":constructor" = "`nothing`" 
+":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Any\", \"Any\", \"Any\", \"Union{Bool, String}\", \"String\", \"Union{Nothing, Float64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3216,10 +3401,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.DummyClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"Any\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -3253,10 +3438,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.BaggingRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Int64\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3290,10 +3475,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.BayesianQDA] 
-":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Nothing, AbstractVector}\", \"Float64\", \"Bool\", \"Float64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" @@ -3327,10 +3512,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.BayesianLDA] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"Union{Nothing, Float64, String}\", \"Union{Nothing, AbstractVector}\", \"Union{Nothing, Int64}\", \"Bool\", \"Float64\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3364,10 +3549,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SGDClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"String\", \"Float64\", \"Float64\", \"Bool\", \"Int64\", \"Union{Nothing, Float64}\", \"Bool\", \"Int64\", \"Float64\", \"Union{Nothing, Int64}\", \"Any\", \"String\", \"Float64\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Any\", \"Bool\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3401,10 +3586,10 @@ ":reports_feature_importances" = 
"`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.TheilSenRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Int64\", \"Union{Nothing, Int64}\", \"Int64\", \"Float64\", \"Any\", \"Union{Nothing, Int64}\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3438,10 +3623,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SpectralClustering] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, String}\", \"Any\", \"Int64\", \"Float64\", \"String\", \"Int64\", \"Float64\", \"String\", \"Union{Nothing, Int64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3475,10 +3660,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.Birch] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"Int64\", \"Bool\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -3512,10 
+3697,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.AgglomerativeClustering] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"String\", \"Any\", \"Any\", \"Union{Bool, String}\", \"String\", \"Union{Nothing, Float64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3549,10 +3734,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.ElasticNetRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Float64\", \"Bool\", \"Union{Bool, AbstractMatrix}\", \"Int64\", \"Bool\", \"Float64\", \"Bool\", \"Bool\", \"Any\", \"String\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3586,10 +3771,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.RandomForestClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", 
\"Union{Nothing, Float64, Int64, String}\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\", \"Bool\", \"Any\", \"Float64\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Dict, Vector}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3623,10 +3808,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Continuous}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LogisticCVClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Int64, AbstractVector{Float64}}\", \"Bool\", \"Any\", \"Bool\", \"String\", \"Any\", \"String\", \"Float64\", \"Int64\", \"Any\", \"Union{Nothing, Int64}\", \"Int64\", \"Bool\", \"Float64\", \"String\", \"Any\", \"Union{Nothing, AbstractVector{Float64}}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3660,10 +3845,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.MultiTaskElasticNetRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Union{Float64, Vector{Float64}}\", \"Bool\", \"Bool\", \"Int64\", 
\"Float64\", \"Bool\", \"Any\", \"String\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3697,10 +3882,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.ExtraTreesRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Union{Nothing, Float64, Int64, String}\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\", \"Bool\", \"Float64\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Dict, Vector}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3734,10 +3919,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LassoRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Union{Bool, AbstractMatrix}\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Any\", \"String\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3771,10 +3956,10 @@ 
":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.MultinomialNBClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Union{Nothing, AbstractVector}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -3808,10 +3993,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.GradientBoostingRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"Float64\", \"Int64\", \"Float64\", \"String\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Int64\", \"Float64\", \"Any\", \"Any\", \"Union{Nothing, Float64, Int64, String}\", \"Float64\", \"Int64\", \"Union{Nothing, Int64}\", \"Bool\", \"Float64\", \"Union{Nothing, Int64}\", \"Float64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3845,10 +4030,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SVMClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", 
\"Union{Function, String}\", \"Int64\", \"Union{Float64, String}\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Int64\", \"String\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3882,10 +4067,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionNeighbors.ABODDetector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Integer\", \"Distances.Metric\", \"Symbol\", \"Union{Bool, Symbol}\", \"Integer\", \"Bool\", \"Bool\", \"Bool\")`" ":package_uuid" = "51249a0a-cb36-4849-8e04-30c7f8d311bb" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3919,10 +4104,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionNeighbors.DNNDetector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Distances.Metric\", \"Symbol\", \"Union{Bool, Symbol}\", \"Integer\", \"Bool\", \"Bool\", \"Real\")`" ":package_uuid" = "51249a0a-cb36-4849-8e04-30c7f8d311bb" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3956,10 +4141,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, 
AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionNeighbors.LOFDetector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Integer\", \"Distances.Metric\", \"Symbol\", \"Union{Bool, Symbol}\", \"Integer\", \"Bool\", \"Bool\")`" ":package_uuid" = "51249a0a-cb36-4849-8e04-30c7f8d311bb" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3993,10 +4178,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionNeighbors.KNNDetector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Integer\", \"Distances.Metric\", \"Symbol\", \"Union{Bool, Symbol}\", \"Integer\", \"Bool\", \"Bool\", \"Symbol\")`" ":package_uuid" = "51249a0a-cb36-4849-8e04-30c7f8d311bb" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4030,10 +4215,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionNeighbors.COFDetector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = 
"`(\"Integer\", \"Distances.Metric\", \"Symbol\", \"Union{Bool, Symbol}\", \"Integer\", \"Bool\", \"Bool\")`" ":package_uuid" = "51249a0a-cb36-4849-8e04-30c7f8d311bb" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4067,7 +4252,7 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [SIRUS.StableRulesClassifier] ":is_wrapper" = "`false`" @@ -4699,7 +4884,7 @@ ":constructor" = "`nothing`" [Maxnet.MaxnetBinaryClassifier] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Union{String, Vector{<:Maxnet.AbstractFeatureClass}}\", \"Float64\", \"Any\", \"Bool\", \"Integer\", \"Float64\", \"GLM.Link\", \"Bool\", \"Any\")`" ":package_uuid" = "81f79f80-22f2-4e41-ab86-00c11cf0f26f" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4733,7 +4918,7 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [ParallelKMeans.KMeans] ":is_wrapper" = "`false`" @@ -5438,6 +5623,43 @@ ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_wrapper" = "`true`" +[MLJWrappers.Transformer] +":constructor" = "`MLJWrappers.Transformer`" +":hyperparameter_types" = "`(\"MLJModelInterface.Model\",)`" +":package_uuid" = "b5d0f7f3-9870-4c70-ba08-cb780c37e63f" +":hyperparameter_ranges" = "`(nothing,)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = 
"`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "unknown" +":prediction_type" = ":unknown" +":load_path" = "MLJWrappers.Transformer" +":hyperparameters" = "`(:model,)`" +":is_pure_julia" = "`true`" +":human_name" = "transformer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nTransformer(supervised_model)\n```\n\nWrap `supervised_model` so that it is treated as a transformer in MLJ pipelines. It is assumed that `supervised_model isa Supervised` and that `transform` is implemented for the model type.\n\nFor `Supervised` models in an MLJ pipeline, it is the output of `predict` that is propagated by default to the next model in the pipeline. By wrapping in `Transform`, the output of `transform` is propagated instead.\n\nThe original hyperparameters of `supervised_model` are nested hyperparameters in `Transformer(supervised_model)`, but in most other respects the latter behaves like `supervised_model`.\n\n# Example\n\nBelow `reducer` is a supervised model implementing `transform` which selects features using Recursive Feature Elimination. 
Because it is supervised, we obtain an error when we follow it with another supervised model in a pipeline:\n\n```julia\nusing MLJ\nRandomForestClassifier = @load RandomForestClassifier pkg=DecisionTree\nKNNClassifier = @load KNNClassifier pkg=NearestNeighborModels\n\nreducer = RecursiveFeatureElimination(RandomForestClassifier(), n_features=2)\nreducer |> KNNClassifier()\n# ERROR: ArgumentError: More than one supervised model in a pipeline is not permitted\n```\n\nThe following, however, works as expected, passing the reduced training features to the K-nearest neighbor classifier, when `pipe` is trained.\n\n```julia\npipe = Transformer(reducer) |> KNNClassifier()\n```\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/Transformer.jl" +":package_name" = "MLJWrappers.jl" +":name" = "Transformer" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":fit", ":fitted_params", ":reformat", ":training_losses", ":transform", ":update"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`true`" + [Imbalance.RandomOversampler] ":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" @@ -5846,7 +6068,7 @@ ":is_wrapper" = "`false`" [MLJTuning.TunedModel] -":is_wrapper" = "`true`" +":constructor" = "`TunedModel`" ":hyperparameter_types" = "`(\"Union{MLJModelInterface.Probabilistic, MLJModelInterface.ProbabilisticSupervisedDetector, MLJModelInterface.ProbabilisticUnsupervisedDetector}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, AbstractVector{<:Real}}\", 
\"Union{Nothing, AbstractDict}\", \"Any\", \"Any\", \"Any\", \"Bool\", \"Int64\", \"Union{Nothing, Int64}\", \"ComputationalResources.AbstractResource\", \"ComputationalResources.AbstractResource\", \"Bool\", \"Bool\", \"Bool\", \"Any\")`" ":package_uuid" = "03970b2e-30c4-11ea-3135-d1576263f10f" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -5880,80 +6102,80 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`TunedModel`" +":is_wrapper" = "`true`" [FeatureSelection.FeatureSelector] -":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Union{Function, Vector{Symbol}}\", \"Bool\")`" -":package_uuid" = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6" -":hyperparameter_ranges" = "`(nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":input_scitype" = "`ScientificTypesBase.Table`" ":output_scitype" = "`ScientificTypesBase.Table`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Table`" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":target_in_fit" = "`false`" +":is_pure_julia" = "`true`" +":package_name" = "FeatureSelection" ":package_license" = "MIT" -":prediction_type" = ":unknown" ":load_path" = "FeatureSelection.FeatureSelector" -":hyperparameters" = "`(:features, :ignore)`" -":is_pure_julia" = "`true`" -":human_name" = "feature selector" -":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nFeatureSelector\n```\n\nA model type for constructing a feature 
selector, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nFeatureSelector = @load FeatureSelector pkg=unknown\n```\n\nDo `model = FeatureSelector()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FeatureSelector(features=...)`.\n\nUse this model to select features (columns) of a table, usually as part of a model `Pipeline`.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any table of input features, where \"table\" is in the sense of Tables.jl\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: one of the following, with the behavior indicated:\n\n * `[]` (empty, the default): filter out all features (columns) which were not encountered in training\n * non-empty vector of feature names (symbols): keep only the specified features (`ignore=false`) or keep only unspecified features (`ignore=true`)\n * function or other callable: keep a feature if the callable returns `true` on its name. 
For example, specifying `FeatureSelector(features = name -> name in [:x1, :x3], ignore = true)` has the same effect as `FeatureSelector(features = [:x1, :x3], ignore = true)`, namely to select all features, with the exception of `:x1` and `:x3`.\n * `ignore`: whether to ignore or keep specified `features`, as explained above\n\n# Operations\n\n * `transform(mach, Xnew)`: select features from the table `Xnew` as specified by the model, taking features seen during training into account, if relevant\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_to_keep`: the features that will be selected\n\n# Example\n\n```julia\nusing MLJ\n\nX = (ordinal1 = [1, 2, 3],\n ordinal2 = coerce([\"x\", \"y\", \"x\"], OrderedFactor),\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = coerce([\"Your father\", \"he\", \"is\"], Multiclass));\n\nselector = FeatureSelector(features=[:ordinal3, ], ignore=true);\n\njulia> transform(fit!(machine(selector, X)), X)\n(ordinal1 = [1, 2, 3],\n ordinal2 = CategoricalValue{Symbol,UInt32}[\"x\", \"y\", \"x\"],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n\n```\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_uuid" = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6" ":package_url" = "https://github.com/JuliaAI/FeatureSelection.jl" -":package_name" = "FeatureSelection" -":name" = "FeatureSelector" -":target_in_fit" = "`false`" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" +":docstring" = """```julia\nFeatureSelector\n```\n\nA model type for constructing a feature selector, based on [FeatureSelection.jl](https://github.com/JuliaAI/FeatureSelection.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nFeatureSelector = @load FeatureSelector pkg=FeatureSelection\n```\n\nDo 
`model = FeatureSelector()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FeatureSelector(features=...)`.\n\nUse this model to select features (columns) of a table, usually as part of a model `Pipeline`.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any table of input features, where \"table\" is in the sense of Tables.jl\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: one of the following, with the behavior indicated:\n\n * `[]` (empty, the default): filter out all features (columns) which were not encountered in training\n * non-empty vector of feature names (symbols): keep only the specified features (`ignore=false`) or keep only unspecified features (`ignore=true`)\n * function or other callable: keep a feature if the callable returns `true` on its name. For example, specifying `FeatureSelector(features = name -> name in [:x1, :x3], ignore = true)` has the same effect as `FeatureSelector(features = [:x1, :x3], ignore = true)`, namely to select all features, with the exception of `:x1` and `:x3`.\n * `ignore`: whether to ignore or keep specified `features`, as explained above\n\n# Operations\n\n * `transform(mach, Xnew)`: select features from the table `Xnew` as specified by the model, taking features seen during training into account, if relevant\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_to_keep`: the features that will be selected\n\n# Example\n\n```julia\nusing MLJ\n\nX = (ordinal1 = [1, 2, 3],\n ordinal2 = coerce([\"x\", \"y\", \"x\"], OrderedFactor),\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = coerce([\"Your father\", \"he\", \"is\"], Multiclass));\n\nselector = FeatureSelector(features=[:ordinal3, ], ignore=true);\n\njulia> transform(fit!(machine(selector, X)), 
X)\n(ordinal1 = [1, 2, 3],\n ordinal2 = CategoricalValue{Symbol,UInt32}[\"x\", \"y\", \"x\"],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n\n```\n""" +":name" = "FeatureSelector" +":human_name" = "feature selector" +":tags" = [] +":is_supervised" = "`false`" +":prediction_type" = ":unknown" +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] -":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":hyperparameters" = "`(:features, :ignore)`" +":hyperparameter_types" = "`(\"Union{Function, Vector{Symbol}}\", \"Bool\")`" +":hyperparameter_ranges" = "`(nothing, nothing)`" +":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" -":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Table`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" ":constructor" = "`nothing`" [FeatureSelection.RecursiveFeatureElimination] -":is_wrapper" = "`true`" -":hyperparameter_types" = "`(\"MLJModelInterface.Supervised\", \"Float64\", \"Float64\")`" -":package_uuid" = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" +":input_scitype" = "`ScientificTypesBase.Unknown`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" 
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":target_in_fit" = "`true`" +":is_pure_julia" = "`true`" +":package_name" = "FeatureSelection" ":package_license" = "MIT" -":prediction_type" = ":probabilistic" ":load_path" = "FeatureSelection.RecursiveFeatureElimination" -":hyperparameters" = "`(:model, :n_features, :step)`" -":is_pure_julia" = "`true`" -":human_name" = "deterministic recursive feature elimination" -":is_supervised" = "`true`" -":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nRecursiveFeatureElimination(model; n_features=0, step=1)\n```\n\nThis model implements a recursive feature elimination algorithm for feature selection. It recursively removes features, training a base model on the remaining features and evaluating their importance until the desired number of features is selected.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `rfe_model` to data with\n\n```julia\nmach = machine(rfe_model, X, y)\n```\n\nOR, if the base model supports weights, as\n\n```julia\nmach = machine(rfe_model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of the scitype as that required by the base model; check column scitypes with `schema(X)` and column scitypes required by base model with `input_scitype(basemodel)`.\n * `y` is the target, which can be any table of responses whose element scitype is `Continuous` or `Finite` depending on the `target_scitype` required by the base model; check the scitype with `scitype(y)`.\n * `w` is the observation weights which can either be `nothing`(default) or an `AbstractVector` whoose element scitype is `Count` or `Continuous`. 
This is different from `weights` kernel which is an hyperparameter to the model, see below.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * model: A base model with a `fit` method that provides information on feature feature importance (i.e `reports_feature_importances(model) == true`)\n * n_features::Real = 0: The number of features to select. If `0`, half of the features are selected. If a positive integer, the parameter is the absolute number of features to select. If a real number between 0 and 1, it is the fraction of features to select.\n * step::Real=1: If the value of step is at least 1, it signifies the quantity of features to eliminate in each iteration. Conversely, if step falls strictly within the range of 0.0 to 1.0, it denotes the proportion (rounded down) of features to remove during each iteration.\n\n# Operations\n\n * `transform(mach, X)`: transform the input table `X` into a new table containing only columns corresponding to features accepted by the RFE algorithm.\n * `predict(mach, X)`: transform the input table `X` into a new table same as in `transform(mach, X)` above and predict using the fitted base model on the transformed table.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_left`: names of features remaining after recursive feature elimination.\n * `model_fitresult`: fitted parameters of the base model.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `scores`: dictionary of scores for each feature in the training dataset. 
The model deems highly scored variables more significant.\n * `model_report`: report for the fitted base model.\n\n# Examples\n\nThe following example assumes you have MLJDecisionTreeInterface in the active package ennvironment.\n\n```julia\nusing MLJ\n\nRandomForestRegressor = @load RandomForestRegressor pkg=DecisionTree\n\n# Creates a dataset where the target only depends on the first 5 columns of the input table.\nA = rand(50, 10);\ny = 10 .* sin.(\n pi .* A[:, 1] .* A[:, 2]\n ) + 20 .* (A[:, 3] .- 0.5).^ 2 .+ 10 .* A[:, 4] .+ 5 * A[:, 5];\nX = MLJ.table(A);\n\n# fit a rfe model:\nrf = RandomForestRegressor()\nselector = RecursiveFeatureElimination(rf, n_features=2)\nmach = machine(selector, X, y)\nfit!(mach)\n\n# view the feature importances\nfeature_importances(mach)\n\n# predict using the base model trained on the reduced feature set:\nXnew = MLJ.table(rand(50, 10));\npredict(mach, Xnew)\n\n# transform data with all features to the reduced feature set:\ntransform(mach, Xnew)\n```\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_uuid" = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6" ":package_url" = "https://github.com/JuliaAI/FeatureSelection.jl" -":package_name" = "FeatureSelection" -":name" = "RecursiveFeatureElimination" -":target_in_fit" = "`true`" +":is_wrapper" = "`true`" +":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" +":docstring" = """```julia\nRecursiveFeatureElimination(model; n_features=0, step=1)\n```\n\nThis model implements a recursive feature elimination algorithm for feature selection. 
It recursively removes features, training a base model on the remaining features and evaluating their importance until the desired number of features is selected.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `rfe_model` to data with\n\n```julia\nmach = machine(rfe_model, X, y)\n```\n\nOR, if the base model supports weights, as\n\n```julia\nmach = machine(rfe_model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of the scitype as that required by the base model; check column scitypes with `schema(X)` and column scitypes required by base model with `input_scitype(basemodel)`.\n * `y` is the target, which can be any table of responses whose element scitype is `Continuous` or `Finite` depending on the `target_scitype` required by the base model; check the scitype with `scitype(y)`.\n * `w` is the observation weights which can either be `nothing`(default) or an `AbstractVector` whoose element scitype is `Count` or `Continuous`. This is different from `weights` kernel which is an hyperparameter to the model, see below.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * model: A base model with a `fit` method that provides information on feature feature importance (i.e `reports_feature_importances(model) == true`)\n * n_features::Real = 0: The number of features to select. If `0`, half of the features are selected. If a positive integer, the parameter is the absolute number of features to select. If a real number between 0 and 1, it is the fraction of features to select.\n * step::Real=1: If the value of step is at least 1, it signifies the quantity of features to eliminate in each iteration. 
Conversely, if step falls strictly within the range of 0.0 to 1.0, it denotes the proportion (rounded down) of features to remove during each iteration.\n\n# Operations\n\n * `transform(mach, X)`: transform the input table `X` into a new table containing only columns corresponding to features accepted by the RFE algorithm.\n * `predict(mach, X)`: transform the input table `X` into a new table same as in `transform(mach, X)` above and predict using the fitted base model on the transformed table.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_left`: names of features remaining after recursive feature elimination.\n * `model_fitresult`: fitted parameters of the base model.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `scores`: dictionary of scores for each feature in the training dataset. The model deems highly scored variables more significant.\n * `model_report`: report for the fitted base model.\n\n# Examples\n\nThe following example assumes you have MLJDecisionTreeInterface in the active package ennvironment.\n\n```julia\nusing MLJ\n\nRandomForestRegressor = @load RandomForestRegressor pkg=DecisionTree\n\n# Creates a dataset where the target only depends on the first 5 columns of the input table.\nA = rand(50, 10);\ny = 10 .* sin.(\n pi .* A[:, 1] .* A[:, 2]\n ) + 20 .* (A[:, 3] .- 0.5).^ 2 .+ 10 .* A[:, 4] .+ 5 * A[:, 5];\nX = MLJ.table(A);\n\n# fit a rfe model:\nrf = RandomForestRegressor()\nselector = RecursiveFeatureElimination(rf, n_features=2)\nmach = machine(selector, X, y)\nfit!(mach)\n\n# view the feature importances\nfeature_importances(mach)\n\n# predict using the base model trained on the reduced feature set:\nXnew = MLJ.table(rand(50, 10));\npredict(mach, Xnew)\n\n# transform data with all features to the reduced feature set:\ntransform(mach, Xnew)\n```\n""" +":name" = "RecursiveFeatureElimination" +":human_name" = "probabilistic recursive feature elimination" +":tags" = [] +":is_supervised" = "`true`" 
+":prediction_type" = ":probabilistic" +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [] -":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":hyperparameters" = "`(:model, :n_features, :step)`" +":hyperparameter_types" = "`(\"MLJModelInterface.Supervised\", \"Float64\", \"Float64\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" -":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" ":constructor" = "`RecursiveFeatureElimination`" [EvoLinear.EvoLinearRegressor] @@ -5994,7 +6216,7 @@ ":constructor" = "`nothing`" [MLJText.TfidfTransformer] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Float64\", \"Float64\", \"Bool\")`" ":package_uuid" = "7876af07-990d-54b4-ab0e-23690620f79a" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -6028,10 +6250,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" ":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJText.CountTransformer] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Float64\", \"Float64\")`" ":package_uuid" = "7876af07-990d-54b4-ab0e-23690620f79a" ":hyperparameter_ranges" = "`(nothing, nothing)`" @@ -6065,10 +6287,10 @@ ":reports_feature_importances" = "`false`" 
":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" ":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJText.BM25Transformer] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Bool\")`" ":package_uuid" = "7876af07-990d-54b4-ab0e-23690620f79a" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -6102,7 +6324,7 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" ":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [LightGBM.LGBMClassifier] ":is_wrapper" = "`false`" @@ -6737,7 +6959,7 @@ [InteractiveUtils] [OneRule.OneRuleClassifier] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`()`" ":package_uuid" = "90484964-6d6a-4979-af09-8657dbed84ff" ":hyperparameter_ranges" = "`()`" @@ -6771,10 +6993,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [OutlierDetectionPython.MCDDetector] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\")`" ":package_uuid" 
= "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" @@ -6791,7 +7013,7 @@ ":human_name" = "mcd detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nMCDDetector(store_precision = true,\n assume_centered = false,\n support_fraction = nothing,\n random_state = nothing)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.mcd](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.mcd)\n""" +":docstring" = """```julia\nMCDDetector(store_precision = true,\n assume_centered = false,\n support_fraction = nothing,\n random_state = nothing)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.mcd](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.mcd)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" @@ -6808,10 +7030,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [OutlierDetectionPython.COPODDetector] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Integer\",)`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing,)`" @@ -6828,7 +7050,7 @@ ":human_name" = "copod detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nCOPODDetector(n_jobs = 
1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.copod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.copod)\n""" +":docstring" = """```julia\nCOPODDetector(n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.copod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.copod)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" @@ -6845,10 +7067,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [OutlierDetectionPython.HBOSDetector] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Integer\", \"Real\", \"Real\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -6865,7 +7087,7 @@ ":human_name" = "hbos detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nHBOSDetector(n_bins = 10,\n alpha = 0.1,\n tol = 0.5)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.hbos](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.hbos)\n""" +":docstring" = """```julia\nHBOSDetector(n_bins = 10,\n alpha = 0.1,\n tol = 0.5)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.hbos](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.hbos)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = 
"https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" @@ -6882,10 +7104,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [OutlierDetectionPython.IForestDetector] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Integer\", \"Union{Real, String}\", \"Real\", \"Bool\", \"Union{Nothing, Integer}\", \"Integer\", \"Integer\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -6902,7 +7124,7 @@ ":human_name" = "i forest detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nIForestDetector(n_estimators = 100,\n max_samples = \"auto\",\n max_features = 1.0\n bootstrap = false,\n random_state = nothing,\n verbose = 0,\n n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.iforest](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.iforest)\n""" +":docstring" = """```julia\nIForestDetector(n_estimators = 100,\n max_samples = \"auto\",\n max_features = 1.0\n bootstrap = false,\n random_state = nothing,\n verbose = 0,\n n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.iforest](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.iforest)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" @@ -6919,10 +7141,10 @@ 
":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [OutlierDetectionPython.SOSDetector] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Real\", \"String\", \"Real\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -6939,7 +7161,7 @@ ":human_name" = "sos detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nSOSDetector(perplexity = 4.5,\n metric = \"minkowski\",\n eps = 1e-5)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sos](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sos)\n""" +":docstring" = """```julia\nSOSDetector(perplexity = 4.5,\n metric = \"minkowski\",\n eps = 1e-5)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sos](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sos)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" @@ -6956,10 +7178,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [OutlierDetectionPython.ABODDetector] -":is_wrapper" = "`false`" 
+":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Integer\", \"String\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing)`" @@ -6976,7 +7198,7 @@ ":human_name" = "abod detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nABODDetector(n_neighbors = 5,\n method = \"fast\")\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.abod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.abod)\n""" +":docstring" = """```julia\nABODDetector(n_neighbors = 5,\n method = \"fast\")\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.abod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.abod)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" @@ -6993,10 +7215,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [OutlierDetectionPython.LOFDetector] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Integer\", \"String\", \"Integer\", \"String\", \"Union{Nothing, Integer}\", \"Any\", \"Integer\", \"Bool\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -7013,7 +7235,7 @@ ":human_name" = "lof detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nLOFDetector(n_neighbors = 5,\n 
algorithm = \"auto\",\n leaf_size = 30,\n metric = \"minkowski\",\n p = 2,\n metric_params = nothing,\n n_jobs = 1,\n novelty = true)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lof](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lof)\n""" +":docstring" = """```julia\nLOFDetector(n_neighbors = 5,\n algorithm = \"auto\",\n leaf_size = 30,\n metric = \"minkowski\",\n p = 2,\n metric_params = nothing,\n n_jobs = 1,\n novelty = true)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lof](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lof)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" @@ -7030,10 +7252,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [OutlierDetectionPython.PCADetector] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"String\", \"Real\", \"Union{Integer, String}\", \"Bool\", \"Bool\", \"Union{Nothing, Integer}\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -7050,7 +7272,7 @@ ":human_name" = "pca detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nPCADetector(n_components = nothing,\n n_selected_components = nothing,\n copy = true,\n whiten = 
false,\n svd_solver = \"auto\",\n tol = 0.0\n iterated_power = \"auto\",\n standardization = true,\n weighted = true,\n random_state = nothing)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.pca](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.pca)\n""" +":docstring" = """```julia\nPCADetector(n_components = nothing,\n n_selected_components = nothing,\n copy = true,\n whiten = false,\n svd_solver = \"auto\",\n tol = 0.0\n iterated_power = \"auto\",\n standardization = true,\n weighted = true,\n random_state = nothing)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.pca](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.pca)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" @@ -7067,10 +7289,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [OutlierDetectionPython.INNEDetector] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Integer\", \"Union{Real, String}\", \"Union{Nothing, Integer}\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -7087,7 +7309,7 @@ ":human_name" = "inne detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nINNEDetector(n_estimators=200,\n max_samples=\"auto\",\n 
random_state=None)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.inne](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.inne)\n""" +":docstring" = """```julia\nINNEDetector(n_estimators=200,\n max_samples=\"auto\",\n random_state=None)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.inne](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.inne)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" @@ -7104,10 +7326,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [OutlierDetectionPython.OCSVMDetector] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"String\", \"Integer\", \"Union{Real, String}\", \"Real\", \"Real\", \"Real\", \"Bool\", \"Integer\", \"Bool\", \"Integer\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -7124,7 +7346,7 @@ ":human_name" = "ocsvm detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nOCSVMDetector(kernel = \"rbf\",\n degree = 3,\n gamma = \"auto\",\n coef0 = 0.0,\n tol = 0.001,\n nu = 0.5,\n shrinking = true,\n cache_size = 200,\n verbose = false,\n max_iter = 
-1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ocsvm](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ocsvm)\n""" +":docstring" = """```julia\nOCSVMDetector(kernel = \"rbf\",\n degree = 3,\n gamma = \"auto\",\n coef0 = 0.0,\n tol = 0.001,\n nu = 0.5,\n shrinking = true,\n cache_size = 200,\n verbose = false,\n max_iter = -1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ocsvm](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ocsvm)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" @@ -7141,10 +7363,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [OutlierDetectionPython.ECODDetector] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Any\",)`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing,)`" @@ -7161,7 +7383,7 @@ ":human_name" = "ecod detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nECODDetector(n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ecod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ecod)\n""" +":docstring" = """```julia\nECODDetector(n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ecod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ecod)\n""" 
":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" @@ -7178,10 +7400,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [OutlierDetectionPython.SODDetector] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Integer\", \"Integer\", \"Real\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -7198,7 +7420,7 @@ ":human_name" = "sod detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nSODDetector(n_neighbors = 5,\n ref_set = 10,\n alpha = 0.8)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sod)\n""" +":docstring" = """```julia\nSODDetector(n_neighbors = 5,\n ref_set = 10,\n alpha = 0.8)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sod)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" @@ -7215,10 +7437,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = 
"`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [OutlierDetectionPython.LODADetector] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Integer\", \"Integer\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing)`" @@ -7235,7 +7457,7 @@ ":human_name" = "loda detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nLODADetector(n_bins = 10,\n n_random_cuts = 100)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loda](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loda)\n""" +":docstring" = """```julia\nLODADetector(n_bins = 10,\n n_random_cuts = 100)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loda](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loda)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" @@ -7252,10 +7474,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [OutlierDetectionPython.KDEDetector] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Real\", \"String\", \"Integer\", \"String\", \"Any\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -7272,7 +7494,7 @@ ":human_name" 
= "kde detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nKDEDetector(bandwidth=1.0,\n algorithm=\"auto\",\n leaf_size=30,\n metric=\"minkowski\",\n metric_params=None)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.kde](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.kde)\n""" +":docstring" = """```julia\nKDEDetector(bandwidth=1.0,\n algorithm=\"auto\",\n leaf_size=30,\n metric=\"minkowski\",\n metric_params=None)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.kde](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.kde)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" @@ -7289,10 +7511,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [OutlierDetectionPython.CDDetector] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"PythonCall.Py\",)`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing,)`" @@ -7309,7 +7531,7 @@ ":human_name" = "cd detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nCDDetector(whitening = true,\n rule_of_thumb = false)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cd](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cd)\n""" +":docstring" = """```julia\nCDDetector(whitening = true,\n rule_of_thumb = 
false)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cd](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cd)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" @@ -7326,10 +7548,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [OutlierDetectionPython.KNNDetector] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Integer\", \"String\", \"Real\", \"String\", \"Integer\", \"String\", \"Union{Nothing, Integer}\", \"Any\", \"Integer\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -7346,7 +7568,7 @@ ":human_name" = "knn detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nKNNDetector(n_neighbors = 5,\n method = \"largest\",\n radius = 1.0,\n algorithm = \"auto\",\n leaf_size = 30,\n metric = \"minkowski\",\n p = 2,\n metric_params = nothing,\n n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.knn](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.knn)\n""" +":docstring" = """```julia\nKNNDetector(n_neighbors = 5,\n method = \"largest\",\n radius = 1.0,\n algorithm = \"auto\",\n leaf_size = 30,\n metric = \"minkowski\",\n p = 2,\n metric_params = nothing,\n n_jobs = 
1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.knn](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.knn)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" @@ -7363,10 +7585,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [OutlierDetectionPython.GMMDetector] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Integer\", \"String\", \"Real\", \"Real\", \"Integer\", \"Integer\", \"String\", \"Union{Nothing, Integer}\", \"Bool\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -7383,7 +7605,7 @@ ":human_name" = "gmm detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nGMMDetector(n_components=1,\n covariance_type=\"full\",\n tol=0.001,\n reg_covar=1e-06,\n max_iter=100,\n n_init=1,\n init_params=\"kmeans\",\n weights_init=None,\n means_init=None,\n precisions_init=None,\n random_state=None,\n warm_start=False)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.gmm](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.gmm)\n""" +":docstring" = """```julia\nGMMDetector(n_components=1,\n covariance_type=\"full\",\n tol=0.001,\n reg_covar=1e-06,\n max_iter=100,\n n_init=1,\n init_params=\"kmeans\",\n weights_init=None,\n means_init=None,\n precisions_init=None,\n 
random_state=None,\n warm_start=False)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.gmm](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.gmm)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" @@ -7400,10 +7622,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [OutlierDetectionPython.COFDetector] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Integer\", \"String\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing)`" @@ -7420,7 +7642,7 @@ ":human_name" = "cof detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nCOFDetector(n_neighbors = 5,\n method=\"fast\")\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cof](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cof)\n""" +":docstring" = """```julia\nCOFDetector(n_neighbors = 5,\n method=\"fast\")\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cof](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cof)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" @@ -7437,10 +7659,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = 
"`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [OutlierDetectionPython.CBLOFDetector] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Integer\", \"Real\", \"Real\", \"Bool\", \"Union{Nothing, Integer}\", \"Integer\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -7457,7 +7679,7 @@ ":human_name" = "cblof detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nCBLOFDetector(n_clusters = 8,\n alpha = 0.9,\n beta = 5,\n use_weights = false,\n random_state = nothing,\n n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cblof](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cblof)\n""" +":docstring" = """```julia\nCBLOFDetector(n_clusters = 8,\n alpha = 0.9,\n beta = 5,\n use_weights = false,\n random_state = nothing,\n n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cblof](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cblof)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" @@ -7474,10 +7696,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" 
-":constructor" = "`nothing`" +":is_wrapper" = "`false`" [OutlierDetectionPython.LOCIDetector] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Real\", \"Real\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing)`" @@ -7494,7 +7716,7 @@ ":human_name" = "loci detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nLOCIDetector(alpha = 0.5,\n k = 3)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loci](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loci)\n""" +":docstring" = """```julia\nLOCIDetector(alpha = 0.5,\n k = 3)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loci](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loci)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" @@ -7511,10 +7733,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [OutlierDetectionPython.LMDDDetector] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Integer\", \"String\", \"Union{Nothing, Integer}\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -7531,7 +7753,7 @@ ":human_name" = "lmdd detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nLMDDDetector(n_iter = 50,\n dis_measure = \"aad\",\n 
random_state = nothing)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lmdd](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lmdd)\n""" +":docstring" = """```julia\nLMDDDetector(n_iter = 50,\n dis_measure = \"aad\",\n random_state = nothing)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lmdd](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lmdd)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" @@ -7548,10 +7770,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [OutlierDetectionPython.RODDetector] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Bool\",)`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing,)`" @@ -7568,7 +7790,7 @@ ":human_name" = "rod detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nRODDetector(parallel_execution = false)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.rod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.rod)\n""" +":docstring" = """```julia\nRODDetector(parallel_execution = false)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.rod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.rod)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = 
"https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" @@ -7585,7 +7807,7 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [SelfOrganizingMaps.SelfOrganizingMap] ":constructor" = "`nothing`" @@ -8253,366 +8475,403 @@ ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":constructor" = "`nothing`" -[MLJTransforms.Standardizer] +[MLJLIBSVMInterface.OneClassSVM] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Union{Function, AbstractVector{Symbol}}\", \"Bool\", \"Bool\", \"Bool\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" +":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Unknown}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Binary}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" 
-":package_license" = "MIT" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":package_license" = "unknown" ":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.Standardizer" -":hyperparameters" = "`(:features, :ignore, :ordered_factor, :count)`" -":is_pure_julia" = "`true`" -":human_name" = "standardizer" +":load_path" = "MLJLIBSVMInterface.OneClassSVM" +":hyperparameters" = "`(:kernel, :gamma, :nu, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" +":is_pure_julia" = "`false`" +":human_name" = "one-class support vector machine" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nStandardizer\n```\n\nA model type for constructing a standardizer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nStandardizer = @load Standardizer pkg=MLJTransforms\n```\n\nDo `model = Standardizer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `Standardizer(features=...)`.\n\nUse this model to standardize (whiten) a `Continuous` vector, or relevant columns of a table. The rescalings applied by this transformer to new data are always those learned during the training phase, which are generally different from what would actually standardize the new data.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table or any abstract vector with `Continuous` element scitype (any abstract float vector). 
Only features in a table with `Continuous` scitype can be standardized; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: one of the following, with the behavior indicated below:\n\n * `[]` (empty, the default): standardize all features (columns) having `Continuous` element scitype\n * non-empty vector of feature names (symbols): standardize only the `Continuous` features in the vector (if `ignore=false`) or `Continuous` features *not* named in the vector (`ignore=true`).\n * function or other callable: standardize a feature if the callable returns `true` on its name. For example, `Standardizer(features = name -> name in [:x1, :x3], ignore = true, count=true)` has the same effect as `Standardizer(features = [:x1, :x3], ignore = true, count=true)`, namely to standardize all `Continuous` and `Count` features, with the exception of `:x1` and `:x3`.\n\n Note this behavior is further modified if the `ordered_factor` or `count` flags are set to `true`; see below\n * `ignore=false`: whether to ignore or standardize specified `features`, as explained above\n * `ordered_factor=false`: if `true`, standardize any `OrderedFactor` feature wherever a `Continuous` feature would be standardized, as described above\n * `count=false`: if `true`, standardize any `Count` feature wherever a `Continuous` feature would be standardized, as described above\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with relevant features standardized according to the rescalings learned during fitting of `mach`.\n * `inverse_transform(mach, Z)`: apply the inverse transformation to `Z`, so that `inverse_transform(mach, transform(mach, Xnew))` is approximately the same as `Xnew`; unavailable if `ordered_factor` or `count` flags were set to `true`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_fit` - the names of features that will be standardized\n * `means` - the 
corresponding untransformed mean values\n * `stds` - the corresponding untransformed standard deviations\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features_fit`: the names of features that will be standardized\n\n# Examples\n\n```julia\nusing MLJ\n\nX = (ordinal1 = [1, 2, 3],\n ordinal2 = coerce([:x, :y, :x], OrderedFactor),\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = coerce([\"Your father\", \"he\", \"is\"], Multiclass));\n\njulia> schema(X)\n┌──────────┬──────────────────┐\n│ names │ scitypes │\n├──────────┼──────────────────┤\n│ ordinal1 │ Count │\n│ ordinal2 │ OrderedFactor{2} │\n│ ordinal3 │ Continuous │\n│ ordinal4 │ Continuous │\n│ nominal │ Multiclass{3} │\n└──────────┴──────────────────┘\n\nstand1 = Standardizer();\n\njulia> transform(fit!(machine(stand1, X)), X)\n(ordinal1 = [1, 2, 3],\n ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x],\n ordinal3 = [-1.0, 0.0, 1.0],\n ordinal4 = [1.0, 0.0, -1.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n\nstand2 = Standardizer(features=[:ordinal3, ], ignore=true, count=true);\n\njulia> transform(fit!(machine(stand2, X)), X)\n(ordinal1 = [-1.0, 0.0, 1.0],\n ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x],\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [1.0, 0.0, -1.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n```\n\nSee also [`OneHotEncoder`](@ref), [`ContinuousEncoder`](@ref).\n""" -":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "Standardizer" +":docstring" = """```julia\nOneClassSVM\n```\n\nA model type for constructing a one-class support vector machine, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported 
using\n\n```julia\nOneClassSVM = @load OneClassSVM pkg=LIBSVM\n```\n\nDo `model = OneClassSVM()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `OneClassSVM(kernel=...)`.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\nThis model is an outlier detection model delivering raw scores based on the decision function of a support vector machine. Like the [`NuSVC`](@ref) classifier, it uses the `nu` re-parameterization of the `cost` parameter appearing in standard support vector classification [`SVC`](@ref).\n\nTo extract normalized scores (\"probabilities\") wrap the model using `ProbabilisticDetector` from [OutlierDetection.jl](https://github.com/OutlierDetectionJL/OutlierDetection.jl). For threshold-based classification, wrap the probabilistic model using MLJ's `BinaryThresholdPredictor`. Examples of wrapping appear below.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```julia\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. 
Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> (gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) -> tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIBSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin errors and a lower bound of the fraction of support vectors. Denoted `ν` in the cited paper. Changing `nu` changes the thickness of the margin (a neighborhood of the decision surface) and a margin error is said to have occurred if a training observation lies on the wrong side of the surface or within the margin.\n * `cachesize=200.0`: cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `transform(mach, Xnew)`: return scores for outlierness, given features `Xnew` having the same scitype as `X` above. The greater the score, the more likely it is an outlier. This score is based on the SVM decision function. 
For normalized scores, wrap `model` using `ProbabilisticDetector` from OutlierDetection.jl and call `predict` instead, and for threshold-based classification, wrap again using `BinaryThresholdPredictor`. See the examples below.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `orientation`: this equals `1` if the decision function for `libsvm_model` is increasing with increasing outlierness, and `-1` if it is decreasing instead. Correspondingly, the `libsvm_model` attaches `true` to outliers in the first case, and `false` in the second. (The `scores` given in the MLJ report and generated by `MLJ.transform` already correct for this ambiguity, which is therefore only an issue for users directly accessing `libsvm_model`.)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Generating raw scores for outlierness\n\n```julia\nusing MLJ\nimport LIBSVM\nimport StableRNGs.StableRNG\n\nOneClassSVM = @load OneClassSVM pkg=LIBSVM # model type\nmodel = OneClassSVM(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nrng = StableRNG(123)\nXmatrix = randn(rng, 5, 3)\nXmatrix[1, 1] = 100.0\nX = MLJ.table(Xmatrix)\n\nmach = machine(model, X) |> fit!\n\n# training scores (outliers have larger scores):\njulia> report(mach).scores\n5-element Vector{Float64}:\n 6.711689156091755e-7\n -6.740101976655081e-7\n -6.711632439648446e-7\n -6.743015858874887e-7\n -6.745393717880104e-7\n\n# scores for new data:\nXnew = MLJ.table(rand(rng, 2, 3))\n\njulia> transform(mach, rand(rng, 2, 3))\n2-element Vector{Float64}:\n -6.746293022511047e-7\n -6.744289265348623e-7\n```\n\n## Generating probabilistic predictions of outlierness\n\nContinuing the previous example:\n\n```julia\nusing OutlierDetection\npmodel = ProbabilisticDetector(model)\npmach = machine(pmodel, X) |> fit!\n\n# probabilistic predictions on new 
data:\n\njulia> y_prob = predict(pmach, Xnew)\n2-element UnivariateFiniteVector{OrderedFactor{2}, String, UInt8, Float64}:\n UnivariateFinite{OrderedFactor{2}}(normal=>1.0, outlier=>9.57e-5)\n UnivariateFinite{OrderedFactor{2}}(normal=>1.0, outlier=>0.0)\n\n# probabilities for outlierness:\n\njulia> pdf.(y_prob, \"outlier\")\n2-element Vector{Float64}:\n 9.572583265925801e-5\n 0.0\n\n# raw scores are still available using `transform`:\n\njulia> transform(pmach, Xnew)\n2-element Vector{Float64}:\n 9.572583265925801e-5\n 0.0\n```\n\n## Outlier classification using a probability threshold:\n\nContinuing the previous example:\n\n```julia\ndmodel = BinaryThresholdPredictor(pmodel, threshold=0.9)\ndmach = machine(dmodel, X) |> fit!\n\njulia> yhat = predict(dmach, Xnew)\n2-element CategoricalArrays.CategoricalArray{String,1,UInt8}:\n \"normal\"\n \"normal\"\n```\n\n## User-defined kernels\n\nContinuing the first example:\n\n```julia\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = OneClassSVM(kernel=k)\nmach = machine(model, X) |> fit!\n\njulia> yhat = transform(mach, Xnew)\n2-element Vector{Float64}:\n -0.4825363352732942\n -0.4848772169720227\n```\n\nSee also [LIBSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).
For an alternative source of outlier detection models with an MLJ interface, see [OutlierDetection.jl](https://outlierdetectionjl.github.io/OutlierDetection.jl/dev/).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/mpastell/LIBSVM.jl" +":package_name" = "LIBSVM" +":name" = "OneClassSVM" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_wrapper" = "`false`" -[MLJTransforms.UnivariateTimeTypeToContinuous] +[MLJLIBSVMInterface.EpsilonSVR] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Union{Nothing, Dates.TimeType}\", \"Dates.Period\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing)`" +":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" +":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = 
"`Tuple{AbstractVector{<:ScientificTypesBase.ScientificTimeType}}`" -":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.UnivariateTimeTypeToContinuous" -":hyperparameters" = "`(:zero_time, :step)`" -":is_pure_julia" = "`true`" -":human_name" = "single variable transformer that creates continuous representations of temporally typed data" -":is_supervised" = "`false`" +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "unknown" +":prediction_type" = ":deterministic" +":load_path" = "MLJLIBSVMInterface.EpsilonSVR" +":hyperparameters" = "`(:kernel, :gamma, :epsilon, :cost, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" +":is_pure_julia" = "`false`" +":human_name" = "ϵ-support vector regressor" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nUnivariateTimeTypeToContinuous\n```\n\nA model type for constructing a single variable transformer that creates continuous representations of temporally typed data, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nUnivariateTimeTypeToContinuous = @load UnivariateTimeTypeToContinuous pkg=MLJTransforms\n```\n\nDo `model = UnivariateTimeTypeToContinuous()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateTimeTypeToContinuous(zero_time=...)`.\n\nUse this model to convert vectors with a `TimeType` element type to vectors of `Float64` type (`Continuous` element scitype).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector whose element type is a subtype of `Dates.TimeType`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `zero_time`: the time that is to correspond to 0.0 under transformations, with the type coinciding with the training data element type. If unspecified, the earliest time encountered in training is used.\n * `step::Period=Hour(24)`: time interval to correspond to one unit under transformation\n\n# Operations\n\n * `transform(mach, xnew)`: apply the encoding inferred when `mach` was fit\n\n# Fitted parameters\n\n`fitted_params(mach).fitresult` is the tuple `(zero_time, step)` actually used in transformations, which may differ from the user-specified hyper-parameters.\n\n# Example\n\n```julia\nusing MLJ\nusing Dates\n\nx = [Date(2001, 1, 1) + Day(i) for i in 0:4]\n\nencoder = UnivariateTimeTypeToContinuous(zero_time=Date(2000, 1, 1),\n step=Week(1))\n\nmach = machine(encoder, x)\nfit!(mach)\njulia> transform(mach, x)\n5-element Vector{Float64}:\n 52.285714285714285\n 52.42857142857143\n 52.57142857142857\n 52.714285714285715\n 52.857142\n```\n""" -":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.ScientificTimeType}`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "UnivariateTimeTypeToContinuous" -":target_in_fit" = "`false`" +":docstring" = """```julia\nEpsilonSVR\n```\n\nA model type for constructing a ϵ-support vector regressor, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type 
can be imported using\n\n```julia\nEpsilonSVR = @load EpsilonSVR pkg=LIBSVM\n```\n\nDo `model = EpsilonSVR()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EpsilonSVR(kernel=...)`.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\nThis model is an adaptation of the classifier `SVC` to regression, but has an additional parameter `epsilon` (denoted $ϵ$ in the cited reference).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```julia\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> (gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) -> tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions.
See [LIBSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `epsilon=0.1` (range (0, `Inf`)): the parameter denoted $ϵ$ in the cited reference; `epsilon` is the thickness of the penalty-free neighborhood of the graph of the prediction function (\"slab\" or \"tube\"). Specifically, a data point `(x, y)` incurs no training loss unless it is outside this neighborhood; the further away it is from this neighborhood, the greater the loss penalty.\n * `cachesize=200.0`: cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```julia\nusing MLJ\nimport LIBSVM\n\nEpsilonSVR = @load EpsilonSVR pkg=LIBSVM # model type\nmodel = EpsilonSVR(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = make_regression(rng=123) # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew, _ = make_regression(3, rng=123)\n\njulia> yhat = predict(mach, Xnew)\n3-element Vector{Float64}:\n
0.2512132502584155\n 0.007340201523624579\n -0.2482949812264707\n```\n\n## User-defined kernels\n\n```julia\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = EpsilonSVR(kernel=k)\nmach = machine(model, X, y) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element Vector{Float64}:\n 1.1121225361666656\n 0.04667702229741916\n -0.6958148424680672\n```\n\nSee also [`NuSVR`](@ref), [LIBSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/mpastell/LIBSVM.jl" +":package_name" = "LIBSVM" +":name" = "EpsilonSVR" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`AbstractVector{<:ScientificTypesBase.ScientificTimeType}`" -":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_wrapper" = "`false`" -[MLJTransforms.OneHotEncoder] +[MLJLIBSVMInterface.LinearSVC] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Vector{Symbol}\", \"Bool\", \"Bool\", \"Bool\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_types" = "`(\"LIBSVM.Linearsolver.LINEARSOLVER\",
\"Float64\", \"Float64\", \"Float64\")`" +":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":output_scitype" = "`ScientificTypesBase.Table`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, Any}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.OneHotEncoder" -":hyperparameters" = "`(:features, :drop_last, :ordered_factor, :ignore)`" -":is_pure_julia" = "`true`" -":human_name" = "one-hot encoder" -":is_supervised" = "`false`" +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "unknown" +":prediction_type" = ":deterministic" +":load_path" = "MLJLIBSVMInterface.LinearSVC" +":hyperparameters" = "`(:solver, :tolerance, :cost, :bias)`" +":is_pure_julia" = "`false`" +":human_name" = "linear support vector classifier" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nOneHotEncoder\n```\n\nA model type for constructing a one-hot encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nOneHotEncoder = @load OneHotEncoder pkg=MLJTransforms\n```\n\nDo `model = OneHotEncoder()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `OneHotEncoder(features=...)`.\n\nUse this model to one-hot encode the `Multiclass` and `OrderedFactor` features (columns) of some table, leaving other columns unchanged.\n\nNew data to be transformed may lack features present in the fit data, but no *new* features can be present.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo ensure *all* features are transformed into `Continuous` features, or dropped, use [`ContinuousEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table. Columns can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded. Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of symbols (feature names). If empty (default) then all `Multiclass` and `OrderedFactor` features are encoded. Otherwise, encoding is further restricted to the specified features (`ignore=false`) or the unspecified features (`ignore=true`). This default behavior can be modified by the `ordered_factor` flag.\n * `ordered_factor=false`: when `true`, `OrderedFactor` features are universally excluded\n * `drop_last=false`: whether to drop the column corresponding to the final class of encoded features. 
For example, a three-class feature is spawned into three new features if `drop_last=false`, but just two features otherwise.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `all_features`: names of all features encountered in training\n * `fitted_levels_given_feature`: dictionary of the levels associated with each feature encoded, keyed on the feature name\n * `ref_name_pairs_given_feature`: dictionary of pairs `r => ftr` (such as `0x00000001 => :grad__A`) where `r` is a CategoricalArrays.jl reference integer representing a level, and `ftr` the corresponding new feature name; the dictionary is keyed on the names of features that are encoded\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features_to_be_encoded`: names of input features to be encoded\n * `new_features`: names of all output features\n\n# Example\n\n```julia\nusing MLJ\n\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n height=[1.85, 1.67, 1.5, 1.67],\n n_devices=[3, 2, 4, 3])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names │ scitypes │\n├───────────┼──────────────────┤\n│ name │ Multiclass{4} │\n│ grade │ OrderedFactor{3} │\n│ height │ Continuous │\n│ n_devices │ Count │\n└───────────┴──────────────────┘\n\nhot = OneHotEncoder(drop_last=true)\nmach = fit!(machine(hot, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names │ scitypes │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John │ Continuous │\n│ name__Lee │ Continuous │\n│ grade__A │ Continuous │\n│ grade__B │ Continuous │\n│ height │ Continuous │\n│ n_devices │ Count │\n└──────────────┴────────────┘\n```\n\nSee also [`ContinuousEncoder`](@ref).\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "OneHotEncoder" -":target_in_fit" = 
"`false`" -":supports_class_weights" = "`false`" +":docstring" = """```julia\nLinearSVC\n```\n\nA model type for constructing a linear support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nLinearSVC = @load LinearSVC pkg=LIBSVM\n```\n\nDo `model = LinearSVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearSVC(solver=...)`.\n\nReference for algorithm and core C-library: Rong-En Fan et al (2008): \"LIBLINEAR: A Library for Large Linear Classification.\" *Journal of Machine Learning Research* 9 1871-1874. Available at [https://www.csie.ntu.edu.tw/~cjlin/papers/liblinear.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/liblinear.pdf). \n\nThis model type is similar to `SVC` from the same package with the setting `kernel=LIBSVM.Kernel.KERNEL.Linear`, but is optimized for the linear case.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```julia\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n * `w`: a dictionary of class weights, keyed on `levels(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `solver=LIBSVM.Linearsolver.L2R_L2LOSS_SVC_DUAL`: linear solver, which must be one of the following from the LIBSVM.jl package:\n\n * `LIBSVM.Linearsolver.L2R_LR`: L2-regularized logistic regression (primal))\n * `LIBSVM.Linearsolver.L2R_L2LOSS_SVC_DUAL`: L2-regularized L2-loss support vector classification (dual)\n * `LIBSVM.Linearsolver.L2R_L2LOSS_SVC`: 
L2-regularized L2-loss support vector classification (primal)\n * `LIBSVM.Linearsolver.L2R_L1LOSS_SVC_DUAL`: L2-regularized L1-loss support vector classification (dual)\n * `LIBSVM.Linearsolver.MCSVM_CS`: support vector classification by Crammer and Singer\n * `LIBSVM.Linearsolver.L1R_L2LOSS_SVC`: L1-regularized L2-loss support vector classification\n * `LIBSVM.Linearsolver.L1R_LR`: L1-regularized logistic regression\n * `LIBSVM.Linearsolver.L2R_LR_DUAL`: L2-regularized logistic regression (dual)\n * `tolerance::Float64=Inf`: tolerance for the stopping criterion\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `bias= -1.0`: if `bias >= 0`, instance `x` becomes `[x; bias]`; if `bias < 0`, no bias term added (default -1)\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Examples\n\n```julia\nusing MLJ\nimport LIBSVM\n\nLinearSVC = @load LinearSVC pkg=LIBSVM # model type\nmodel = LinearSVC(solver=LIBSVM.Linearsolver.L2R_LR) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"versicolor\"\n \"virginica\"\n```\n\n## Incorporating class weights\n\n```julia\nweights = Dict(\"virginica\" => 1, \"versicolor\" => 20, \"setosa\" => 1)\nmach = machine(model, X, y, weights) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element
CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"versicolor\"\n \"versicolor\"\n \"versicolor\"\n```\n\nSee also the [`SVC`](@ref) and [`NuSVC`](@ref) classifiers, and [LIBSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/liblinear/blob/master/README).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/mpastell/LIBSVM.jl" +":package_name" = "LIBSVM" +":name" = "LinearSVC" +":target_in_fit" = "`true`" +":supports_class_weights" = "`true`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform", ":OneHotEncoder"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Table`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_wrapper" = "`false`" -[MLJTransforms.ContinuousEncoder] +[MLJLIBSVMInterface.ProbabilisticSVC] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Bool\", \"Bool\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing)`" +":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" +":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing,
nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, Any}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.ContinuousEncoder" -":hyperparameters" = "`(:drop_last, :one_hot_ordered_factors)`" -":is_pure_julia" = "`true`" -":human_name" = "continuous encoder" -":is_supervised" = "`false`" +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "unknown" +":prediction_type" = ":probabilistic" +":load_path" = "MLJLIBSVMInterface.ProbabilisticSVC" +":hyperparameters" = "`(:kernel, :gamma, :cost, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" +":is_pure_julia" = "`false`" +":human_name" = "probabilistic C-support vector classifier" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nContinuousEncoder\n```\n\nA model type for constructing a continuous encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nContinuousEncoder = @load ContinuousEncoder pkg=MLJTransforms\n```\n\nDo `model = ContinuousEncoder()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `ContinuousEncoder(drop_last=...)`.\n\nUse this model to arrange all features (features) of a table to have `Continuous` element scitype, by applying the following protocol to each feature `ftr`:\n\n * If `ftr` is already `Continuous` retain it.\n * If `ftr` is `Multiclass`, one-hot encode it.\n * If `ftr` is `OrderedFactor`, replace it with `coerce(ftr, Continuous)` (vector of floating point integers), unless `ordered_factors=false` is specified, in which case one-hot encode it.\n * If `ftr` is `Count`, replace it with `coerce(ftr, Continuous)`.\n * If `ftr` has some other element scitype, or was not observed in fitting the encoder, drop it from the table.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo selectively one-hot-encode categorical features (without dropping features) use [`OneHotEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table. features can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded. Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `drop_last=true`: whether to drop the column corresponding to the final class of one-hot encoded features. 
For example, a three-class feature is spawned into three new features if `drop_last=false`, but two just features otherwise.\n * `one_hot_ordered_factors=false`: whether to one-hot any feature with `OrderedFactor` element scitype, or to instead coerce it directly to a (single) `Continuous` feature using the order\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_to_keep`: names of features that will not be dropped from the table\n * `one_hot_encoder`: the `OneHotEncoder` model instance for handling the one-hot encoding\n * `one_hot_encoder_fitresult`: the fitted parameters of the `OneHotEncoder` model\n\n# Report\n\n * `features_to_keep`: names of input features that will not be dropped from the table\n * `new_features`: names of all output features\n\n# Example\n\n```julia\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n height=[1.85, 1.67, 1.5, 1.67],\n n_devices=[3, 2, 4, 3],\n comments=[\"the force\", \"be\", \"with you\", \"too\"])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names │ scitypes │\n├───────────┼──────────────────┤\n│ name │ Multiclass{4} │\n│ grade │ OrderedFactor{3} │\n│ height │ Continuous │\n│ n_devices │ Count │\n│ comments │ Textual │\n└───────────┴──────────────────┘\n\nencoder = ContinuousEncoder(drop_last=true)\nmach = fit!(machine(encoder, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names │ scitypes │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John │ Continuous │\n│ name__Lee │ Continuous │\n│ grade │ Continuous │\n│ height │ Continuous │\n│ n_devices │ Continuous │\n└──────────────┴────────────┘\n\njulia> setdiff(schema(X).names, report(mach).features_to_keep) # dropped features\n1-element Vector{Symbol}:\n :comments\n\n```\n\nSee also [`OneHotEncoder`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":package_url" = 
"https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "ContinuousEncoder" -":target_in_fit" = "`false`" -":supports_class_weights" = "`false`" +":docstring" = """```julia\nProbabilisticSVC\n```\n\nA model type for constructing a probabilistic C-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nProbabilisticSVC = @load ProbabilisticSVC pkg=LIBSVM\n```\n\nDo `model = ProbabilisticSVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ProbabilisticSVC(kernel=...)`.\n\nThis model is identical to [`SVC`](@ref) with the exception that it predicts probabilities, instead of actual class labels. Probabilities are computed using Platt scaling, which will add to the total computation time.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). 
\n\n[Platt, John (1999): \"Probabilistic Outputs for Support Vector Machines and Comparisons to Regularized Likelihood Methods.\"](https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393)\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```julia\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n * `w`: a dictionary of class weights, keyed on `levels(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> (gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) -> tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIBSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used.
Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return probabilistic predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```julia\nusing MLJ\nimport LIBSVM\n\nProbabilisticSVC = @load ProbabilisticSVC pkg=LIBSVM # model type\nmodel = ProbabilisticSVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> probs = predict(mach, Xnew)\n3-element UnivariateFiniteVector{Multiclass{3}, String, UInt32, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>0.00186, versicolor=>0.003, virginica=>0.995)\n UnivariateFinite{Multiclass{3}}(setosa=>0.000563, versicolor=>0.0554, virginica=>0.944)\n UnivariateFinite{Multiclass{3}}(setosa=>1.4e-6, versicolor=>1.68e-6, virginica=>1.0)\n\n\njulia> labels = mode.(probs)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n 
\"virginica\"\n```\n\n## User-defined kernels\n\n```julia\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = ProbabilisticSVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\nprobs = predict(mach, Xnew)\n```\n\n## Incorporating class weights\n\nIn either scenario above, we can do:\n\n```julia\nweights = Dict(\"virginica\" => 1, \"versicolor\" => 20, \"setosa\" => 1)\nmach = machine(model, X, y, weights) |> fit!\n\nprobs = predict(mach, Xnew)\n```\n\nSee also the classifiers [`SVC`](@ref), [`NuSVC`](@ref) and [`LinearSVC`](@ref), and [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/mpastell/LIBSVM.jl" +":package_name" = "LIBSVM" +":name" = "ProbabilisticSVC" +":target_in_fit" = "`true`" +":supports_class_weights" = "`true`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform", ":ContinuousEncoder"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_wrapper" = "`false`" -[MLJTransforms.FrequencyEncoder] +[MLJLIBSVMInterface.NuSVR] ":constructor" = 
"`nothing`" -":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Bool\", \"Type\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" +":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":output_scitype" = "`ScientificTypesBase.Table`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "unknown" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.FrequencyEncoder" -":hyperparameters" = "`(:features, :ignore, :ordered_factor, :normalize, :output_type)`" -":is_pure_julia" = "`true`" -":human_name" = "frequency encoder" -":is_supervised" = "`false`" +":prediction_type" = ":deterministic" +":load_path" = "MLJLIBSVMInterface.NuSVR" +":hyperparameters" = "`(:kernel, :gamma, :nu, :cost, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" +":is_pure_julia" = "`false`" +":human_name" = "ν-support vector regressor" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nFrequencyEncoder\n```\n\nA model type for constructing a frequency encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nFrequencyEncoder = @load FrequencyEncoder 
pkg=MLJTransforms\n```\n\nDo `model = FrequencyEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FrequencyEncoder(features=...)`.\n\n`FrequencyEncoder` implements frequency encoding which replaces the categorical values in the specified categorical features with their (normalized or raw) frequencies of occurrence in the dataset. \n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or in clude from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded.\n * `ignore=true`: Whether to exclude or include the features given in `features`\n * `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them\n * `normalize=false`: Whether to use normalized frequencies that sum to 1 over category values or to use raw counts.\n * `output_type=Float32`: The type of the output values. The default is `Float32`, but you can set it to `Float64` or any other type that can hold the frequency values.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply frequency encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. 
Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `statistic_given_feat_val`: A dictionary that maps each level for each column in a subset of the categorical features of X into its frequency.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",] \nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",] \nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\n\n# Check scitype coercions:\nschema(X)\n\nencoder = FrequencyEncoder(ordered_factor = false, normalize=true)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia > Xnew\n (A = [2, 1, 2, 2, 2],\n B = [1.0, 2.0, 3.0, 4.0, 5.0],\n C = [4, 4, 4, 1, 4],\n D = [3, 2, 3, 2, 3],\n E = CategoricalArrays.CategoricalValue{Int64, UInt32}[1, 2, 3, 4, 5],)\n```\n\nSee also [`TargetEncoder`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "FrequencyEncoder" -":target_in_fit" = "`false`" +":docstring" = """```julia\nNuSVR\n```\n\nA model type for constructing a ν-support vector regressor, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nNuSVR = @load NuSVR pkg=LIBSVM\n```\n\nDo `model = NuSVR()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `NuSVR(kernel=...)`.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\nThis model is a re-parameterization of `EpsilonSVR` in which the `epsilon` hyper-parameter is replaced with a new parameter `nu` (denoted $ν$ in the cited reference) which attempts to control the number of support vectors directly.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```julia\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be\n\n called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. 
See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of training errors and a lower bound of the fraction of support vectors. Denoted $ν$ in the cited paper. Changing `nu` changes the thickness of some neighborhood of the graph of the prediction function (\"tube\" or \"slab\") and a training error is said to occur when a data point `(x, y)` lies outside of that neighborhood.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```julia\nusing MLJ\nimport LIBSVM\n\nNuSVR = @load NuSVR pkg=LIBSVM # model type\nmodel = NuSVR(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = make_regression(rng=123) # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew, _ = make_regression(3, rng=123)\n\njulia> yhat = predict(mach, Xnew)\n3-element Vector{Float64}:\n 0.2008156459920009\n 
0.1131520519131709\n -0.2076156254934889\n```\n\n## User-defined kernels\n\n```julia\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = NuSVR(kernel=k)\nmach = machine(model, X, y) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element Vector{Float64}:\n 1.1211558175964662\n 0.06677125944808422\n -0.6817578942749346\n```\n\nSee also [`EpsilonSVR`](@ref), [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/mpastell/LIBSVM.jl" +":package_name" = "LIBSVM" +":name" = "NuSVR" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Table`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_wrapper" = "`false`" -[MLJTransforms.TargetEncoder] +[MLJLIBSVMInterface.NuSVC] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Real\", \"Union{Real, Symbol}\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":hyperparameter_types" = "`(\"Any\", \"Float64\", 
\"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" +":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table, ScientificTypesBase.Unknown}`" -":output_scitype" = "`ScientificTypesBase.Table`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "unknown" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.TargetEncoder" -":hyperparameters" = "`(:features, :ignore, :ordered_factor, :lambda, :m)`" -":is_pure_julia" = "`true`" -":human_name" = "target encoder" -":is_supervised" = "`false`" +":prediction_type" = ":deterministic" +":load_path" = "MLJLIBSVMInterface.NuSVC" +":hyperparameters" = "`(:kernel, :gamma, :nu, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" +":is_pure_julia" = "`false`" +":human_name" = "ν-support vector classifier" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nTargetEncoder\n```\n\nA model type for constructing a target encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nTargetEncoder = @load TargetEncoder pkg=MLJTransforms\n```\n\nDo `model = TargetEncoder()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `TargetEncoder(features=...)`.\n\n`TargetEncoder` implements target encoding as defined in [1] to encode categorical variables into continuous ones using statistics from the target variable.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous` or `Count` for regression problems and `Multiclass` or `OrderedFactor` for classification problems; check the scitype with `schema(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or in clude from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded.\n\n * `ignore=true`: Whether to exclude or include the features given in `features`\n\n * `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them\n\n * `λ`: Shrinkage hyperparameter used to mix between posterior and prior statistics as described in [1]\n * `m`: An integer hyperparameter to compute shrinkage as described in [1]. If `m=:auto` then m will be computed using empirical Bayes estimation as described in [1]\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply target encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. 
Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `task`: Whether the task is `Classification` or `Regression`\n * `y_statistic_given_feat_level`: A dictionary with the necessary statistics to encode each categorical feature. It maps each level in each categorical feature to a statistic computed over the target.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",]\nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",]\nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Define the target variable\ny = [\"c1\", \"c2\", \"c3\", \"c1\", \"c2\",]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\ny = coerce(y, Multiclass)\n\nencoder = TargetEncoder(ordered_factor = false, lambda = 1.0, m = 0,)\nmach = fit!(machine(encoder, X, y))\nXnew = transform(mach, X)\n\njulia > schema(Xnew)\n┌───────┬──────────────────┬─────────────────────────────────┐\n│ names │ scitypes │ types │\n├───────┼──────────────────┼─────────────────────────────────┤\n│ A_1 │ Continuous │ Float64 │\n│ A_2 │ Continuous │ Float64 │\n│ A_3 │ Continuous │ Float64 │\n│ B │ Continuous │ Float64 │\n│ C_1 │ Continuous │ Float64 │\n│ C_2 │ Continuous │ Float64 │\n│ C_3 │ Continuous │ Float64 │\n│ D_1 │ Continuous │ Float64 │\n│ D_2 │ Continuous │ Float64 │\n│ D_3 │ Continuous │ Float64 │\n│ E │ OrderedFactor{5} │ CategoricalValue{Int64, UInt32} │\n└───────┴──────────────────┴─────────────────────────────────┘\n```\n\n# Reference\n\n[1] Micci-Barreca, Daniele. 
“A preprocessing scheme for high-cardinality categorical attributes in classification and prediction problems” SIGKDD Explor. Newsl. 3, 1 (July 2001), 27–32.\n\nSee also [`OneHotEncoder`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "TargetEncoder" +":docstring" = """```julia\nNuSVC\n```\n\nA model type for constructing a ν-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nNuSVC = @load NuSVC pkg=LIBSVM\n```\n\nDo `model = NuSVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `NuSVC(kernel=...)`.\n\nThis model is a re-parameterization of the [`SVC`](@ref) classifier, where `nu` replaces `cost`, and is mathematically equivalent to it. The parameter `nu` allows more direct control over the number of support vectors (see under \"Hyper-parameters\").\n\nThis model always predicts actual class labels. For probabilistic predictions, use instead [`ProbabilisticNuSVC`](@ref).\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). 
\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```julia\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin errors and a lower bound of the fraction of support vectors. Denoted `ν` in the cited paper. 
Changing `nu` changes the thickness of the margin (a neighborhood of the decision surface) and a margin error is said to have occurred if a training observation lies on the wrong side of the surface or within the margin.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```julia\nusing MLJ\nimport LIBSVM\n\nNuSVC = @load NuSVC pkg=LIBSVM # model type\nmodel = NuSVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```julia\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = NuSVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\nSee also the classifiers [`SVC`](@ref) and [`LinearSVC`](@ref), [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation. 
[documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/mpastell/LIBSVM.jl" +":package_name" = "LIBSVM" +":name" = "NuSVC" ":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Table`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_wrapper" = "`false`" -[MLJTransforms.UnivariateBoxCoxTransformer] +[MLJLIBSVMInterface.ProbabilisticNuSVC] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Int64\", \"Bool\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing)`" +":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" +":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{AbstractVector{ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":fit_data_scitype" = 
"`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.UnivariateBoxCoxTransformer" -":hyperparameters" = "`(:n, :shift)`" -":is_pure_julia" = "`true`" -":human_name" = "single variable Box-Cox transformer" -":is_supervised" = "`false`" +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "unknown" +":prediction_type" = ":probabilistic" +":load_path" = "MLJLIBSVMInterface.ProbabilisticNuSVC" +":hyperparameters" = "`(:kernel, :gamma, :nu, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" +":is_pure_julia" = "`false`" +":human_name" = "probabilistic ν-support vector classifier" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nUnivariateBoxCoxTransformer\n```\n\nA model type for constructing a single variable Box-Cox transformer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nUnivariateBoxCoxTransformer = @load UnivariateBoxCoxTransformer pkg=MLJTransforms\n```\n\nDo `model = UnivariateBoxCoxTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateBoxCoxTransformer(n=...)`.\n\nBox-Cox transformations attempt to make data look more normally distributed. 
This can improve performance and assist in the interpretation of models which suppose that data is generated by a normal distribution.\n\nA Box-Cox transformation (with shift) is of the form\n\n```julia\nx -> ((x + c)^λ - 1)/λ\n```\n\nfor some constant `c` and real `λ`, unless `λ = 0`, in which case the above is replaced with\n\n```julia\nx -> log(x + c)\n```\n\nGiven user-specified hyper-parameters `n::Integer` and `shift::Bool`, the present implementation learns the parameters `c` and `λ` from the training data as follows: If `shift=true` and zeros are encountered in the data, then `c` is set to `0.2` times the data mean. If there are no zeros, then no shift is applied. Finally, `n` different values of `λ` between `-0.4` and `3` are considered, with `λ` fixed to the value maximizing normality of the transformed data.\n\n*Reference:* [Wikipedia entry for power transform](https://en.wikipedia.org/wiki/Power_transform).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with element scitype `Continuous`; check the scitype with `scitype(x)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `n=171`: number of values of the exponent `λ` to try\n * `shift=false`: whether to include a preliminary constant translation in transformations, in the presence of zeros\n\n# Operations\n\n * `transform(mach, xnew)`: apply the Box-Cox transformation learned when fitting `mach`\n * `inverse_transform(mach, z)`: reconstruct the vector `z` whose transformation learned by `mach` is `z`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `λ`: the learned Box-Cox exponent\n * `c`: the learned shift\n\n# Examples\n\n```julia\nusing MLJ\nusing UnicodePlots\nusing Random\nRandom.seed!(123)\n\ntransf = UnivariateBoxCoxTransformer()\n\nx = randn(1000).^2\n\nmach = machine(transf, x)\nfit!(mach)\n\nz = transform(mach, x)\n\njulia> 
histogram(x)\n ┌ ┐\n [ 0.0, 2.0) ┤███████████████████████████████████ 848\n [ 2.0, 4.0) ┤████▌ 109\n [ 4.0, 6.0) ┤█▍ 33\n [ 6.0, 8.0) ┤▍ 7\n [ 8.0, 10.0) ┤▏ 2\n [10.0, 12.0) ┤ 0\n [12.0, 14.0) ┤▏ 1\n └ ┘\n Frequency\n\njulia> histogram(z)\n ┌ ┐\n [-5.0, -4.0) ┤█▎ 8\n [-4.0, -3.0) ┤████████▊ 64\n [-3.0, -2.0) ┤█████████████████████▊ 159\n [-2.0, -1.0) ┤█████████████████████████████▊ 216\n [-1.0, 0.0) ┤███████████████████████████████████ 254\n [ 0.0, 1.0) ┤█████████████████████████▊ 188\n [ 1.0, 2.0) ┤████████████▍ 90\n [ 2.0, 3.0) ┤██▊ 20\n [ 3.0, 4.0) ┤▎ 1\n └ ┘\n Frequency\n\n```\n""" -":inverse_transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "UnivariateBoxCoxTransformer" -":target_in_fit" = "`false`" +":docstring" = """```julia\nProbabilisticNuSVC\n```\n\nA model type for constructing a probabilistic ν-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nProbabilisticNuSVC = @load ProbabilisticNuSVC pkg=LIBSVM\n```\n\nDo `model = ProbabilisticNuSVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ProbabilisticNuSVC(kernel=...)`.\n\nThis model is identical to [`NuSVC`](@ref) with the exception that it predicts probabilities, instead of actual class labels. Probabilities are computed using Platt scaling, which will add to total computation time.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). 
\n\n[Platt, John (1999): \"Probabilistic Outputs for Support Vector Machines and Comparisons to Regularized Likelihood Methods.\"](https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393)\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```julia\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. 
Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin errors and a lower bound of the fraction of support vectors. Denoted `ν` in the cited paper. Changing `nu` changes the thickness of the margin (a neighborhood of the decision surface) and a margin error is said to have occurred if a training observation lies on the wrong side of the surface or within the margin.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```julia\nusing MLJ\nimport LIBSVM\n\nProbabilisticNuSVC = @load ProbabilisticNuSVC pkg=LIBSVM # model type\nmodel = ProbabilisticNuSVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> probs = predict(mach, Xnew)\n3-element UnivariateFiniteVector{Multiclass{3}, String, UInt32, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>0.00313, versicolor=>0.0247, virginica=>0.972)\n UnivariateFinite{Multiclass{3}}(setosa=>0.000598, versicolor=>0.0155, 
virginica=>0.984)\n UnivariateFinite{Multiclass{3}}(setosa=>2.27e-6, versicolor=>2.73e-6, virginica=>1.0)\n\njulia> yhat = mode.(probs)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```julia\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = ProbabilisticNuSVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\nprobs = predict(mach, Xnew)\n```\n\nSee also the classifiers [`NuSVC`](@ref), [`SVC`](@ref), [`ProbabilisticSVC`](@ref) and [`LinearSVC`](@ref). And see [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation. [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/mpastell/LIBSVM.jl" +":package_name" = "LIBSVM" +":name" = "ProbabilisticNuSVC" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform", ":UnivariateBoxCoxTransformer"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_wrapper" = "`false`" 
-[MLJTransforms.InteractionTransformer] +[MLJLIBSVMInterface.SVC] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, Vector{Symbol}}\")`" +":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" +":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, Any}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "unknown" +":prediction_type" = ":deterministic" +":load_path" = "MLJLIBSVMInterface.SVC" +":hyperparameters" = "`(:kernel, :gamma, :cost, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" +":is_pure_julia" = "`false`" +":human_name" = "C-support vector classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nSVC\n```\n\nA model type for constructing a C-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nSVC = @load SVC pkg=LIBSVM\n```\n\nDo `model = SVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SVC(kernel=...)`.\n\nThis model predicts actual class labels. To predict probabilities, use instead [`ProbabilisticSVC`](@ref).\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. 
Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```julia\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n * `w`: a dictionary of class weights, keyed on `levels(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. 
Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```julia\nusing MLJ\nimport LIBSVM\n\nSVC = @load SVC pkg=LIBSVM # model type\nmodel = SVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```julia\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = SVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## Incorporating class weights\n\nIn either scenario above, we can do:\n\n```julia\nweights = 
Dict(\"virginica\" => 1, \"versicolor\" => 20, \"setosa\" => 1)\nmach = machine(model, X, y, weights) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"versicolor\"\n \"versicolor\"\n \"versicolor\"\n```\n\nSee also the classifiers [`ProbabilisticSVC`](@ref), [`NuSVC`](@ref) and [`LinearSVC`](@ref). And see [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/mpastell/LIBSVM.jl" +":package_name" = "LIBSVM" +":name" = "SVC" +":target_in_fit" = "`true`" +":supports_class_weights" = "`true`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" + +[MLJTransforms.Standardizer] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Union{Function, AbstractVector{Symbol}}\", \"Bool\", \"Bool\", \"Bool\")`" ":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{}`" -":output_scitype" = "`ScientificTypesBase.Table`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`Union{ScientificTypesBase.Table, 
AbstractVector{<:ScientificTypesBase.Continuous}}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Static`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.InteractionTransformer" -":hyperparameters" = "`(:order, :features)`" +":load_path" = "MLJTransforms.Standardizer" +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :count)`" ":is_pure_julia" = "`true`" -":human_name" = "interaction transformer" +":human_name" = "standardizer" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nInteractionTransformer\n```\n\nA model type for constructing a interaction transformer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nInteractionTransformer = @load InteractionTransformer pkg=MLJTransforms\n```\n\nDo `model = InteractionTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `InteractionTransformer(order=...)`.\n\nGenerates all polynomial interaction terms up to the given order for the subset of chosen columns. Any column that contains elements with scitype `<:Infinite` is a valid basis to generate interactions. 
If `features` is not specified, all such columns with scitype `<:Infinite` in the table are used as a basis.\n\nIn MLJ or MLJBase, you can transform features `X` with the single call\n\n```julia\ntransform(machine(model), X)\n```\n\nSee also the example below.\n\n# Hyper-parameters\n\n * `order`: Maximum order of interactions to be generated.\n * `features`: Restricts interations generation to those columns\n\n# Operations\n\n * `transform(machine(model), X)`: Generates polynomial interaction terms out of table `X` using the hyper-parameters specified in `model`.\n\n# Example\n\n```julia\nusing MLJ\n\nX = (\n A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"]\n)\nit = InteractionTransformer(order=3)\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],\n A_C = [7, 16, 27],\n B_C = [28, 40, 54],\n A_B_C = [28, 80, 162],)\n\nit = InteractionTransformer(order=2, features=[:A, :B])\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],)\n\n```\n""" -":inverse_transform_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":docstring" = """```julia\nStandardizer\n```\n\nA model type for constructing a standardizer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nStandardizer = @load Standardizer pkg=MLJTransforms\n```\n\nDo `model = Standardizer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `Standardizer(features=...)`.\n\nUse this model to standardize (whiten) a `Continuous` vector, or relevant columns of a table. 
The rescalings applied by this transformer to new data are always those learned during the training phase, which are generally different from what would actually standardize the new data.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table or any abstract vector with `Continuous` element scitype (any abstract float vector). Only features in a table with `Continuous` scitype can be standardized; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: one of the following, with the behavior indicated below:\n\n * `[]` (empty, the default): standardize all features (columns) having `Continuous` element scitype\n * non-empty vector of feature names (symbols): standardize only the `Continuous` features in the vector (if `ignore=false`) or `Continuous` features *not* named in the vector (`ignore=true`).\n * function or other callable: standardize a feature if the callable returns `true` on its name. 
For example, `Standardizer(features = name -> name in [:x1, :x3], ignore = true, count=true)` has the same effect as `Standardizer(features = [:x1, :x3], ignore = true, count=true)`, namely to standardize all `Continuous` and `Count` features, with the exception of `:x1` and `:x3`.\n\n Note this behavior is further modified if the `ordered_factor` or `count` flags are set to `true`; see below\n * `ignore=false`: whether to ignore or standardize specified `features`, as explained above\n * `ordered_factor=false`: if `true`, standardize any `OrderedFactor` feature wherever a `Continuous` feature would be standardized, as described above\n * `count=false`: if `true`, standardize any `Count` feature wherever a `Continuous` feature would be standardized, as described above\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with relevant features standardized according to the rescalings learned during fitting of `mach`.\n * `inverse_transform(mach, Z)`: apply the inverse transformation to `Z`, so that `inverse_transform(mach, transform(mach, Xnew))` is approximately the same as `Xnew`; unavailable if `ordered_factor` or `count` flags were set to `true`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_fit` - the names of features that will be standardized\n * `means` - the corresponding untransformed mean values\n * `stds` - the corresponding untransformed standard deviations\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features_fit`: the names of features that will be standardized\n\n# Examples\n\n```julia\nusing MLJ\n\nX = (ordinal1 = [1, 2, 3],\n ordinal2 = coerce([:x, :y, :x], OrderedFactor),\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = coerce([\"Your father\", \"he\", \"is\"], Multiclass));\n\njulia> schema(X)\n┌──────────┬──────────────────┐\n│ names │ scitypes │\n├──────────┼──────────────────┤\n│ ordinal1 │ Count │\n│ ordinal2 │ OrderedFactor{2} │\n│ ordinal3 │ Continuous 
│\n│ ordinal4 │ Continuous │\n│ nominal │ Multiclass{3} │\n└──────────┴──────────────────┘\n\nstand1 = Standardizer();\n\njulia> transform(fit!(machine(stand1, X)), X)\n(ordinal1 = [1, 2, 3],\n ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x],\n ordinal3 = [-1.0, 0.0, 1.0],\n ordinal4 = [1.0, 0.0, -1.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n\nstand2 = Standardizer(features=[:ordinal3, ], ignore=true, count=true);\n\njulia> transform(fit!(machine(stand2, X)), X)\n(ordinal1 = [-1.0, 0.0, 1.0],\n ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x],\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [1.0, 0.0, -1.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n```\n\nSee also [`OneHotEncoder`](@ref), [`ContinuousEncoder`](@ref).\n""" +":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" ":package_name" = "MLJTransforms" -":name" = "InteractionTransformer" +":name" = "Standardizer" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":is_wrapper" = "`false`" - -[MLJTransforms.UnivariateDiscretizer] +":input_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" 
":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Int64\",)`" + +[MLJTransforms.UnivariateTimeTypeToContinuous] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Union{Nothing, Dates.TimeType}\", \"Dates.Period\")`" ":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing,)`" +":hyperparameter_ranges" = "`(nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.OrderedFactor}`" +":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.ScientificTimeType}}`" +":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.UnivariateDiscretizer" -":hyperparameters" = "`(:n_classes,)`" +":load_path" = "MLJTransforms.UnivariateTimeTypeToContinuous" +":hyperparameters" = "`(:zero_time, :step)`" ":is_pure_julia" = "`true`" -":human_name" = "single variable discretizer" +":human_name" = "single variable transformer that creates continuous representations of temporally typed data" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nUnivariateDiscretizer\n```\n\nA model type for constructing a single variable discretizer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nUnivariateDiscretizer = @load UnivariateDiscretizer pkg=MLJTransforms\n```\n\nDo `model = UnivariateDiscretizer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateDiscretizer(n_classes=...)`.\n\nDiscretization converts a `Continuous` vector into an `OrderedFactor` vector. 
In particular, the output is a `CategoricalVector` (whose reference type is optimized).\n\nThe transformation is chosen so that the vector on which the transformer is fit has, in transformed form, an approximately uniform distribution of values. Specifically, if `n_classes` is the level of discretization, then `2*n_classes - 1` ordered quantiles are computed, the odd quantiles being used for transforming (discretization) and the even quantiles for inverse transforming.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with `Continuous` element scitype; check scitype with `scitype(x)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `n_classes`: number of discrete classes in the output\n\n# Operations\n\n * `transform(mach, xnew)`: discretize `xnew` according to the discretization learned when fitting `mach`\n * `inverse_transform(mach, z)`: attempt to reconstruct from `z` a vector that transforms to give `z`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach).fitesult` include:\n\n * `odd_quantiles`: quantiles used for transforming (length is `n_classes - 1`)\n * `even_quantiles`: quantiles used for inverse transforming (length is `n_classes`)\n\n# Example\n\n```julia\nusing MLJ\nusing Random\nRandom.seed!(123)\n\ndiscretizer = UnivariateDiscretizer(n_classes=100)\nmach = machine(discretizer, randn(1000))\nfit!(mach)\n\njulia> x = rand(5)\n5-element Vector{Float64}:\n 0.8585244609846809\n 0.37541692370451396\n 0.6767070590395461\n 0.9208844241267105\n 0.7064611415680901\n\njulia> z = transform(mach, x)\n5-element CategoricalArrays.CategoricalArray{UInt8,1,UInt8}:\n 0x52\n 0x42\n 0x4d\n 0x54\n 0x4e\n\nx_approx = inverse_transform(mach, z)\njulia> x - x_approx\n5-element Vector{Float64}:\n 0.008224506144777322\n 0.012731354778359405\n 0.0056265330571125816\n 0.005738175684445124\n 0.006835652575801987\n```\n""" 
-":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":docstring" = """```julia\nUnivariateTimeTypeToContinuous\n```\n\nA model type for constructing a single variable transformer that creates continuous representations of temporally typed data, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nUnivariateTimeTypeToContinuous = @load UnivariateTimeTypeToContinuous pkg=MLJTransforms\n```\n\nDo `model = UnivariateTimeTypeToContinuous()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateTimeTypeToContinuous(zero_time=...)`.\n\nUse this model to convert vectors with a `TimeType` element type to vectors of `Float64` type (`Continuous` element scitype).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector whose element type is a subtype of `Dates.TimeType`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `zero_time`: the time that is to correspond to 0.0 under transformations, with the type coinciding with the training data element type. 
If unspecified, the earliest time encountered in training is used.\n * `step::Period=Hour(24)`: time interval to correspond to one unit under transformation\n\n# Operations\n\n * `transform(mach, xnew)`: apply the encoding inferred when `mach` was fit\n\n# Fitted parameters\n\n`fitted_params(mach).fitresult` is the tuple `(zero_time, step)` actually used in transformations, which may differ from the user-specified hyper-parameters.\n\n# Example\n\n```julia\nusing MLJ\nusing Dates\n\nx = [Date(2001, 1, 1) + Day(i) for i in 0:4]\n\nencoder = UnivariateTimeTypeToContinuous(zero_time=Date(2000, 1, 1),\n step=Week(1))\n\nmach = machine(encoder, x)\nfit!(mach)\njulia> transform(mach, x)\n5-element Vector{Float64}:\n 52.285714285714285\n 52.42857142857143\n 52.57142857142857\n 52.714285714285715\n 52.857142\n```\n""" +":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.ScientificTimeType}`" ":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" ":package_name" = "MLJTransforms" -":name" = "UnivariateDiscretizer" +":name" = "UnivariateTimeTypeToContinuous" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform", ":UnivariateDiscretizer"] +":implemented_methods" = [":clean!", ":fit", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":transform_scitype" = "`AbstractVector{<:ScientificTypesBase.OrderedFactor}`" -":is_wrapper" = "`false`" - -[MLJTransforms.CardinalityReducer] +":input_scitype" = "`AbstractVector{<:ScientificTypesBase.ScientificTimeType}`" +":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" 
":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Real\", \"Dict{T} where T<:Type\")`" + +[MLJTransforms.OneHotEncoder] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Vector{Symbol}\", \"Bool\", \"Bool\", \"Bool\")`" ":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" ":output_scitype" = "`ScientificTypesBase.Table`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "unknown" +":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.CardinalityReducer" -":hyperparameters" = "`(:features, :ignore, :ordered_factor, :min_frequency, :label_for_infrequent)`" +":load_path" = "MLJTransforms.OneHotEncoder" +":hyperparameters" = "`(:features, :drop_last, :ordered_factor, :ignore)`" ":is_pure_julia" = "`true`" -":human_name" = "cardinality reducer" +":human_name" = "one-hot encoder" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nCardinalityReducer\n```\n\nA model type for constructing a cardinality reducer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nCardinalityReducer = @load CardinalityReducer pkg=MLJTransforms\n```\n\nDo `model = CardinalityReducer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `CardinalityReducer(features=...)`.\n\n`CardinalityReducer` maps any level of a categorical feature that occurs with frequency `< min_frequency` into a new level (e.g., \"Other\"). 
This is useful when some categorical features have high cardinality and many levels are infrequent. This assumes that the categorical features have raw types that are in `Union{AbstractString, Char, Number}`.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or in clude from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded.\n\n * `ignore=true`: Whether to exclude or include the features given in `features`\n\n * `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them\n\n * `min_frequency::Real=3`: Any level of a categorical feature that occurs with frequency < `min_frequency` will be mapped to a new level. Could be an integer or a float which decides whether raw counts or normalized frequencies are used.\n * `label_for_infrequent::Dict{<:Type, <:Any}()= Dict( AbstractString => \"Other\", Char => 'O', )`: A dictionary where the possible values for keys are the types in `Char`, `AbstractString`, and `Number` and each value signifies the new level to map into given a column raw super type. 
By default, if the raw type of the column subtypes `AbstractString` then the new value is `\"Other\"` and if the raw type subtypes `Char` then the new value is `'O'` and if the raw type subtypes `Number` then the new value is the lowest value in the column - 1.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply cardinality reduction to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `new_cat_given_col_val`: A dictionary that maps each level in a categorical feature to a new level (either itself or the new level specified in `label_for_infrequent`)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nimport StatsBase.proportionmap\nusing MLJ\n\n# Define categorical features\nA = [ [\"a\" for i in 1:100]..., \"b\", \"b\", \"b\", \"c\", \"d\"]\nB = [ [0 for i in 1:100]..., 1, 2, 3, 4, 4]\n\n# Combine into a named tuple\nX = (A = A, B = B)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Multiclass\n)\n\nencoder = CardinalityReducer(ordered_factor = false, min_frequency=3)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia> proportionmap(Xnew.A)\nDict{CategoricalArrays.CategoricalValue{String, UInt32}, Float64} with 3 entries:\n \"Other\" => 0.0190476\n \"b\" => 0.0285714\n \"a\" => 0.952381\n\njulia> proportionmap(Xnew.B)\nDict{CategoricalArrays.CategoricalValue{Int64, UInt32}, Float64} with 2 entries:\n 0 => 0.952381\n -1 => 0.047619\n```\n\nSee also [`FrequencyEncoder`](@ref)\n""" +":docstring" = """```julia\nOneHotEncoder\n```\n\nA model type for constructing a one-hot encoder, based on 
[MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nOneHotEncoder = @load OneHotEncoder pkg=MLJTransforms\n```\n\nDo `model = OneHotEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `OneHotEncoder(features=...)`.\n\nUse this model to one-hot encode the `Multiclass` and `OrderedFactor` features (columns) of some table, leaving other columns unchanged.\n\nNew data to be transformed may lack features present in the fit data, but no *new* features can be present.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo ensure *all* features are transformed into `Continuous` features, or dropped, use [`ContinuousEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table. Columns can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded. Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of symbols (feature names). If empty (default) then all `Multiclass` and `OrderedFactor` features are encoded. Otherwise, encoding is further restricted to the specified features (`ignore=false`) or the unspecified features (`ignore=true`). This default behavior can be modified by the `ordered_factor` flag.\n * `ordered_factor=false`: when `true`, `OrderedFactor` features are universally excluded\n * `drop_last=false`: whether to drop the column corresponding to the final class of encoded features. 
For example, a three-class feature is spawned into three new features if `drop_last=false`, but just two features otherwise.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `all_features`: names of all features encountered in training\n * `fitted_levels_given_feature`: dictionary of the levels associated with each feature encoded, keyed on the feature name\n * `ref_name_pairs_given_feature`: dictionary of pairs `r => ftr` (such as `0x00000001 => :grad__A`) where `r` is a CategoricalArrays.jl reference integer representing a level, and `ftr` the corresponding new feature name; the dictionary is keyed on the names of features that are encoded\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features_to_be_encoded`: names of input features to be encoded\n * `new_features`: names of all output features\n\n# Example\n\n```julia\nusing MLJ\n\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n height=[1.85, 1.67, 1.5, 1.67],\n n_devices=[3, 2, 4, 3])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names │ scitypes │\n├───────────┼──────────────────┤\n│ name │ Multiclass{4} │\n│ grade │ OrderedFactor{3} │\n│ height │ Continuous │\n│ n_devices │ Count │\n└───────────┴──────────────────┘\n\nhot = OneHotEncoder(drop_last=true)\nmach = fit!(machine(hot, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names │ scitypes │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John │ Continuous │\n│ name__Lee │ Continuous │\n│ grade__A │ Continuous │\n│ grade__B │ Continuous │\n│ height │ Continuous │\n│ n_devices │ Count │\n└──────────────┴────────────┘\n```\n\nSee also [`ContinuousEncoder`](@ref).\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Table`" ":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" ":package_name" = "MLJTransforms" -":name" = "CardinalityReducer" +":name" = 
"OneHotEncoder" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform"] +":implemented_methods" = [":fit", ":fitted_params", ":transform", ":OneHotEncoder"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" @@ -8621,35 +8880,35 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table`" ":transform_scitype" = "`ScientificTypesBase.Table`" -":is_wrapper" = "`false`" - -[MLJTransforms.OrdinalEncoder] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Type\")`" + +[MLJTransforms.ContinuousEncoder] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Bool\", \"Bool\")`" ":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing, nothing)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":output_scitype" = "`ScientificTypesBase.Table`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "unknown" +":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.OrdinalEncoder" -":hyperparameters" = "`(:features, :ignore, :ordered_factor, :output_type)`" +":load_path" = "MLJTransforms.ContinuousEncoder" +":hyperparameters" = "`(:drop_last, :one_hot_ordered_factors)`" ":is_pure_julia" = "`true`" -":human_name" = "ordinal encoder" +":human_name" = "continuous encoder" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nOrdinalEncoder\n```\n\nA model type for constructing a ordinal encoder, based on 
[MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nOrdinalEncoder = @load OrdinalEncoder pkg=MLJTransforms\n```\n\nDo `model = OrdinalEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `OrdinalEncoder(features=...)`.\n\n`OrdinalEncoder` implements ordinal encoding which replaces the categorical values in the specified categorical features with integers (ordered arbitrarily). This will create an implicit ordering between categories which may not be a proper modelling assumption.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or in clude from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded.\n * `ignore=true`: Whether to exclude or include the features given in `features`\n * `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them\n * `output_type`: The numerical concrete type of the encoded features. Default is `Float32`.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply ordinal encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. 
Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `index_given_feat_level`: A dictionary that maps each level for each column in a subset of the categorical features of X into an integer.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",] \nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",] \nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\n\n# Check scitype coercion:\nschema(X)\n\nencoder = OrdinalEncoder(ordered_factor = false)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia > Xnew\n (A = [2, 1, 2, 3, 3],\n B = [1.0, 2.0, 3.0, 4.0, 5.0],\n C = [1, 1, 1, 2, 1],\n D = [2, 1, 2, 1, 2],\n E = CategoricalArrays.CategoricalValue{Int64, UInt32}[1, 2, 3, 4, 5],)\n```\n\nSee also [`TargetEncoder`](@ref)\n""" +":docstring" = """```julia\nContinuousEncoder\n```\n\nA model type for constructing a continuous encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nContinuousEncoder = @load ContinuousEncoder pkg=MLJTransforms\n```\n\nDo `model = ContinuousEncoder()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `ContinuousEncoder(drop_last=...)`.\n\nUse this model to arrange all features (columns) of a table to have `Continuous` element scitype, by applying the following protocol to each feature `ftr`:\n\n * If `ftr` is already `Continuous` retain it.\n * If `ftr` is `Multiclass`, one-hot encode it.\n * If `ftr` is `OrderedFactor`, replace it with `coerce(ftr, Continuous)` (vector of floating point integers), unless `one_hot_ordered_factors=true` is specified, in which case one-hot encode it.\n * If `ftr` is `Count`, replace it with `coerce(ftr, Continuous)`.\n * If `ftr` has some other element scitype, or was not observed in fitting the encoder, drop it from the table.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo selectively one-hot-encode categorical features (without dropping features) use [`OneHotEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table. Features can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded. Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `drop_last=true`: whether to drop the column corresponding to the final class of one-hot encoded features. 
For example, a three-class feature is spawned into three new features if `drop_last=false`, but two just features otherwise.\n * `one_hot_ordered_factors=false`: whether to one-hot any feature with `OrderedFactor` element scitype, or to instead coerce it directly to a (single) `Continuous` feature using the order\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_to_keep`: names of features that will not be dropped from the table\n * `one_hot_encoder`: the `OneHotEncoder` model instance for handling the one-hot encoding\n * `one_hot_encoder_fitresult`: the fitted parameters of the `OneHotEncoder` model\n\n# Report\n\n * `features_to_keep`: names of input features that will not be dropped from the table\n * `new_features`: names of all output features\n\n# Example\n\n```julia\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n height=[1.85, 1.67, 1.5, 1.67],\n n_devices=[3, 2, 4, 3],\n comments=[\"the force\", \"be\", \"with you\", \"too\"])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names │ scitypes │\n├───────────┼──────────────────┤\n│ name │ Multiclass{4} │\n│ grade │ OrderedFactor{3} │\n│ height │ Continuous │\n│ n_devices │ Count │\n│ comments │ Textual │\n└───────────┴──────────────────┘\n\nencoder = ContinuousEncoder(drop_last=true)\nmach = fit!(machine(encoder, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names │ scitypes │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John │ Continuous │\n│ name__Lee │ Continuous │\n│ grade │ Continuous │\n│ height │ Continuous │\n│ n_devices │ Continuous │\n└──────────────┴────────────┘\n\njulia> setdiff(schema(X).names, report(mach).features_to_keep) # dropped features\n1-element Vector{Symbol}:\n :comments\n\n```\n\nSee also [`OneHotEncoder`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Table`" ":package_url" = 
"https://github.com/JuliaAI/MLJTransforms.jl" ":package_name" = "MLJTransforms" -":name" = "OrdinalEncoder" +":name" = "ContinuousEncoder" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform"] +":implemented_methods" = [":fit", ":fitted_params", ":transform", ":ContinuousEncoder"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" @@ -8657,36 +8916,36 @@ ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":is_wrapper" = "`false`" - -[MLJTransforms.FillImputer] +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Vector{Symbol}\", \"Function\", \"Function\", \"Function\")`" + +[MLJTransforms.FrequencyEncoder] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Bool\", \"Type\")`" ":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" ":output_scitype" = "`ScientificTypesBase.Table`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "MIT" +":package_license" = "unknown" ":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.FillImputer" -":hyperparameters" = "`(:features, :continuous_fill, :count_fill, :finite_fill)`" +":load_path" = "MLJTransforms.FrequencyEncoder" +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :normalize, :output_type)`" ":is_pure_julia" = "`true`" -":human_name" = "fill imputer" 
+":human_name" = "frequency encoder" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nFillImputer\n```\n\nA model type for constructing a fill imputer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nFillImputer = @load FillImputer pkg=MLJTransforms\n```\n\nDo `model = FillImputer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FillImputer(features=...)`.\n\nUse this model to impute `missing` values in tabular data. A fixed \"filler\" value is learned from the training data, one for each column of the table.\n\nFor imputing missing values in a vector, use [`UnivariateFillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose features each have element scitypes `Union{Missing, T}`, where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`. 
Check scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of names of features (symbols) for which imputation is to be attempted; default is empty, which is interpreted as \"impute all\".\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_seen_in_fit`: the names of features (features) encountered during training\n * `univariate_transformer`: the univariate model applied to determine the fillers (it's fields contain the functions defining the filler computations)\n * `filler_given_feature`: dictionary of filler values, keyed on feature (column) names\n\n# Examples\n\n```julia\nusing MLJ\nimputer = FillImputer()\n\nX = (a = [1.0, 2.0, missing, 3.0, missing],\n b = coerce([\"y\", \"n\", \"y\", missing, \"y\"], Multiclass),\n c = [1, 1, 2, missing, 3])\n\nschema(X)\njulia> schema(X)\n┌───────┬───────────────────────────────┐\n│ names │ scitypes │\n├───────┼───────────────────────────────┤\n│ a │ Union{Missing, Continuous} │\n│ b │ Union{Missing, Multiclass{2}} │\n│ c │ Union{Missing, Count} │\n└───────┴───────────────────────────────┘\n\nmach = machine(imputer, X)\nfit!(mach)\n\njulia> fitted_params(mach).filler_given_feature\n(filler = 
2.0,)\n\njulia> fitted_params(mach).filler_given_feature\nDict{Symbol, Any} with 3 entries:\n :a => 2.0\n :b => \"y\"\n :c => 2\n\njulia> transform(mach, X)\n(a = [1.0, 2.0, 2.0, 3.0, 2.0],\n b = CategoricalValue{String, UInt32}[\"y\", \"n\", \"y\", \"y\", \"y\"],\n c = [1, 1, 2, 2, 3],)\n```\n\nSee also [`UnivariateFillImputer`](@ref).\n""" +":docstring" = """```julia\nFrequencyEncoder\n```\n\nA model type for constructing a frequency encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nFrequencyEncoder = @load FrequencyEncoder pkg=MLJTransforms\n```\n\nDo `model = FrequencyEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FrequencyEncoder(features=...)`.\n\n`FrequencyEncoder` implements frequency encoding which replaces the categorical values in the specified categorical features with their (normalized or raw) frequencies of occurrence in the dataset. \n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. 
Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features=[]`: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded.\n * `ignore=true`: Whether to exclude or include the features given in `features`\n * `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them\n * `normalize=false`: Whether to use normalized frequencies that sum to 1 over category values or to use raw counts.\n * `output_type=Float32`: The type of the output values. The default is `Float32`, but you can set it to `Float64` or any other type that can hold the frequency values.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply frequency encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. 
Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `statistic_given_feat_val`: A dictionary that maps each level for each column in a subset of the categorical features of X into its frequency.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",] \nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",] \nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\n\n# Check scitype coercions:\nschema(X)\n\nencoder = FrequencyEncoder(ordered_factor = false, normalize=true)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia > Xnew\n (A = [2, 1, 2, 2, 2],\n B = [1.0, 2.0, 3.0, 4.0, 5.0],\n C = [4, 4, 4, 1, 4],\n D = [3, 2, 3, 2, 3],\n E = CategoricalArrays.CategoricalValue{Int64, UInt32}[1, 2, 3, 4, 5],)\n```\n\nSee also [`TargetEncoder`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Table`" ":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" ":package_name" = "MLJTransforms" -":name" = "FillImputer" +":name" = "FrequencyEncoder" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform", ":FillImputer"] +":implemented_methods" = [":fit", ":fitted_params", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" @@ -8695,35 +8954,35 @@ 
":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table`" ":transform_scitype" = "`ScientificTypesBase.Table`" -":is_wrapper" = "`false`" - -[MLJTransforms.MissingnessEncoder] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Dict{T} where T<:Type\")`" + +[MLJTransforms.TargetEncoder] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Real\", \"Union{Real, Symbol}\")`" ":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table, ScientificTypesBase.Unknown}`" ":output_scitype" = "`ScientificTypesBase.Table`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "unknown" ":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.MissingnessEncoder" -":hyperparameters" = "`(:features, :ignore, :ordered_factor, :label_for_missing)`" +":load_path" = "MLJTransforms.TargetEncoder" +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :lambda, :m)`" ":is_pure_julia" = "`true`" -":human_name" = "missingness encoder" +":human_name" = "target encoder" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nMissingnessEncoder\n```\n\nA model type for constructing a missingness encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nMissingnessEncoder = @load MissingnessEncoder pkg=MLJTransforms\n```\n\nDo `model = MissingnessEncoder()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `MissingnessEncoder(features=...)`.\n\n`MissingnessEncoder` maps any missing level of a categorical feature into a new level (e.g., \"Missing\"). By this, missingness will be treated as a new level by any subsequent model. This assumes that the categorical features have raw types that are in `Char`, `AbstractString`, and `Number`.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or in clude from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded.\n\n * `ignore=true`: Whether to exclude or include the features given in `features`\n\n * `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them\n\n * `label_for_missing::Dict{<:Type, <:Any}()= Dict( AbstractString => \"missing\", Char => 'm', )`: A dictionary where the possible values for keys are the types in `Char`, `AbstractString`, and `Number` and where each value signifies the new level to map into given a column raw super type. 
By default, if the raw type of the column subtypes `AbstractString` then missing values will be replaced with `\"missing\"` and if the raw type subtypes `Char` then the new value is `'m'` and if the raw type subtypes `Number` then the new value is the lowest value in the column - 1.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply cardinality reduction to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `label_for_missing_given_feature`: A dictionary that for each column, maps `missing` into some value according to `label_for_missing`\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nimport StatsBase.proportionmap\nusing MLJ\n\n# Define a table with missing values\nXm = (\n A = categorical([\"Ben\", \"John\", missing, missing, \"Mary\", \"John\", missing]),\n B = [1.85, 1.67, missing, missing, 1.5, 1.67, missing],\n C= categorical([7, 5, missing, missing, 10, 0, missing]),\n D = [23, 23, 44, 66, 14, 23, 11],\n E = categorical([missing, 'g', 'r', missing, 'r', 'g', 'p'])\n)\n\nencoder = MissingnessEncoder()\nmach = fit!(machine(encoder, Xm))\nXnew = transform(mach, Xm)\n\njulia> Xnew\n(A = [\"Ben\", \"John\", \"missing\", \"missing\", \"Mary\", \"John\", \"missing\"],\n B = Union{Missing, Float64}[1.85, 1.67, missing, missing, 1.5, 1.67, missing],\n C = [7, 5, -1, -1, 10, 0, -1],\n D = [23, 23, 44, 66, 14, 23, 11],\n E = ['m', 'g', 'r', 'm', 'r', 'g', 'p'],)\n\n```\n\nSee also [`CardinalityReducer`](@ref)\n""" +":docstring" = """```julia\nTargetEncoder\n```\n\nA model type for constructing a target encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model 
interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nTargetEncoder = @load TargetEncoder pkg=MLJTransforms\n```\n\nDo `model = TargetEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `TargetEncoder(features=...)`.\n\n`TargetEncoder` implements target encoding as defined in [1] to encode categorical variables into continuous ones using statistics from the target variable.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous` or `Count` for regression problems and `Multiclass` or `OrderedFactor` for classification problems; check the scitype with `schema(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features=[]`: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded.\n\n * `ignore=true`: Whether to exclude or include the features given in `features`\n\n * `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them\n\n * `lambda`: Shrinkage hyperparameter (λ in [1]) used to mix between posterior and prior statistics as described in [1]\n * `m`: An integer hyperparameter to compute shrinkage as described in [1]. 
If `m=:auto` then m will be computed using empirical Bayes estimation as described in [1]\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply target encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `task`: Whether the task is `Classification` or `Regression`\n * `y_statistic_given_feat_level`: A dictionary with the necessary statistics to encode each categorical feature. It maps each level in each categorical feature to a statistic computed over the target.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",]\nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",]\nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Define the target variable\ny = [\"c1\", \"c2\", \"c3\", \"c1\", \"c2\",]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\ny = coerce(y, Multiclass)\n\nencoder = TargetEncoder(ordered_factor = false, lambda = 1.0, m = 0,)\nmach = fit!(machine(encoder, X, y))\nXnew = transform(mach, X)\n\njulia > schema(Xnew)\n┌───────┬──────────────────┬─────────────────────────────────┐\n│ names │ scitypes │ types │\n├───────┼──────────────────┼─────────────────────────────────┤\n│ A_1 │ Continuous │ Float64 │\n│ A_2 │ Continuous │ Float64 │\n│ A_3 │ Continuous │ Float64 │\n│ B │ Continuous │ Float64 │\n│ C_1 │ Continuous │ Float64 │\n│ C_2 │ Continuous │ Float64 │\n│ C_3 │ Continuous │ Float64 │\n│ 
D_1 │ Continuous │ Float64 │\n│ D_2 │ Continuous │ Float64 │\n│ D_3 │ Continuous │ Float64 │\n│ E │ OrderedFactor{5} │ CategoricalValue{Int64, UInt32} │\n└───────┴──────────────────┴─────────────────────────────────┘\n```\n\n# Reference\n\n[1] Micci-Barreca, Daniele. “A preprocessing scheme for high-cardinality categorical attributes in classification and prediction problems” SIGKDD Explor. Newsl. 3, 1 (July 2001), 27–32.\n\nSee also [`OneHotEncoder`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Table`" ":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" ":package_name" = "MLJTransforms" -":name" = "MissingnessEncoder" -":target_in_fit" = "`false`" +":name" = "TargetEncoder" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" @@ -8732,633 +8991,411 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table`" ":transform_scitype" = "`ScientificTypesBase.Table`" -":is_wrapper" = "`false`" - -[MLJTransforms.ContrastEncoder] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Union{Symbol, AbstractVector{Symbol}}\", \"Any\", \"Bool\")`" + +[MLJTransforms.UnivariateBoxCoxTransformer] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Int64\", \"Bool\")`" ":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":output_scitype" = "`ScientificTypesBase.Table`" +":fit_data_scitype" = 
"`Tuple{AbstractVector{ScientificTypesBase.Continuous}}`" +":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "unknown" +":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.ContrastEncoder" -":hyperparameters" = "`(:features, :ignore, :mode, :buildmatrix, :ordered_factor)`" +":load_path" = "MLJTransforms.UnivariateBoxCoxTransformer" +":hyperparameters" = "`(:n, :shift)`" ":is_pure_julia" = "`true`" -":human_name" = "contrast encoder" +":human_name" = "single variable Box-Cox transformer" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nContrastEncoder\n```\n\nA model type for constructing a contrast encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nContrastEncoder = @load ContrastEncoder pkg=MLJTransforms\n```\n\nDo `model = ContrastEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ContrastEncoder(features=...)`.\n\n`ContrastEncoder` implements the following contrast encoding methods for categorical features: dummy, sum, backward/forward difference, and Helmert coding. More generally, users can specify a custom contrast or hypothesis matrix, and each feature can be encoded using a different method.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. 
Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or in clude from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded.\n\n * `mode=:dummy`: The type of encoding to use. Can be one of `:contrast`, `:dummy`, `:sum`, `:backward_diff`, `:forward_diff`, `:helmert` or `:hypothesis`. If `ignore=false` (features to be encoded are listed explictly in `features`), then this can be a vector of the same length as `features` to specify a different contrast encoding scheme for each feature\n * `buildmatrix=nothing`: A function or other callable with signature `buildmatrix(colname,k)`, where `colname` is the name of the feature levels and `k` is it's length, and which returns contrast or hypothesis matrix with row/column ordering consistent with the ordering of `levels(col)`. Only relevant if `mode` is `:contrast` or `:hypothesis`.\n * `ignore=true`: Whether to exclude or include the features given in `features`\n\n * `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply contrast encoding to selected `Multiclass` or `OrderedFactor features of`Xnew`specified by hyper-parameters, and return the new table. 
Features that are neither`Multiclass`nor`OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `vector_given_value_given_feature`: A dictionary that maps each level for each column in a subset of the categorical features of X into its frequency.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical dataset\nX = (\n name = categorical([\"Ben\", \"John\", \"Mary\", \"John\"]),\n height = [1.85, 1.67, 1.5, 1.67],\n favnum = categorical([7, 5, 10, 1]),\n age = [23, 23, 14, 23],\n)\n\n# Check scitype coercions:\nschema(X)\n\nencoder = ContrastEncoder(\n features = [:name, :favnum],\n ignore = false,\n mode = [:dummy, :helmert],\n)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia > Xnew\n (name_John = [1.0, 0.0, 0.0, 0.0],\n name_Mary = [0.0, 1.0, 0.0, 1.0],\n height = [1.85, 1.67, 1.5, 1.67],\n favnum_5 = [0.0, 1.0, 0.0, -1.0],\n favnum_7 = [2.0, -1.0, 0.0, -1.0],\n favnum_10 = [-1.0, -1.0, 3.0, -1.0],\n age = [23, 23, 14, 23],)\n```\n\nSee also [`OneHotEncoder`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":docstring" = """```julia\nUnivariateBoxCoxTransformer\n```\n\nA model type for constructing a single variable Box-Cox transformer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nUnivariateBoxCoxTransformer = @load UnivariateBoxCoxTransformer pkg=MLJTransforms\n```\n\nDo `model = UnivariateBoxCoxTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateBoxCoxTransformer(n=...)`.\n\nBox-Cox transformations attempt to make data look more normally distributed. 
This can improve performance and assist in the interpretation of models which suppose that data is generated by a normal distribution.\n\nA Box-Cox transformation (with shift) is of the form\n\n```julia\nx -> ((x + c)^λ - 1)/λ\n```\n\nfor some constant `c` and real `λ`, unless `λ = 0`, in which case the above is replaced with\n\n```julia\nx -> log(x + c)\n```\n\nGiven user-specified hyper-parameters `n::Integer` and `shift::Bool`, the present implementation learns the parameters `c` and `λ` from the training data as follows: If `shift=true` and zeros are encountered in the data, then `c` is set to `0.2` times the data mean. If there are no zeros, then no shift is applied. Finally, `n` different values of `λ` between `-0.4` and `3` are considered, with `λ` fixed to the value maximizing normality of the transformed data.\n\n*Reference:* [Wikipedia entry for power transform](https://en.wikipedia.org/wiki/Power_transform).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with element scitype `Continuous`; check the scitype with `scitype(x)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `n=171`: number of values of the exponent `λ` to try\n * `shift=false`: whether to include a preliminary constant translation in transformations, in the presence of zeros\n\n# Operations\n\n * `transform(mach, xnew)`: apply the Box-Cox transformation learned when fitting `mach`\n * `inverse_transform(mach, z)`: reconstruct the vector `x` whose transformation learned by `mach` is `z`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `λ`: the learned Box-Cox exponent\n * `c`: the learned shift\n\n# Examples\n\n```julia\nusing MLJ\nusing UnicodePlots\nusing Random\nRandom.seed!(123)\n\ntransf = UnivariateBoxCoxTransformer()\n\nx = randn(1000).^2\n\nmach = machine(transf, x)\nfit!(mach)\n\nz = transform(mach, x)\n\njulia>
histogram(x)\n ┌ ┐\n [ 0.0, 2.0) ┤███████████████████████████████████ 848\n [ 2.0, 4.0) ┤████▌ 109\n [ 4.0, 6.0) ┤█▍ 33\n [ 6.0, 8.0) ┤▍ 7\n [ 8.0, 10.0) ┤▏ 2\n [10.0, 12.0) ┤ 0\n [12.0, 14.0) ┤▏ 1\n └ ┘\n Frequency\n\njulia> histogram(z)\n ┌ ┐\n [-5.0, -4.0) ┤█▎ 8\n [-4.0, -3.0) ┤████████▊ 64\n [-3.0, -2.0) ┤█████████████████████▊ 159\n [-2.0, -1.0) ┤█████████████████████████████▊ 216\n [-1.0, 0.0) ┤███████████████████████████████████ 254\n [ 0.0, 1.0) ┤█████████████████████████▊ 188\n [ 1.0, 2.0) ┤████████████▍ 90\n [ 2.0, 3.0) ┤██▊ 20\n [ 3.0, 4.0) ┤▎ 1\n └ ┘\n Frequency\n\n```\n""" +":inverse_transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" ":package_name" = "MLJTransforms" -":name" = "ContrastEncoder" +":name" = "UnivariateBoxCoxTransformer" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform"] +":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform", ":UnivariateBoxCoxTransformer"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":is_wrapper" = "`false`" - -[MLJTransforms.UnivariateStandardizer] +":input_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`()`" + +[MLJTransforms.InteractionTransformer] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, Vector{Symbol}}\")`" ":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`()`" 
+":hyperparameter_ranges" = "`(nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Infinite}}`" -":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{}`" +":output_scitype" = "`ScientificTypesBase.Table`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" +":abstract_type" = "`MLJModelInterface.Static`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.UnivariateStandardizer" -":hyperparameters" = "`()`" +":load_path" = "MLJTransforms.InteractionTransformer" +":hyperparameters" = "`(:order, :features)`" ":is_pure_julia" = "`true`" -":human_name" = "single variable discretizer" +":human_name" = "interaction transformer" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nUnivariateStandardizer()\n```\n\nTransformer type for standardizing (whitening) single variable data.\n\nThis model may be deprecated in the future. Consider using [`Standardizer`](@ref), which handles both tabular *and* univariate data.\n""" -":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.Infinite}`" +":docstring" = """```julia\nInteractionTransformer\n```\n\nA model type for constructing a interaction transformer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nInteractionTransformer = @load InteractionTransformer pkg=MLJTransforms\n```\n\nDo `model = InteractionTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `InteractionTransformer(order=...)`.\n\nGenerates all polynomial interaction terms up to the given order for the subset of chosen columns. Any column that contains elements with scitype `<:Infinite` is a valid basis to generate interactions. 
If `features` is not specified, all such columns with scitype `<:Infinite` in the table are used as a basis.\n\nIn MLJ or MLJBase, you can transform features `X` with the single call\n\n```julia\ntransform(machine(model), X)\n```\n\nSee also the example below.\n\n# Hyper-parameters\n\n * `order`: Maximum order of interactions to be generated.\n * `features`: Restricts interaction generation to those columns\n\n# Operations\n\n * `transform(machine(model), X)`: Generates polynomial interaction terms out of table `X` using the hyper-parameters specified in `model`.\n\n# Example\n\n```julia\nusing MLJ\n\nX = (\n A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"]\n)\nit = InteractionTransformer(order=3)\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],\n A_C = [7, 16, 27],\n B_C = [28, 40, 54],\n A_B_C = [28, 80, 162],)\n\nit = InteractionTransformer(order=2, features=[:A, :B])\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],)\n\n```\n""" +":inverse_transform_scitype" = "`Tuple{ScientificTypesBase.Table}`" ":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" ":package_name" = "MLJTransforms" -":name" = "UnivariateStandardizer" +":name" = "InteractionTransformer" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform"] +":implemented_methods" = [":clean!", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Infinite}`"
-":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":is_wrapper" = "`false`" - -[MLJTransforms.UnivariateFillImputer] +":input_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":transform_scitype" = "`ScientificTypesBase.Table`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Function\", \"Function\", \"Function\")`" + +[MLJTransforms.UnivariateDiscretizer] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Int64\",)`" ":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing,)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}}`" -":output_scitype" = "`Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.OrderedFactor}`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.UnivariateFillImputer" -":hyperparameters" = "`(:continuous_fill, :count_fill, :finite_fill)`" +":load_path" = "MLJTransforms.UnivariateDiscretizer" +":hyperparameters" = "`(:n_classes,)`" ":is_pure_julia" = "`true`" -":human_name" = "single variable fill imputer" +":human_name" = "single variable discretizer" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nUnivariateFillImputer\n```\n\nA model type for constructing a single variable fill imputer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model 
interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nUnivariateFillImputer = @load UnivariateFillImputer pkg=MLJTransforms\n```\n\nDo `model = UnivariateFillImputer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateFillImputer(continuous_fill=...)`.\n\nUse this model to imputing `missing` values in a vector with a fixed value learned from the non-missing values of training vector.\n\nFor imputing missing values in tabular data, use [`FillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with element scitype `Union{Missing, T}` where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`; check scitype using `scitype(x)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, xnew)`: return `xnew` with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `filler`: the fill value to be imputed in all new data\n\n# Examples\n\n```julia\nusing MLJ\nimputer = UnivariateFillImputer()\n\nx_continuous = [1.0, 2.0, missing, 3.0]\nx_multiclass = coerce([\"y\", \"n\", \"y\", missing, 
\"y\"], Multiclass)\nx_count = [1, 1, 1, 2, missing, 3, 3]\n\nmach = machine(imputer, x_continuous)\nfit!(mach)\n\njulia> fitted_params(mach)\n(filler = 2.0,)\n\njulia> transform(mach, [missing, missing, 101.0])\n3-element Vector{Float64}:\n 2.0\n 2.0\n 101.0\n\nmach2 = machine(imputer, x_multiclass) |> fit!\n\njulia> transform(mach2, x_multiclass)\n5-element CategoricalArray{String,1,UInt32}:\n \"y\"\n \"n\"\n \"y\"\n \"y\"\n \"y\"\n\nmach3 = machine(imputer, x_count) |> fit!\n\njulia> transform(mach3, [missing, missing, 5])\n3-element Vector{Int64}:\n 2\n 2\n 5\n```\n\nFor imputing tabular data, use [`FillImputer`](@ref).\n""" -":inverse_transform_scitype" = "`Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}`" +":docstring" = """```julia\nUnivariateDiscretizer\n```\n\nA model type for constructing a single variable discretizer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nUnivariateDiscretizer = @load UnivariateDiscretizer pkg=MLJTransforms\n```\n\nDo `model = UnivariateDiscretizer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateDiscretizer(n_classes=...)`.\n\nDiscretization converts a `Continuous` vector into an `OrderedFactor` vector. In particular, the output is a `CategoricalVector` (whose reference type is optimized).\n\nThe transformation is chosen so that the vector on which the transformer is fit has, in transformed form, an approximately uniform distribution of values. 
Specifically, if `n_classes` is the level of discretization, then `2*n_classes - 1` ordered quantiles are computed, the odd quantiles being used for transforming (discretization) and the even quantiles for inverse transforming.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with `Continuous` element scitype; check scitype with `scitype(x)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `n_classes`: number of discrete classes in the output\n\n# Operations\n\n * `transform(mach, xnew)`: discretize `xnew` according to the discretization learned when fitting `mach`\n * `inverse_transform(mach, z)`: attempt to reconstruct from `z` a vector that transforms to give `z`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach).fitresult` include:\n\n * `odd_quantiles`: quantiles used for transforming (length is `n_classes - 1`)\n * `even_quantiles`: quantiles used for inverse transforming (length is `n_classes`)\n\n# Example\n\n```julia\nusing MLJ\nusing Random\nRandom.seed!(123)\n\ndiscretizer = UnivariateDiscretizer(n_classes=100)\nmach = machine(discretizer, randn(1000))\nfit!(mach)\n\njulia> x = rand(5)\n5-element Vector{Float64}:\n 0.8585244609846809\n 0.37541692370451396\n 0.6767070590395461\n 0.9208844241267105\n 0.7064611415680901\n\njulia> z = transform(mach, x)\n5-element CategoricalArrays.CategoricalArray{UInt8,1,UInt8}:\n 0x52\n 0x42\n 0x4d\n 0x54\n 0x4e\n\nx_approx = inverse_transform(mach, z)\njulia> x - x_approx\n5-element Vector{Float64}:\n 0.008224506144777322\n 0.012731354778359405\n 0.0056265330571125816\n 0.005738175684445124\n 0.006835652575801987\n```\n""" +":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" ":package_name" = "MLJTransforms" -":name" = "UnivariateFillImputer" +":name" = "UnivariateDiscretizer"
":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform", ":UnivariateFillImputer"] +":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform", ":UnivariateDiscretizer"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}`" -":transform_scitype" = "`Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":is_wrapper" = "`false`" - -[MLJLIBSVMInterface.OneClassSVM] +":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":transform_scitype" = "`AbstractVector{<:ScientificTypesBase.OrderedFactor}`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" -":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" + +[MLJTransforms.CardinalityReducer] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Real\", \"Dict{T} where T<:Type\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}, 
Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Unknown}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Binary}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "unknown" ":prediction_type" = ":unknown" -":load_path" = "MLJLIBSVMInterface.OneClassSVM" -":hyperparameters" = "`(:kernel, :gamma, :nu, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" -":is_pure_julia" = "`false`" -":human_name" = "one-class support vector machine" +":load_path" = "MLJTransforms.CardinalityReducer" +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :min_frequency, :label_for_infrequent)`" +":is_pure_julia" = "`true`" +":human_name" = "cardinality reducer" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nOneClassSVM\n```\n\nA model type for constructing a one-class support vector machine, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nOneClassSVM = @load OneClassSVM pkg=LIBSVM\n```\n\nDo `model = OneClassSVM()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `OneClassSVM(kernel=...)`.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\nThis model is an outlier detection model delivering raw scores based on the decision function of a support vector machine. 
Like the [`NuSVC`](@ref) classifier, it uses the `nu` re-parameterization of the `cost` parameter appearing in standard support vector classification [`SVC`](@ref).\n\nTo extract normalized scores (\"probabilities\") wrap the model using `ProbabilisticDetector` from [OutlierDetection.jl](https://github.com/OutlierDetectionJL/OutlierDetection.jl). For threshold-based classification, wrap the probabilistic model using MLJ's `BinaryThresholdPredictor`. Examples of wrapping appear below.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```julia\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. 
Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin errors and a lower bound of the fraction of support vectors. Denoted `ν` in the cited paper. Changing `nu` changes the thickness of the margin (a neighborhood of the decision surface) and a margin error is said to have occurred if a training observation lies on the wrong side of the surface or within the margin.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `transform(mach, Xnew)`: return scores for outlierness, given features `Xnew` having the same scitype as `X` above. The greater the score, the more likely it is an outlier. This score is based on the SVM decision function. For normalized scores, wrap `model` using `ProbabilisticDetector` from OutlierDetection.jl and call `predict` instead, and for threshold-based classification, wrap again using `BinaryThresholdPredictor`. See the examples below.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `orientation`: this equals `1` if the decision function for `libsvm_model` is increasing with increasing outlierness, and `-1` if it is decreasing instead. Correspondingly, the `libsvm_model` attaches `true` to outliers in the first case, and `false` in the second. 
(The `scores` given in the MLJ report and generated by `MLJ.transform` already correct for this ambiguity, which is therefore only an issue for users directly accessing `libsvm_model`.)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Generating raw scores for outlierness\n\n```julia\nusing MLJ\nimport LIBSVM\nimport StableRNGs.StableRNG\n\nOneClassSVM = @load OneClassSVM pkg=LIBSVM # model type\nmodel = OneClassSVM(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nrng = StableRNG(123)\nXmatrix = randn(rng, 5, 3)\nXmatrix[1, 1] = 100.0\nX = MLJ.table(Xmatrix)\n\nmach = machine(model, X) |> fit!\n\n# training scores (outliers have larger scores):\njulia> report(mach).scores\n5-element Vector{Float64}:\n 6.711689156091755e-7\n -6.740101976655081e-7\n -6.711632439648446e-7\n -6.743015858874887e-7\n -6.745393717880104e-7\n\n# scores for new data:\nXnew = MLJ.table(rand(rng, 2, 3))\n\njulia> transform(mach, rand(rng, 2, 3))\n2-element Vector{Float64}:\n -6.746293022511047e-7\n -6.744289265348623e-7\n```\n\n## Generating probabilistic predictions of outlierness\n\nContinuing the previous example:\n\n```julia\nusing OutlierDetection\npmodel = ProbabilisticDetector(model)\npmach = machine(pmodel, X) |> fit!\n\n# probabilistic predictions on new data:\n\njulia> y_prob = predict(pmach, Xnew)\n2-element UnivariateFiniteVector{OrderedFactor{2}, String, UInt8, Float64}:\n UnivariateFinite{OrderedFactor{2}}(normal=>1.0, outlier=>9.57e-5)\n UnivariateFinite{OrderedFactor{2}}(normal=>1.0, outlier=>0.0)\n\n# probabilities for outlierness:\n\njulia> pdf.(y_prob, \"outlier\")\n2-element Vector{Float64}:\n 9.572583265925801e-5\n 0.0\n\n# raw scores are still available using `transform`:\n\njulia> transform(pmach, Xnew)\n2-element Vector{Float64}:\n 9.572583265925801e-5\n 0.0\n```\n\n## Outlier classification using a probability threshold:\n\nContinuing the previous 
example:\n\n```julia\ndmodel = BinaryThresholdPredictor(pmodel, threshold=0.9)\ndmach = machine(dmodel, X) |> fit!\n\njulia> yhat = predict(dmach, Xnew)\n2-element CategoricalArrays.CategoricalArray{String,1,UInt8}:\n \"normal\"\n \"normal\"\n```\n\n## User-defined kernels\n\nContinuing the first example:\n\n```julia\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = OneClassSVM(kernel=k)\nmach = machine(model, X) |> fit!\n\njulia> yhat = transform(mach, Xnew)\n2-element Vector{Float64}:\n -0.4825363352732942\n -0.4848772169720227\n```\n\nSee also [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README). For an alternative source of outlier detection models with an MLJ interface, see [OutlierDetection.jl](https://outlierdetectionjl.github.io/OutlierDetection.jl/dev/).\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/mpastell/LIBSVM.jl" -":package_name" = "LIBSVM" -":name" = "OneClassSVM" +":docstring" = """```julia\nCardinalityReducer\n```\n\nA model type for constructing a cardinality reducer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nCardinalityReducer = @load CardinalityReducer pkg=MLJTransforms\n```\n\nDo `model = CardinalityReducer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `CardinalityReducer(features=...)`.\n\n`CardinalityReducer` maps any level of a categorical feature that occurs with frequency `< min_frequency` into a new level (e.g., \"Other\"). This is useful when some categorical features have high cardinality and many levels are infrequent. 
This assumes that the categorical features have raw types that are in `Union{AbstractString, Char, Number}`.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features=[]`: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded.\n\n * `ignore=true`: Whether to exclude or include the features given in `features`\n\n * `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them\n\n * `min_frequency::Real=3`: Any level of a categorical feature that occurs with frequency < `min_frequency` will be mapped to a new level. Could be an integer or a float which decides whether raw counts or normalized frequencies are used.\n * `label_for_infrequent::Dict{<:Type, <:Any}()= Dict( AbstractString => \"Other\", Char => 'O', )`: A dictionary where the possible values for keys are the types in `Char`, `AbstractString`, and `Number` and each value signifies the new level to map into given a column raw super type. By default, if the raw type of the column subtypes `AbstractString` then the new value is `\"Other\"` and if the raw type subtypes `Char` then the new value is `'O'` and if the raw type subtypes `Number` then the new value is the lowest value in the column - 1.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply cardinality reduction to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. 
Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `new_cat_given_col_val`: A dictionary that maps each level in a categorical feature to a new level (either itself or the new level specified in `label_for_infrequent`)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nimport StatsBase.proportionmap\nusing MLJ\n\n# Define categorical features\nA = [ [\"a\" for i in 1:100]..., \"b\", \"b\", \"b\", \"c\", \"d\"]\nB = [ [0 for i in 1:100]..., 1, 2, 3, 4, 4]\n\n# Combine into a named tuple\nX = (A = A, B = B)\n\n# Coerce A and B to multiclass\nX = coerce(X,\n:A => Multiclass,\n:B => Multiclass\n)\n\nencoder = CardinalityReducer(ordered_factor = false, min_frequency=3)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia> proportionmap(Xnew.A)\nDict{CategoricalArrays.CategoricalValue{String, UInt32}, Float64} with 3 entries:\n \"Other\" => 0.0190476\n \"b\" => 0.0285714\n \"a\" => 0.952381\n\njulia> proportionmap(Xnew.B)\nDict{CategoricalArrays.CategoricalValue{Int64, UInt32}, Float64} with 2 entries:\n 0 => 0.952381\n -1 => 0.047619\n```\n\nSee also [`FrequencyEncoder`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "CardinalityReducer" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] +":implemented_methods" = [":fit", ":fitted_params", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" 
= "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[MLJLIBSVMInterface.EpsilonSVR] +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" -":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" + +[MLJTransforms.OrdinalEncoder] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Type\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "unknown" -":prediction_type" = ":deterministic" -":load_path" = "MLJLIBSVMInterface.EpsilonSVR" -":hyperparameters" = "`(:kernel, :gamma, :epsilon, :cost, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" -":is_pure_julia" = "`false`" -":human_name" = "ϵ-support vector regressor" -":is_supervised" = "`true`" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.OrdinalEncoder" +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :output_type)`" +":is_pure_julia" = "`true`" +":human_name" 
= "ordinal encoder" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nEpsilonSVR\n```\n\nA model type for constructing a ϵ-support vector regressor, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nEpsilonSVR = @load EpsilonSVR pkg=LIBSVM\n```\n\nDo `model = EpsilonSVR()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EpsilonSVR(kernel=...)`.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\nThis model is an adaptation of the classifier `SVC` to regression, but has an additional parameter `epsilon` (denoted $ϵ$ in the cited reference).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```julia\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. 
Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `epsilon=0.1` (range (0, `Inf`)): the parameter denoted $ϵ$ in the cited reference; `epsilon` is the thickness of the penalty-free neighborhood of the graph of the prediction function (\"slab\" or \"tube\"). 
Specifically, a data point `(x, y)` incurs no training loss unless it is outside this neighborhood; the further away it is from the this neighborhood, the greater the loss penalty.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```julia\nusing MLJ\nimport LIBSVM\n\nEpsilonSVR = @load EpsilonSVR pkg=LIBSVM # model type\nmodel = EpsilonSVR(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = make_regression(rng=123) # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew, _ = make_regression(3, rng=123)\n\njulia> yhat = predict(mach, Xnew)\n3-element Vector{Float64}:\n 0.2512132502584155\n 0.007340201523624579\n -0.2482949812264707\n```\n\n## User-defined kernels\n\n```julia\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = EpsilonSVR(kernel=k)\nmach = machine(model, X, y) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element Vector{Float64}:\n 1.1121225361666656\n 0.04667702229741916\n -0.6958148424680672\n```\n\nSee also [`NuSVR`](@ref), [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/mpastell/LIBSVM.jl" -":package_name" = "LIBSVM" -":name" = "EpsilonSVR" -":target_in_fit" = "`true`" +":docstring" = """```julia\nOrdinalEncoder\n```\n\nA model type for constructing a 
ordinal encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nOrdinalEncoder = @load OrdinalEncoder pkg=MLJTransforms\n```\n\nDo `model = OrdinalEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `OrdinalEncoder(features=...)`.\n\n`OrdinalEncoder` implements ordinal encoding which replaces the categorical values in the specified categorical features with integers (ordered arbitrarily). This will create an implicit ordering between categories which may not be a proper modelling assumption.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features=[]`: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded.\n * `ignore=true`: Whether to exclude or include the features given in `features`\n * `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them\n * `output_type`: The numerical concrete type of the encoded features. Default is `Float32`.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply ordinal encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. 
Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `index_given_feat_level`: A dictionary that maps each level for each column in a subset of the categorical features of X into an integer.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",] \nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",] \nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\n\n# Check scitype coercion:\nschema(X)\n\nencoder = OrdinalEncoder(ordered_factor = false)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia > Xnew\n (A = [2, 1, 2, 3, 3],\n B = [1.0, 2.0, 3.0, 4.0, 5.0],\n C = [1, 1, 1, 2, 1],\n D = [2, 1, 2, 1, 2],\n E = CategoricalArrays.CategoricalValue{Int64, UInt32}[1, 2, 3, 4, 5],)\n```\n\nSee also [`TargetEncoder`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "OrdinalEncoder" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":implemented_methods" = [":fit", ":fitted_params", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" 
+":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[MLJLIBSVMInterface.LinearSVC] +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"LIBSVM.Linearsolver.LINEARSOLVER\", \"Float64\", \"Float64\", \"Float64\")`" -":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" + +[MLJTransforms.FillImputer] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Vector{Symbol}\", \"Function\", \"Function\", \"Function\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, Any}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" -":package_license" = "unknown" -":prediction_type" = ":deterministic" -":load_path" = "MLJLIBSVMInterface.LinearSVC" -":hyperparameters" = "`(:solver, :tolerance, :cost, :bias)`" -":is_pure_julia" = "`false`" -":human_name" = "linear support vector classifier" -":is_supervised" = "`true`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.FillImputer" 
+":hyperparameters" = "`(:features, :continuous_fill, :count_fill, :finite_fill)`" +":is_pure_julia" = "`true`" +":human_name" = "fill imputer" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nLinearSVC\n```\n\nA model type for constructing a linear support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nLinearSVC = @load LinearSVC pkg=LIBSVM\n```\n\nDo `model = LinearSVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearSVC(solver=...)`.\n\nReference for algorithm and core C-library: Rong-En Fan et al (2008): \"LIBLINEAR: A Library for Large Linear Classification.\" *Journal of Machine Learning Research* 9 1871-1874. Available at [https://www.csie.ntu.edu.tw/~cjlin/papers/liblinear.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/liblinear.pdf). 
\n\nThis model type is similar to `SVC` from the same package with the setting `kernel=LIBSVM.Kernel.KERNEL.Linear`, but is optimized for the linear case.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```julia\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n * `w`: a dictionary of class weights, keyed on `levels(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `solver=LIBSVM.Linearsolver.L2R_L2LOSS_SVC_DUAL`: linear solver, which must be one of the following from the LIBSVM.jl package:\n\n * `LIBSVM.Linearsolver.L2R_LR`: L2-regularized logistic regression (primal))\n * `LIBSVM.Linearsolver.L2R_L2LOSS_SVC_DUAL`: L2-regularized L2-loss support vector classification (dual)\n * `LIBSVM.Linearsolver.L2R_L2LOSS_SVC`: L2-regularized L2-loss support vector classification (primal)\n * `LIBSVM.Linearsolver.L2R_L1LOSS_SVC_DUAL`: L2-regularized L1-loss support vector classification (dual)\n * `LIBSVM.Linearsolver.MCSVM_CS`: support vector classification by Crammer and Singer) `LIBSVM.Linearsolver.L1R_L2LOSS_SVC`: L1-regularized L2-loss support vector classification)\n * `LIBSVM.Linearsolver.L1R_LR`: L1-regularized logistic regression\n * `LIBSVM.Linearsolver.L2R_LR_DUAL`: L2-regularized logistic regression (dual)\n * `tolerance::Float64=Inf`: tolerance for the stopping criterion;\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `bias= -1.0`: if `bias >= 0`, instance `x` becomes `[x; bias]`; if `bias < 0`, no bias term added (default -1)\n\n# Operations\n\n * `predict(mach, Xnew)`: return 
predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Examples\n\n```julia\nusing MLJ\nimport LIBSVM\n\nLinearSVC = @load LinearSVC pkg=LIBSVM # model type\nmodel = LinearSVC(solver=LIBSVM.Linearsolver.L2R_LR) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"versicolor\"\n \"virginica\"\n```\n\n## Incorporating class weights\n\n```julia\nweights = Dict(\"virginica\" => 1, \"versicolor\" => 20, \"setosa\" => 1)\nmach = machine(model, X, y, weights) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"versicolor\"\n \"versicolor\"\n \"versicolor\"\n```\n\nSee also the [`SVC`](@ref) and [`NuSVC`](@ref) classifiers, and [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/liblinear/blob/master/README).\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/mpastell/LIBSVM.jl" -":package_name" = "LIBSVM" -":name" = "LinearSVC" -":target_in_fit" = "`true`" -":supports_class_weights" = "`true`" +":docstring" = """```julia\nFillImputer\n```\n\nA model type for constructing a fill imputer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nFillImputer = @load 
FillImputer pkg=MLJTransforms\n```\n\nDo `model = FillImputer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FillImputer(features=...)`.\n\nUse this model to impute `missing` values in tabular data. A fixed \"filler\" value is learned from the training data, one for each column of the table.\n\nFor imputing missing values in a vector, use [`UnivariateFillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose features each have element scitypes `Union{Missing, T}`, where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`. Check scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of names of features (symbols) for which imputation is to be attempted; default is empty, which is interpreted as \"impute all\".\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_seen_in_fit`: the names of features (features) encountered during training\n * `univariate_transformer`: the univariate 
model applied to determine the fillers (its fields contain the functions defining the filler computations)\n * `filler_given_feature`: dictionary of filler values, keyed on feature (column) names\n\n# Examples\n\n```julia\nusing MLJ\nimputer = FillImputer()\n\nX = (a = [1.0, 2.0, missing, 3.0, missing],\n b = coerce([\"y\", \"n\", \"y\", missing, \"y\"], Multiclass),\n c = [1, 1, 2, missing, 3])\n\nschema(X)\njulia> schema(X)\n┌───────┬───────────────────────────────┐\n│ names │ scitypes │\n├───────┼───────────────────────────────┤\n│ a │ Union{Missing, Continuous} │\n│ b │ Union{Missing, Multiclass{2}} │\n│ c │ Union{Missing, Count} │\n└───────┴───────────────────────────────┘\n\nmach = machine(imputer, X)\nfit!(mach)\n\njulia> fitted_params(mach).filler_given_feature\n(filler = 2.0,)\n\njulia> fitted_params(mach).filler_given_feature\nDict{Symbol, Any} with 3 entries:\n :a => 2.0\n :b => \"y\"\n :c => 2\n\njulia> transform(mach, X)\n(a = [1.0, 2.0, 2.0, 3.0, 2.0],\n b = CategoricalValue{String, UInt32}[\"y\", \"n\", \"y\", \"y\", \"y\"],\n c = [1, 1, 2, 2, 3],)\n```\n\nSee also [`UnivariateFillImputer`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "FillImputer" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":implemented_methods" = [":fit", ":fitted_params", ":transform", ":FillImputer"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = 
"`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[MLJLIBSVMInterface.ProbabilisticSVC] +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" -":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, Any}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":package_license" = "unknown" -":prediction_type" = ":probabilistic" -":load_path" = "MLJLIBSVMInterface.ProbabilisticSVC" -":hyperparameters" = "`(:kernel, :gamma, :cost, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" -":is_pure_julia" = "`false`" -":human_name" = "probabilistic C-support vector classifier" -":is_supervised" = "`true`" -":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nProbabilisticSVC\n```\n\nA model type for constructing a probabilistic C-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nProbabilisticSVC = @load ProbabilisticSVC pkg=LIBSVM\n```\n\nDo `model = ProbabilisticSVC()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `ProbabilisticSVC(kernel=...)`.\n\nThis model is identical to [`SVC`](@ref) with the exception that it predicts probabilities, instead of actual class labels. Probabilities are computed using Platt scaling, which will add to the total computation time.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\n[Platt, John (1999): \"Probabilistic Outputs for Support Vector Machines and Comparisons to Regularized Likelihood Methods.\"](https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393)\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```julia\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n * `w`: a dictionary of class weights, keyed on `levels(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. 
Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return probabilistic predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```julia\nusing MLJ\nimport 
LIBSVM\n\nProbabilisticSVC = @load ProbabilisticSVC pkg=LIBSVM # model type\nmodel = ProbabilisticSVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> probs = predict(mach, Xnew)\n3-element UnivariateFiniteVector{Multiclass{3}, String, UInt32, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>0.00186, versicolor=>0.003, virginica=>0.995)\n UnivariateFinite{Multiclass{3}}(setosa=>0.000563, versicolor=>0.0554, virginica=>0.944)\n UnivariateFinite{Multiclass{3}}(setosa=>1.4e-6, versicolor=>1.68e-6, virginica=>1.0)\n\n\njulia> labels = mode.(probs)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```julia\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = ProbabilisticSVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\nprobs = predict(mach, Xnew)\n```\n\n## Incorporating class weights\n\nIn either scenario above, we can do:\n\n```julia\nweights = Dict(\"virginica\" => 1, \"versicolor\" => 20, \"setosa\" => 1)\nmach = machine(model, X, y, weights) |> fit!\n\nprobs = predict(mach, Xnew)\n```\n\nSee also the classifiers [`SVC`](@ref), [`NuSVC`](@ref) and [`LinearSVC`](@ref), and [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/mpastell/LIBSVM.jl" -":package_name" = "LIBSVM" -":name" = "ProbabilisticSVC" -":target_in_fit" = "`true`" -":supports_class_weights" = "`true`" -":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":deep_properties" = "`()`" -":predict_scitype" = 
"`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" -[MLJLIBSVMInterface.NuSVR] -":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" -":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" -":package_license" = "unknown" -":prediction_type" = ":deterministic" -":load_path" = "MLJLIBSVMInterface.NuSVR" -":hyperparameters" = "`(:kernel, :gamma, :nu, :cost, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" -":is_pure_julia" = "`false`" -":human_name" = "ν-support vector regressor" -":is_supervised" = "`true`" -":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nNuSVR\n```\n\nA model type for constructing a ν-support vector regressor, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nNuSVR = @load NuSVR pkg=LIBSVM\n```\n\nDo `model = NuSVR()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `NuSVR(kernel=...)`.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\nThis model is a re-parameterization of `EpsilonSVR` in which the `epsilon` hyper-parameter is replaced with a new parameter `nu` (denoted $ν$ in the cited reference) which attempts to control the number of support vectors directly.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```julia\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be\n\n called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. 
See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of training errors and a lower bound of the fraction of support vectors. Denoted $ν$ in the cited paper. Changing `nu` changes the thickness of some neighborhood of the graph of the prediction function (\"tube\" or \"slab\") and a training error is said to occur when a data point `(x, y)` lies outside of that neighborhood.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```julia\nusing MLJ\nimport LIBSVM\n\nNuSVR = @load NuSVR pkg=LIBSVM # model type\nmodel = NuSVR(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = make_regression(rng=123) # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew, _ = make_regression(3, rng=123)\n\njulia> yhat = predict(mach, Xnew)\n3-element Vector{Float64}:\n 0.2008156459920009\n 
0.1131520519131709\n -0.2076156254934889\n```\n\n## User-defined kernels\n\n```julia\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = NuSVR(kernel=k)\nmach = machine(model, X, y) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element Vector{Float64}:\n 1.1211558175964662\n 0.06677125944808422\n -0.6817578942749346\n```\n\nSee also [`EpsilonSVR`](@ref), [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/mpastell/LIBSVM.jl" -":package_name" = "LIBSVM" -":name" = "NuSVR" -":target_in_fit" = "`true`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +[MLJTransforms.MissingnessEncoder] ":is_wrapper" = "`false`" - -[MLJLIBSVMInterface.NuSVC] -":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" -":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Dict{T} where T<:Type\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" 
-":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "unknown" -":prediction_type" = ":deterministic" -":load_path" = "MLJLIBSVMInterface.NuSVC" -":hyperparameters" = "`(:kernel, :gamma, :nu, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" -":is_pure_julia" = "`false`" -":human_name" = "ν-support vector classifier" -":is_supervised" = "`true`" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.MissingnessEncoder" +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :label_for_missing)`" +":is_pure_julia" = "`true`" +":human_name" = "missingness encoder" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nNuSVC\n```\n\nA model type for constructing a ν-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nNuSVC = @load NuSVC pkg=LIBSVM\n```\n\nDo `model = NuSVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `NuSVC(kernel=...)`.\n\nThis model is a re-parameterization of the [`SVC`](@ref) classifier, where `nu` replaces `cost`, and is mathematically equivalent to it. The parameter `nu` allows more direct control over the number of support vectors (see under \"Hyper-parameters\").\n\nThis model always predicts actual class labels. For probabilistic predictions, use instead [`ProbabilisticNuSVC`](@ref).\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. 
Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```julia\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. 
Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin errors and a lower bound of the fraction of support vectors. Denoted `ν` in the cited paper. Changing `nu` changes the thickness of the margin (a neighborhood of the decision surface) and a margin error is said to have occurred if a training observation lies on the wrong side of the surface or within the margin.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```julia\nusing MLJ\nimport LIBSVM\n\nNuSVC = @load NuSVC pkg=LIBSVM # model type\nmodel = NuSVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```julia\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = NuSVC(kernel=k)\nmach = machine(model, X, y) |> 
fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\nSee also the classifiers [`SVC`](@ref) and [`LinearSVC`](@ref), [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation. [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/mpastell/LIBSVM.jl" -":package_name" = "LIBSVM" -":name" = "NuSVC" -":target_in_fit" = "`true`" +":docstring" = """```julia\nMissingnessEncoder\n```\n\nA model type for constructing a missingness encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nMissingnessEncoder = @load MissingnessEncoder pkg=MLJTransforms\n```\n\nDo `model = MissingnessEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MissingnessEncoder(features=...)`.\n\n`MissingnessEncoder` maps any missing level of a categorical feature into a new level (e.g., \"Missing\"). By this, missingness will be treated as a new level by any subsequent model. This assumes that the categorical features have raw types that are in `Char`, `AbstractString`, and `Number`.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. 
Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or in clude from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded.\n\n * `ignore=true`: Whether to exclude or include the features given in `features`\n\n * `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them\n\n * `label_for_missing::Dict{<:Type, <:Any}()= Dict( AbstractString => \"missing\", Char => 'm', )`: A dictionary where the possible values for keys are the types in `Char`, `AbstractString`, and `Number` and where each value signifies the new level to map into given a column raw super type. By default, if the raw type of the column subtypes `AbstractString` then missing values will be replaced with `\"missing\"` and if the raw type subtypes `Char` then the new value is `'m'` and if the raw type subtypes `Number` then the new value is the lowest value in the column - 1.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply cardinality reduction to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. 
Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `label_for_missing_given_feature`: A dictionary that for each column, maps `missing` into some value according to `label_for_missing`\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nimport StatsBase.proportionmap\nusing MLJ\n\n# Define a table with missing values\nXm = (\n A = categorical([\"Ben\", \"John\", missing, missing, \"Mary\", \"John\", missing]),\n B = [1.85, 1.67, missing, missing, 1.5, 1.67, missing],\n C= categorical([7, 5, missing, missing, 10, 0, missing]),\n D = [23, 23, 44, 66, 14, 23, 11],\n E = categorical([missing, 'g', 'r', missing, 'r', 'g', 'p'])\n)\n\nencoder = MissingnessEncoder()\nmach = fit!(machine(encoder, Xm))\nXnew = transform(mach, Xm)\n\njulia> Xnew\n(A = [\"Ben\", \"John\", \"missing\", \"missing\", \"Mary\", \"John\", \"missing\"],\n B = Union{Missing, Float64}[1.85, 1.67, missing, missing, 1.5, 1.67, missing],\n C = [7, 5, -1, -1, 10, 0, -1],\n D = [23, 23, 44, 66, 14, 23, 11],\n E = ['m', 'g', 'r', 'm', 'r', 'g', 'p'],)\n\n```\n\nSee also [`CardinalityReducer`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "MissingnessEncoder" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":implemented_methods" = [":fit", ":fitted_params", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = 
"`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[MLJLIBSVMInterface.SVC] +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" -":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, Any}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" -":package_license" = "unknown" -":prediction_type" = ":deterministic" -":load_path" = "MLJLIBSVMInterface.SVC" -":hyperparameters" = "`(:kernel, :gamma, :cost, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" -":is_pure_julia" = "`false`" -":human_name" = "C-support vector classifier" -":is_supervised" = "`true`" -":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nSVC\n```\n\nA model type for constructing a C-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nSVC = @load SVC pkg=LIBSVM\n```\n\nDo `model = SVC()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `SVC(kernel=...)`.\n\nThis model predicts actual class labels. To predict probabilities, use instead [`ProbabilisticSVC`](@ref).\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```julia\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n * `w`: a dictionary of class weights, keyed on `levels(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. 
See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```julia\nusing MLJ\nimport LIBSVM\n\nSVC = @load SVC pkg=LIBSVM # model type\nmodel = SVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```julia\nk(x1, x2) = x1'*x2 # equivalent to 
`LIBSVM.Kernel.Linear`\nmodel = SVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## Incorporating class weights\n\nIn either scenario above, we can do:\n\n```julia\nweights = Dict(\"virginica\" => 1, \"versicolor\" => 20, \"setosa\" => 1)\nmach = machine(model, X, y, weights) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"versicolor\"\n \"versicolor\"\n \"versicolor\"\n```\n\nSee also the classifiers [`ProbabilisticSVC`](@ref), [`NuSVC`](@ref) and [`LinearSVC`](@ref). And see [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/mpastell/LIBSVM.jl" -":package_name" = "LIBSVM" -":name" = "SVC" -":target_in_fit" = "`true`" -":supports_class_weights" = "`true`" -":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" -[MLJLIBSVMInterface.ProbabilisticNuSVC] -":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" -":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":package_license" = "unknown" -":prediction_type" = ":probabilistic" -":load_path" = "MLJLIBSVMInterface.ProbabilisticNuSVC" -":hyperparameters" = "`(:kernel, :gamma, :nu, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" -":is_pure_julia" = "`false`" -":human_name" = "probabilistic ν-support vector classifier" -":is_supervised" = "`true`" -":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nProbabilisticNuSVC\n```\n\nA model type for constructing a probabilistic ν-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nProbabilisticNuSVC = @load ProbabilisticNuSVC pkg=LIBSVM\n```\n\nDo `model = ProbabilisticNuSVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ProbabilisticNuSVC(kernel=...)`.\n\nThis model is identical to [`NuSVC`](@ref) with the exception that it predicts probabilities, instead of actual class labels. Probabilities are computed using Platt scaling, which will add to total computation time.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). 
\n\n[Platt, John (1999): \"Probabilistic Outputs for Support Vector Machines and Comparisons to Regularized Likelihood Methods.\"](https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393)\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```julia\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. 
Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin errors and a lower bound of the fraction of support vectors. Denoted `ν` in the cited paper. Changing `nu` changes the thickness of the margin (a neighborhood of the decision surface) and a margin error is said to have occurred if a training observation lies on the wrong side of the surface or within the margin.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```julia\nusing MLJ\nimport LIBSVM\n\nProbabilisticNuSVC = @load ProbabilisticNuSVC pkg=LIBSVM # model type\nmodel = ProbabilisticNuSVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> probs = predict(mach, Xnew)\n3-element UnivariateFiniteVector{Multiclass{3}, String, UInt32, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>0.00313, versicolor=>0.0247, virginica=>0.972)\n UnivariateFinite{Multiclass{3}}(setosa=>0.000598, versicolor=>0.0155, 
virginica=>0.984)\n UnivariateFinite{Multiclass{3}}(setosa=>2.27e-6, versicolor=>2.73e-6, virginica=>1.0)\n\njulia> yhat = mode.(probs)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```julia\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = ProbabilisticNuSVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\nprobs = predict(mach, Xnew)\n```\n\nSee also the classifiers [`NuSVC`](@ref), [`SVC`](@ref), [`ProbabilisticSVC`](@ref) and [`LinearSVC`](@ref). And see [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation. [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/mpastell/LIBSVM.jl" -":package_name" = "LIBSVM" -":name" = "ProbabilisticNuSVC" -":target_in_fit" = "`true`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +[MLJTransforms.ContrastEncoder] ":is_wrapper" = "`false`" - -[MLJFlux.EntityEmbedder] -":is_wrapper" = "`true`" -":hyperparameter_types" = "`(\"Union{MLJFlux.MLJFluxDeterministic, MLJFlux.MLJFluxProbabilistic}\",)`" -":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" -":hyperparameter_ranges" = "`(nothing,)`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Union{Symbol, AbstractVector{Symbol}}\", \"Any\", 
\"Bool\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "unknown" ":prediction_type" = ":unknown" -":load_path" = "MLJFlux.EntityEmbedder" -":hyperparameters" = "`(:model,)`" +":load_path" = "MLJTransforms.ContrastEncoder" +":hyperparameters" = "`(:features, :ignore, :mode, :buildmatrix, :ordered_factor)`" ":is_pure_julia" = "`true`" -":human_name" = "entity embedder" +":human_name" = "contrast encoder" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nEntityEmbedder(; model=supervised_mljflux_model)\n```\n\nWrapper for a MLJFlux supervised model, to convert it to a transformer. Such transformers are still presented a target variable in training, but they behave as transformers in MLJ pipelines. They are entity embedding transformers, in the sense of the article, \"Entity Embeddings of Categorical Variables\" by Cheng Guo, Felix Berkhahn.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```julia\nmach = machine(embed_model, X, y)\n```\n\nHere:\n\n * `embed_model` is an instance of `EntityEmbedder`, which wraps a supervised MLJFlux model, `model`, which must be an instance of one of these: `MLJFlux.NeuralNetworkClassifier`, `NeuralNetworkBinaryClassifier`, `MLJFlux.NeuralNetworkRegressor`,`MLJFlux.MultitargetNeuralNetworkRegressor`.\n * `X` is any table of input features supported by the model being wrapped. Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. 
Use `schema(X)` to check scitypes.\n * `y` is the target, which can be any `AbstractVector` supported by the model being wrapped.\n\nTrain the machine using `fit!(mach)`.\n\n# Examples\n\nIn the following example we wrap a `NeuralNetworkClassifier` as an `EntityEmbedder`, so that it can be used to supply continuously encoded features to a nearest neighbor model, which does not support categorical features.\n\n## Simple Example\n\n```julia\nusing MLJ\n\n# Setup some data\nN = 400\nX = (\n a = rand(Float32, N),\n b = categorical(rand(\"abcde\", N)),\n c = categorical(rand(\"ABCDEFGHIJ\", N), ordered = true),\n)\n\ny = categorical(rand(\"YN\", N));\n\n# Initiate model\nEntityEmbedder = @load EntityEmbedder pkg=MLJFlux\n\n# Flux model to do learn the entity embeddings:\nNeuralNetworkClassifier = @load NeuralNetworkClassifier pkg=MLJFlux\n\n# Instantiate the models:\nclf = NeuralNetworkClassifier(embedding_dims=Dict(:b => 2, :c => 3))\nemb = EntityEmbedder(clf)\n\n# Train and transform the data using the embedder:\nmach = machine(emb, X, y)\nfit!(mach)\nXnew = transform(mach, X)\n\n# Compare schemas before and after transformation\nschema(X)\nschema(Xnew)\n```\n\n## Using with Downstream Models (Pipeline)\n\n```julia\nusing MLJ\n\n# Setup some data\nN = 400\nX = (\n a = rand(Float32, N),\n b = categorical(rand(\"abcde\", N)),\n c = categorical(rand(\"ABCDEFGHIJ\", N), ordered = true),\n)\n\ny = categorical(rand(\"YN\", N));\n\n# Initiate model\nEntityEmbedder = @load EntityEmbedder pkg=MLJFlux\n\n# Flux model to do learn the entity embeddings:\nNeuralNetworkClassifier = @load NeuralNetworkClassifier pkg=MLJFlux\n\n# Other supervised model type, requiring `Continuous` features:\nKNNClassifier = @load KNNClassifier pkg=NearestNeighborModels\n\n# Instantiate the models:\nclf = NeuralNetworkClassifier(embedding_dims=Dict(:b => 2, :c => 3))\nemb = EntityEmbedder(clf)\n\n# Now construct the pipeline:\npipe = emb |> KNNClassifier()\n\n# And train it to make predictions:\nmach 
= machine(pipe, X, y)\nfit!(mach)\npredict(mach, X)[1:3]\n```\n\nIt is to be emphasized that the `NeuralNertworkClassifier` is only being used to learn entity embeddings, not to make predictions, which here are made by `KNNClassifier()`.\n\nSee also [`NeuralNetworkClassifier`, `NeuralNetworkRegressor`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/FluxML/MLJFlux.jl" -":package_name" = "MLJFlux" -":name" = "EntityEmbedder" -":target_in_fit" = "`true`" +":docstring" = """```julia\nContrastEncoder\n```\n\nA model type for constructing a contrast encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nContrastEncoder = @load ContrastEncoder pkg=MLJTransforms\n```\n\nDo `model = ContrastEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ContrastEncoder(features=...)`.\n\n`ContrastEncoder` implements the following contrast encoding methods for categorical features: dummy, sum, backward/forward difference, and Helmert coding. More generally, users can specify a custom contrast or hypothesis matrix, and each feature can be encoded using a different method.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. 
Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features=[]`: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded.\n\n * `mode=:dummy`: The type of encoding to use. Can be one of `:contrast`, `:dummy`, `:sum`, `:backward_diff`, `:forward_diff`, `:helmert` or `:hypothesis`. If `ignore=false` (features to be encoded are listed explicitly in `features`), then this can be a vector of the same length as `features` to specify a different contrast encoding scheme for each feature\n * `buildmatrix=nothing`: A function or other callable with signature `buildmatrix(colname,k)`, where `colname` is the name of the feature levels and `k` is its length, and which returns a contrast or hypothesis matrix with row/column ordering consistent with the ordering of `levels(col)`. Only relevant if `mode` is `:contrast` or `:hypothesis`.\n * `ignore=true`: Whether to exclude or include the features given in `features`\n\n * `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply contrast encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. 
Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `vector_given_value_given_feature`: A dictionary that maps each level for each column in a subset of the categorical features of X into its frequency.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical dataset\nX = (\n name = categorical([\"Ben\", \"John\", \"Mary\", \"John\"]),\n height = [1.85, 1.67, 1.5, 1.67],\n favnum = categorical([7, 5, 10, 1]),\n age = [23, 23, 14, 23],\n)\n\n# Check scitype coercions:\nschema(X)\n\nencoder = ContrastEncoder(\n features = [:name, :favnum],\n ignore = false,\n mode = [:dummy, :helmert],\n)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia> Xnew\n (name_John = [1.0, 0.0, 0.0, 0.0],\n name_Mary = [0.0, 1.0, 0.0, 1.0],\n height = [1.85, 1.67, 1.5, 1.67],\n favnum_5 = [0.0, 1.0, 0.0, -1.0],\n favnum_7 = [2.0, -1.0, 0.0, -1.0],\n favnum_10 = [-1.0, -1.0, 3.0, -1.0],\n age = [23, 23, 14, 23],)\n```\n\nSee also [`OneHotEncoder`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "ContrastEncoder" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":training_losses", ":transform"] +":implemented_methods" = [":fit", ":fitted_params", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" -":supports_training_losses" = "`true`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = 
"`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" - -[MLJFlux.MultitargetNeuralNetworkRegressor] -":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" -":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" -":package_license" = "MIT" -":prediction_type" = ":deterministic" -":load_path" = "MLJFlux.MultitargetNeuralNetworkRegressor" -":hyperparameters" = "`(:builder, :optimiser, :loss, :epochs, :batch_size, :lambda, :alpha, :rng, :optimiser_changes_trigger_retraining, :acceleration, :embedding_dims)`" -":is_pure_julia" = "`true`" -":human_name" = "multitarget neural network regressor" -":is_supervised" = "`true`" -":iteration_parameter" = ":epochs" -":docstring" = """```julia\nMultitargetNeuralNetworkRegressor\n```\n\nA model type for constructing a multitarget neural network regressor, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nMultitargetNeuralNetworkRegressor = @load MultitargetNeuralNetworkRegressor pkg=MLJFlux\n```\n\nDo `model = MultitargetNeuralNetworkRegressor()` to construct an instance 
with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetNeuralNetworkRegressor(builder=...)`.\n\n`MultitargetNeuralNetworkRegressor` is for training a data-dependent Flux.jl neural network to predict a multi-valued `Continuous` target, represented as a table, given a table of `Continuous` features. Users provide a recipe for constructing the network, based on properties of the data that is encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\nIn addition to features with `Continuous` scientific element type, this model supports categorical features in the input table. If present, such features are embedded into dense vectors by the use of an additional `EntityEmbedderLayer` layer after the input, as described in Entity Embeddings of Categorical Variables by Cheng Guo, Felix Berkhahn arXiv, 2016.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` provides input features and is either: (i) a `Matrix` with `Continuous` element scitype (typically `Float32`); or (ii) a table of input features (eg, a `DataFrame`) whose columns have `Continuous`, `Multiclass` or `OrderedFactor` element scitype; check column scitypes with `schema(X)`. If any `Multiclass` or `OrderedFactor` features appear, the constructed network will use an `EntityEmbedderLayer` layer to transform them into dense vectors. If `X` is a `Matrix`, it is assumed that columns correspond to features and rows corresponding to observations.\n\n * `y` is the target, which can be any table or matrix of output targets whose element scitype is `Continuous`; check column scitypes with `schema(y)`. 
If `y` is a `Matrix`, it is assumed to have columns corresponding to variables and rows corresponding to observations.\n\n# Hyper-parameters\n\n * `builder=MLJFlux.Linear(σ=Flux.relu)`: An MLJFlux builder that constructs a neural network. Possible `builders` include: `Linear`, `Short`, and `MLP`. See MLJFlux documentation for more on builders, and the example below for using the `@builder` convenience macro.\n * `optimiser::Optimisers.Adam()`: An Optimisers.jl optimiser. The optimiser performs the updating of the weights of the network. To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of `10` between `1` and `1e-7`.\n * `loss=Flux.mse`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). For a regression task, natural loss functions are:\n\n * `Flux.mse`\n * `Flux.mae`\n * `Flux.msle`\n * `Flux.huber_loss`\n\n Currently MLJ measures are not supported as loss functions here.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete the training dataset.\n * `batch_size::int=1`: the batch size to be used for training, representing the number of samples per update of the network weights. Typically, batch size is between `8` and `512`. Increassing batch size may accelerate training if `acceleration=CUDALibs()` and a GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`. Note the history reports unpenalized losses.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. 
A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training. The default is `Random.default_rng()`.\n * `optimizer_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. For Training on GPU, use `CUDALibs()`.\n * `embedding_dims`: a `Dict` whose keys are names of categorical features, given as symbols, and whose values are numbers representing the desired dimensionality of the entity embeddings of such features: an integer value of `7`, say, sets the embedding dimensionality to `7`; a float value of `0.5`, say, sets the embedding dimensionality to `ceil(0.5 * c)`, where `c` is the number of feature levels. Unspecified feature dimensionality defaults to `min(c - 1, 10)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same scitype as `X` above. Predictions are deterministic.\n * `transform(mach, Xnew)`: Assuming `Xnew` has the same schema as `X`, transform the categorical features of `Xnew` into dense `Continuous` vectors using the `MLJFlux.EntityEmbedderLayer` layer present in the network. Does nothing in case the model was trained on an input `X` that lacks categorical features.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalised if `lambda != 0`) in historical order, of length `epochs + 1`. 
The first element is the pre-training loss.\n\n# Examples\n\nIn this example we apply a multi-target regression model to synthetic data:\n\n```julia\nusing MLJ\nimport MLJFlux\nusing Flux\nimport Optimisers\n```\n\nFirst, we generate some synthetic data (needs MLJBase 0.20.16 or higher):\n\n```julia\nX, y = make_regression(100, 9; n_targets = 2) # both tables\nschema(y)\nschema(X)\n```\n\nSplitting off a test set:\n\n```julia\n(X, Xtest), (y, ytest) = partition((X, y), 0.7, multi=true);\n```\n\nNext, we can define a `builder`, making use of a convenience macro to do so. In the following `@builder` call, `n_in` is a proxy for the number input features and `n_out` the number of target variables (both known at `fit!` time), while `rng` is a proxy for a RNG (which will be passed from the `rng` field of `model` defined below).\n\n```julia\nbuilder = MLJFlux.@builder begin\n init=Flux.glorot_uniform(rng)\n Chain(\n Dense(n_in, 64, relu, init=init),\n Dense(64, 32, relu, init=init),\n Dense(32, n_out, init=init),\n )\nend\n```\n\nInstantiating the regression model:\n\n```julia\nMultitargetNeuralNetworkRegressor = @load MultitargetNeuralNetworkRegressor\nmodel = MultitargetNeuralNetworkRegressor(builder=builder, rng=123, epochs=20)\n```\n\nWe will arrange for standardization of the the target by wrapping our model in `TransformedTargetModel`, and standardization of the features by inserting the wrapped model in a pipeline:\n\n```julia\npipe = Standardizer |> TransformedTargetModel(model, transformer=Standardizer)\n```\n\nIf we fit with a high verbosity (>1), we will see the losses during training. 
We can also see the losses in the output of `report(mach)`\n\n```julia\nmach = machine(pipe, X, y)\nfit!(mach, verbosity=2)\n\n# first element initial loss, 2:end per epoch training losses\nreport(mach).transformed_target_model_deterministic.model.training_losses\n```\n\nFor experimenting with learning rate, see the [`NeuralNetworkRegressor`](@ref) example.\n\n```julia\npipe.transformed_target_model_deterministic.model.optimiser = Optimisers.Adam(0.0001)\n```\n\nWith the learning rate fixed, we can now compute a CV estimate of the performance (using all data bound to `mach`) and compare this with performance on the test set:\n\n```julia\n\n# CV estimate, based on `(X, y)`:\nevaluate!(mach, resampling=CV(nfolds=5), measure=multitarget_l2)\n\n# loss for `(Xtest, test)`:\nfit!(mach) # trains on all data `(X, y)`\nyhat = predict(mach, Xtest)\nmultitarget_l2(yhat, ytest)\n```\n\nSee also [`NeuralNetworkRegressor`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/alan-turing-institute/MLJFlux.jl" -":package_name" = "MLJFlux" -":name" = "MultitargetNeuralNetworkRegressor" -":target_in_fit" = "`true`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [":predict"] -":deep_properties" = "`(:optimiser, :builder)`" -":predict_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" -":target_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" -":supports_training_losses" = "`true`" +":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, 
AbstractMatrix{ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" ":constructor" = "`nothing`" -[MLJFlux.NeuralNetworkClassifier] +[MLJTransforms.UnivariateStandardizer] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" -":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameter_types" = "`()`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`()`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Infinite}}`" +":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" -":prediction_type" = ":probabilistic" -":load_path" = "MLJFlux.NeuralNetworkClassifier" -":hyperparameters" = "`(:builder, :finaliser, :optimiser, :loss, :epochs, :batch_size, :lambda, :alpha, :rng, :optimiser_changes_trigger_retraining, :acceleration, :embedding_dims)`" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.UnivariateStandardizer" +":hyperparameters" = "`()`" ":is_pure_julia" = "`true`" -":human_name" = "neural 
network classifier" -":is_supervised" = "`true`" -":iteration_parameter" = ":epochs" -":docstring" = """```julia\nNeuralNetworkClassifier\n```\n\nA model type for constructing a neural network classifier, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nNeuralNetworkClassifier = @load NeuralNetworkClassifier pkg=MLJFlux\n```\n\nDo `model = NeuralNetworkClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `NeuralNetworkClassifier(builder=...)`.\n\n`NeuralNetworkClassifier` is for training a data-dependent Flux.jl neural network for making probabilistic predictions of a `Multiclass` or `OrderedFactor` target, given a table of `Continuous` features. Users provide a recipe for constructing the network, based on properties of the data that is encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\nIn addition to features with `Continuous` scientific element type, this model supports categorical features in the input table. If present, such features are embedded into dense vectors by the use of an additional `EntityEmbedderLayer` layer after the input, as described in Entity Embeddings of Categorical Variables by Cheng Guo, Felix Berkhahn arXiv, 2016.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` provides input features and is either: (i) a `Matrix` with `Continuous` element scitype (typically `Float32`); or (ii) a table of input features (eg, a `DataFrame`) whose columns have `Continuous`, `Multiclass` or `OrderedFactor` element scitype; check column scitypes with `schema(X)`. 
If any `Multiclass` or `OrderedFactor` features appear, the constructed network will use an `EntityEmbedderLayer` layer to transform them into dense vectors. If `X` is a `Matrix`, it is assumed that columns correspond to features and rows corresponding to observations.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Multiclass` or `OrderedFactor`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `builder=MLJFlux.Short()`: An MLJFlux builder that constructs a neural network. Possible `builders` include: `MLJFlux.Linear`, `MLJFlux.Short`, and `MLJFlux.MLP`. See MLJFlux.jl documentation for examples of user-defined builders. See also `finaliser` below.\n * `optimiser::Optimisers.Adam()`: An Optimisers.jl optimiser. The optimiser performs the updating of the weights of the network. To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of `10` between `1` and `1e-7`.\n * `loss=Flux.crossentropy`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). For a classification task, the most natural loss functions are:\n\n * `Flux.crossentropy`: Standard multiclass classification loss, also known as the log loss.\n * `Flux.logitcrossentopy`: Mathematically equal to crossentropy, but numerically more stable than finalising the outputs with `softmax` and then calculating crossentropy. You will need to specify `finaliser=identity` to remove MLJFlux's default softmax finaliser, and understand that the output of `predict` is then unnormalized (no longer probabilistic).\n * `Flux.tversky_loss`: Used with imbalanced data to give more weight to false negatives.\n * `Flux.focal_loss`: Used with highly imbalanced data. 
Weights harder examples more than easier examples.\n\n Currently MLJ measures are not supported values of `loss`.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete the training dataset.\n * `batch_size::int=1`: the batch size to be used for training, representing the number of samples per update of the network weights.] Typically, batch size is between `8` and `512`. Increassing batch size may accelerate training if `acceleration=CUDALibs()` and a GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`. Note the history reports unpenalized losses.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training. The default is `Random.default_rng()`.\n * `optimizer_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. For Training on GPU, use `CUDALibs()`.\n * `finaliser=Flux.softmax`: The final activation function of the neural network (applied after the network defined by `builder`). Defaults to `Flux.softmax`.\n * `embedding_dims`: a `Dict` whose keys are names of categorical features, given as symbols, and whose values are numbers representing the desired dimensionality of the entity embeddings of such features: an integer value of `7`, say, sets the embedding dimensionality to `7`; a float value of `0.5`, say, sets the embedding dimensionality to `ceil(0.5 * c)`, where `c` is the number of feature levels. 
Unspecified feature dimensionality defaults to `min(c - 1, 10)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n * `transform(mach, Xnew)`: Assuming `Xnew` has the same schema as `X`, transform the categorical features of `Xnew` into dense `Continuous` vectors using the `MLJFlux.EntityEmbedderLayer` layer present in the network. Does nothing in case the model was trained on an input `X` that lacks categorical features.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network. This includes the final layer specified by `finaliser` (eg, `softmax`).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalised if `lambda != 0`) in historical order, of length `epochs + 1`. The first element is the pre-training loss.\n\n# Examples\n\nIn this example we build a classification model using the Iris dataset. This is a very basic example, using a default builder and no standardization. 
For a more advanced illustration, see [`NeuralNetworkRegressor`](@ref) or [`ImageClassifier`](@ref), and examples in the MLJFlux.jl documentation.\n\n```julia\nusing MLJ\nusing Flux\nimport RDatasets\nimport Optimisers\n```\n\nFirst, we can load the data:\n\n```julia\niris = RDatasets.dataset(\"datasets\", \"iris\");\ny, X = unpack(iris, ==(:Species), rng=123); # a vector and a table\nNeuralNetworkClassifier = @load NeuralNetworkClassifier pkg=MLJFlux\nclf = NeuralNetworkClassifier()\n```\n\nNext, we can train the model:\n\n```julia\nmach = machine(clf, X, y)\nfit!(mach)\n```\n\nWe can train the model in an incremental fashion, altering the learning rate as we go, provided `optimizer_changes_trigger_retraining` is `false` (the default). Here, we also change the number of (total) iterations:\n\n```julia\nclf.optimiser = Optimisers.Adam(clf.optimiser.eta * 2)\nclf.epochs = clf.epochs + 5\n\nfit!(mach, verbosity=2) # trains 5 more epochs\n```\n\nWe can inspect the mean training loss using the `cross_entropy` function:\n\n```julia\ntraining_loss = cross_entropy(predict(mach, X), y)\n```\n\nAnd we can access the Flux chain (model) using `fitted_params`:\n\n```julia\nchain = fitted_params(mach).chain\n```\n\nFinally, we can see how the out-of-sample performance changes over time, using MLJ's `learning_curve` function:\n\n```julia\nr = range(clf, :epochs, lower=1, upper=200, scale=:log10)\ncurve = learning_curve(clf, X, y,\n range=r,\n resampling=Holdout(fraction_train=0.7),\n measure=cross_entropy)\nusing Plots\nplot(curve.parameter_values,\n curve.measurements,\n xlab=curve.parameter_name,\n xscale=curve.parameter_scale,\n ylab = \"Cross Entropy\")\n\n```\n\nSee also [`ImageClassifier`](@ref), [`NeuralNetworkBinaryClassifier`](@ref).\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/alan-turing-institute/MLJFlux.jl" -":package_name" = "MLJFlux" -":name" = "NeuralNetworkClassifier" -":target_in_fit" = "`true`" 
+":human_name" = "single variable discretizer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nUnivariateStandardizer()\n```\n\nTransformer type for standardizing (whitening) single variable data.\n\nThis model may be deprecated in the future. Consider using [`Standardizer`](@ref), which handles both tabular *and* univariate data.\n""" +":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.Infinite}`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "UnivariateStandardizer" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":predict"] -":deep_properties" = "`(:optimiser, :builder)`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":supports_training_losses" = "`true`" +":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Infinite}`" +":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":constructor" = "`nothing`" -[MLJFlux.ImageClassifier] +[MLJTransforms.UnivariateFillImputer] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", 
\"Union{Int64, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\")`" -":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameter_types" = "`(\"Function\", \"Function\", \"Function\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Image}, AbstractVector{<:ScientificTypesBase.Multiclass}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}}`" +":output_scitype" = "`Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Finite}}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" -":prediction_type" = ":probabilistic" -":load_path" = "MLJFlux.ImageClassifier" -":hyperparameters" = "`(:builder, :finaliser, :optimiser, :loss, :epochs, :batch_size, :lambda, :alpha, :rng, :optimiser_changes_trigger_retraining, :acceleration)`" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.UnivariateFillImputer" +":hyperparameters" = "`(:continuous_fill, :count_fill, :finite_fill)`" ":is_pure_julia" = "`true`" -":human_name" = "image classifier" -":is_supervised" = "`true`" -":iteration_parameter" = ":epochs" -":docstring" = """```julia\nImageClassifier\n```\n\nA model type for constructing a image classifier, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model 
interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nImageClassifier = @load ImageClassifier pkg=MLJFlux\n```\n\nDo `model = ImageClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ImageClassifier(builder=...)`.\n\n`ImageClassifier` classifies images using a neural network adapted to the type of images provided (color or gray scale). Predictions are probabilistic. Users provide a recipe for constructing the network, based on properties of the image encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any `AbstractVector` of images with `ColorImage` or `GrayImage` scitype; check the scitype with `scitype(X)` and refer to ScientificTypes.jl documentation on coercing typical image formats into an appropriate type.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Multiclass`; check the scitype with `scitype(y)`.\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `builder`: An MLJFlux builder that constructs the neural network. The fallback builds a depth-16 VGG architecture adapted to the image size and number of target classes, with no batch normalization; see the Metalhead.jl documentation for details. See the example below for a user-specified builder. A convenience macro `@builder` is also available. See also `finaliser` below.\n * `optimiser::Optimisers.Adam()`: An Optimisers.jl optimiser. The optimiser performs the updating of the weights of the network. To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of `10` between `1` and `1e-7`.\n * `loss=Flux.crossentropy`: The loss function which the network will optimize. 
Should be a function which can be called in the form `loss(yhat, y)`. Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). For a classification task, the most natural loss functions are:\n\n * `Flux.crossentropy`: Standard multiclass classification loss, also known as the log loss.\n * `Flux.logitcrossentopy`: Mathematically equal to crossentropy, but numerically more stable than finalising the outputs with `softmax` and then calculating crossentropy. You will need to specify `finaliser=identity` to remove MLJFlux's default softmax finaliser, and understand that the output of `predict` is then unnormalized (no longer probabilistic).\n * `Flux.tversky_loss`: Used with imbalanced data to give more weight to false negatives.\n * `Flux.focal_loss`: Used with highly imbalanced data. Weights harder examples more than easier examples.\n\n Currently MLJ measures are not supported values of `loss`.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete the training dataset.\n * `batch_size::int=1`: the batch size to be used for training, representing the number of samples per update of the network weights. Typically, batch size is between 8 and\n\n 512. Increassing batch size may accelerate training if `acceleration=CUDALibs()` and a\n\n GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`. Note the history reports unpenalized losses.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training. 
The default is `Random.default_rng()`.\n * `optimizer_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. For Training on GPU, use `CUDALibs()`.\n * `finaliser=Flux.softmax`: The final activation function of the neural network (applied after the network defined by `builder`). Defaults to `Flux.softmax`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network. This includes the final layer specified by `finaliser` (eg, `softmax`).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalised if `lambda != 0`) in historical order, of length `epochs + 1`. 
The first element is the pre-training loss.\n\n# Examples\n\nIn this example we use MLJFlux and a custom builder to classify the MNIST image dataset.\n\n```julia\nusing MLJ\nusing Flux\nimport MLJFlux\nimport Optimisers\nimport MLJIteration # for `skip` control\n```\n\nFirst we want to download the MNIST dataset, and unpack into images and labels:\n\n```julia\nimport MLDatasets: MNIST\ndata = MNIST(split=:train)\nimages, labels = data.features, data.targets\n```\n\nIn MLJ, integers cannot be used for encoding categorical data, so we must coerce them into the `Multiclass` scitype:\n\n```julia\nlabels = coerce(labels, Multiclass);\n```\n\nAbove `images` is a single array but MLJFlux requires the images to be a vector of individual image arrays:\n\n```julia\nimages = coerce(images, GrayImage);\nimages[1]\n```\n\nWe start by defining a suitable `builder` object. This is a recipe for building the neural network. Our builder will work for images of any (constant) size, whether they be color or black and white (ie, single or multi-channel). The architecture always consists of six alternating convolution and max-pool layers, and a final dense layer; the filter size and the number of channels after each convolution layer is customizable.\n\n```julia\nimport MLJFlux\n\nstruct MyConvBuilder\n filter_size::Int\n channels1::Int\n channels2::Int\n channels3::Int\nend\n\nmake2d(x::AbstractArray) = reshape(x, :, size(x)[end])\n\nfunction MLJFlux.build(b::MyConvBuilder, rng, n_in, n_out, n_channels)\n k, c1, c2, c3 = b.filter_size, b.channels1, b.channels2, b.channels3\n mod(k, 2) == 1 || error(\"`filter_size` must be odd. 
\")\n p = div(k - 1, 2) # padding to preserve image size\n init = Flux.glorot_uniform(rng)\n front = Chain(\n Conv((k, k), n_channels => c1, pad=(p, p), relu, init=init),\n MaxPool((2, 2)),\n Conv((k, k), c1 => c2, pad=(p, p), relu, init=init),\n MaxPool((2, 2)),\n Conv((k, k), c2 => c3, pad=(p, p), relu, init=init),\n MaxPool((2 ,2)),\n make2d)\n d = Flux.outputsize(front, (n_in..., n_channels, 1)) |> first\n return Chain(front, Dense(d, n_out, init=init))\nend\n```\n\nIt is important to note that in our `build` function, there is no final `softmax`. This is applied by default in all MLJFlux classifiers (override this using the `finaliser` hyperparameter).\n\nNow that our builder is defined, we can instantiate the actual MLJFlux model. If you have a GPU, you can substitute in `acceleration=CUDALibs()` below to speed up training.\n\n```julia\nImageClassifier = @load ImageClassifier pkg=MLJFlux\nclf = ImageClassifier(builder=MyConvBuilder(3, 16, 32, 32),\n batch_size=50,\n epochs=10,\n rng=123)\n```\n\nYou can add Flux options such as `optimiser` and `loss` in the snippet above. 
Currently, `loss` must be a flux-compatible loss, and not an MLJ measure.\n\nNext, we can bind the model with the data in a machine, and train using the first 500 images:\n\n```julia\nmach = machine(clf, images, labels);\nfit!(mach, rows=1:500, verbosity=2);\nreport(mach)\nchain = fitted_params(mach)\nFlux.params(chain)[2]\n```\n\nWe can tack on 20 more epochs by modifying the `epochs` field, and iteratively fit some more:\n\n```julia\nclf.epochs = clf.epochs + 20\nfit!(mach, rows=1:500, verbosity=2);\n```\n\nWe can also make predictions and calculate an out-of-sample loss estimate, using any MLJ measure (loss/score):\n\n```julia\npredicted_labels = predict(mach, rows=501:1000);\ncross_entropy(predicted_labels, labels[501:1000])\n```\n\nThe preceding `fit!`/`predict`/evaluate workflow can be alternatively executed as follows:\n\n```julia\nevaluate!(mach,\n resampling=Holdout(fraction_train=0.5),\n measure=cross_entropy,\n rows=1:1000,\n verbosity=0)\n```\n\nSee also [`NeuralNetworkClassifier`](@ref).\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/alan-turing-institute/MLJFlux.jl" -":package_name" = "MLJFlux" -":name" = "ImageClassifier" -":target_in_fit" = "`true`" +":human_name" = "single variable fill imputer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nUnivariateFillImputer\n```\n\nA model type for constructing a single variable fill imputer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nUnivariateFillImputer = @load UnivariateFillImputer pkg=MLJTransforms\n```\n\nDo `model = UnivariateFillImputer()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateFillImputer(continuous_fill=...)`.\n\nUse this model to impute `missing` values in a vector with a fixed value learned from the non-missing values of the training vector.\n\nFor imputing missing values in tabular data, use [`FillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with element scitype `Union{Missing, T}` where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`; check scitype using `scitype(x)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, xnew)`: return `xnew` with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `filler`: the fill value to be imputed in all new data\n\n# Examples\n\n```julia\nusing MLJ\nimputer = UnivariateFillImputer()\n\nx_continuous = [1.0, 2.0, missing, 3.0]\nx_multiclass = coerce([\"y\", \"n\", \"y\", missing, \"y\"], Multiclass)\nx_count = [1, 1, 1, 2, missing, 3, 3]\n\nmach = machine(imputer, x_continuous)\nfit!(mach)\n\njulia> fitted_params(mach)\n(filler = 2.0,)\n\njulia> transform(mach, [missing, missing, 101.0])\n3-element Vector{Float64}:\n
2.0\n 2.0\n 101.0\n\nmach2 = machine(imputer, x_multiclass) |> fit!\n\njulia> transform(mach2, x_multiclass)\n5-element CategoricalArray{String,1,UInt32}:\n \"y\"\n \"n\"\n \"y\"\n \"y\"\n \"y\"\n\nmach3 = machine(imputer, x_count) |> fit!\n\njulia> transform(mach3, [missing, missing, 5])\n3-element Vector{Int64}:\n 2\n 2\n 5\n```\n\nFor imputing tabular data, use [`FillImputer`](@ref).\n""" +":inverse_transform_scitype" = "`Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "UnivariateFillImputer" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":predict"] -":deep_properties" = "`(:optimiser, :builder)`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Multiclass}}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Multiclass}`" -":supports_training_losses" = "`true`" +":implemented_methods" = [":fit", ":fitted_params", ":transform", ":UnivariateFillImputer"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Image}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":input_scitype" = "`Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}`" +":transform_scitype" = "`Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, 
AbstractVector{<:ScientificTypesBase.Finite}}`" ":constructor" = "`nothing`" -[MLJFlux.NeuralNetworkBinaryClassifier] -":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" -":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +[MLJEnsembles.EnsembleModel] +":constructor" = "`EnsembleModel`" +":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Vector{Float64}\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Int64\", \"ComputationalResources.AbstractResource\", \"Any\")`" +":package_uuid" = "50ed68f4-41fd-4504-931a-ed422449fee0" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Binary}}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Probabilistic`" -":package_license" = "MIT" +":package_license" = "unknown" ":prediction_type" = ":probabilistic" -":load_path" = "MLJFlux.NeuralNetworkBinaryClassifier" -":hyperparameters" = "`(:builder, :finaliser, :optimiser, :loss, :epochs, :batch_size, :lambda, :alpha, :rng, :optimiser_changes_trigger_retraining, :acceleration, :embedding_dims)`" -":is_pure_julia" = "`true`" -":human_name" = "neural network binary classifier" +":load_path" = "MLJEnsembles.EnsembleModel" +":hyperparameters" = 
"`(:model, :atomic_weights, :bagging_fraction, :rng, :n, :acceleration, :out_of_bag_measure)`" +":is_pure_julia" = "`false`" +":human_name" = "probabilistic ensemble model" ":is_supervised" = "`true`" -":iteration_parameter" = ":epochs" -":docstring" = """```julia\nNeuralNetworkBinaryClassifier\n```\n\nA model type for constructing a neural network binary classifier, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nNeuralNetworkBinaryClassifier = @load NeuralNetworkBinaryClassifier pkg=MLJFlux\n```\n\nDo `model = NeuralNetworkBinaryClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `NeuralNetworkBinaryClassifier(builder=...)`.\n\n`NeuralNetworkBinaryClassifier` is for training a data-dependent Flux.jl neural network for making probabilistic predictions of a binary (`Multiclass{2}` or `OrderedFactor{2}`) target, given a table of `Continuous` features. Users provide a recipe for constructing the network, based on properties of the data that is encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\nIn addition to features with `Continuous` scientific element type, this model supports categorical features in the input table. 
If present, such features are embedded into dense vectors by the use of an additional `EntityEmbedderLayer` layer after the input, as described in Entity Embeddings of Categorical Variables by Cheng Guo, Felix Berkhahn arXiv, 2016.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` provides input features and is either: (i) a `Matrix` with `Continuous` element scitype (typically `Float32`); or (ii) a table of input features (eg, a `DataFrame`) whose columns have `Continuous`, `Multiclass` or `OrderedFactor` element scitype; check column scitypes with `schema(X)`. If any `Multiclass` or `OrderedFactor` features appear, the constructed network will use an `EntityEmbedderLayer` layer to transform them into dense vectors. If `X` is a `Matrix`, it is assumed that columns correspond to features and rows corresponding to observations.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Multiclass{2}` or `OrderedFactor{2}`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `builder=MLJFlux.Short()`: An MLJFlux builder that constructs a neural network. Possible `builders` include: `MLJFlux.Linear`, `MLJFlux.Short`, and `MLJFlux.MLP`. See MLJFlux.jl documentation for examples of user-defined builders. See also `finaliser` below.\n * `optimiser::Flux.Adam()`: A `Flux.Optimise` optimiser. The optimiser performs the updating of the weights of the network. For further reference, see [the Flux optimiser documentation](https://fluxml.ai/Flux.jl/stable/training/optimisers/). To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of `10` between `1` and `1e-7`.\n * `loss=Flux.binarycrossentropy`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. 
Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). For a classification task, the most natural loss functions are:\n\n * `Flux.binarycrossentropy`: Standard binary classification loss, also known as the log loss.\n * `Flux.logitbinarycrossentropy`: Mathematically equal to crossentropy, but numerically more stable than finalising the outputs with `σ` and then calculating crossentropy. You will need to specify `finaliser=identity` to remove MLJFlux's default sigmoid finaliser, and understand that the output of `predict` is then unnormalized (no longer probabilistic).\n * `Flux.tversky_loss`: Used with imbalanced data to give more weight to false negatives.\n * `Flux.binary_focal_loss`: Used with highly imbalanced data. Weights harder examples more than easier examples.\n\n Currently MLJ measures are not supported values of `loss`.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete the training dataset.\n * `batch_size::int=1`: the batch size to be used for training, representing the number of samples per update of the network weights. Typically, batch size is between `8` and `512`. Increassing batch size may accelerate training if `acceleration=CUDALibs()` and a GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training.\n * `optimizer_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. 
If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. For Training on GPU, use `CUDALibs()`.\n * `finaliser=Flux.σ`: The final activation function of the neural network (applied after the network defined by `builder`). Defaults to `Flux.σ`.\n * `embedding_dims`: a `Dict` whose keys are names of categorical features, given as symbols, and whose values are numbers representing the desired dimensionality of the entity embeddings of such features: an integer value of `7`, say, sets the embedding dimensionality to `7`; a float value of `0.5`, say, sets the embedding dimensionality to `ceil(0.5 * c)`, where `c` is the number of feature levels. Unspecified feature dimensionality defaults to `min(c - 1, 10)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n * `transform(mach, Xnew)`: Assuming `Xnew` has the same schema as `X`, transform the categorical features of `Xnew` into dense `Continuous` vectors using the `MLJFlux.EntityEmbedderLayer` layer present in the network. Does nothing in case the model was trained on an input `X` that lacks categorical features.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network. This includes the final layer specified by `finaliser` (eg, `softmax`).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalised if `lambda != 0`) in historical order, of length `epochs + 1`. 
The first element is the pre-training loss.\n\n# Examples\n\nIn this example we build a classification model using the Iris dataset. This is a very basic example, using a default builder and no standardization. For a more advanced illustration, see [`NeuralNetworkRegressor`](@ref) or [`ImageClassifier`](@ref), and examples in the MLJFlux.jl documentation.\n\n```julia\nusing MLJ, Flux\nimport Optimisers\nimport RDatasets\n```\n\nFirst, we can load the data:\n\n```julia\nmtcars = RDatasets.dataset(\"datasets\", \"mtcars\");\ny, X = unpack(mtcars, ==(:VS), in([:MPG, :Cyl, :Disp, :HP, :WT, :QSec]));\n```\n\nNote that `y` is a vector and `X` a table.\n\n```julia\ny = categorical(y) # classifier takes catogorical input\nX_f32 = Float32.(X) # To match floating point type of the neural network layers\nNeuralNetworkBinaryClassifier = @load NeuralNetworkBinaryClassifier pkg=MLJFlux\nbclf = NeuralNetworkBinaryClassifier()\n```\n\nNext, we can train the model:\n\n```julia\nmach = machine(bclf, X_f32, y)\nfit!(mach)\n```\n\nWe can train the model in an incremental fashion, altering the learning rate as we go, provided `optimizer_changes_trigger_retraining` is `false` (the default). 
Here, we also change the number of (total) iterations:\n\n```julia-repl\njulia> bclf.optimiser\nAdam(0.001, (0.9, 0.999), 1.0e-8)\n```\n\n```julia\nbclf.optimiser = Optimisers.Adam(eta = bclf.optimiser.eta * 2)\nbclf.epochs = bclf.epochs + 5\n\nfit!(mach, verbosity=2) # trains 5 more epochs\n```\n\nWe can inspect the mean training loss using the `cross_entropy` function:\n\n```julia\ntraining_loss = cross_entropy(predict(mach, X_f32), y)\n```\n\nAnd we can access the Flux chain (model) using `fitted_params`:\n\n```julia\nchain = fitted_params(mach).chain\n```\n\nFinally, we can see how the out-of-sample performance changes over time, using MLJ's `learning_curve` function:\n\n```julia\nr = range(bclf, :epochs, lower=1, upper=200, scale=:log10)\ncurve = learning_curve(\n bclf,\n X_f32,\n y,\n range=r,\n resampling=Holdout(fraction_train=0.7),\n measure=cross_entropy,\n)\nusing Plots\nplot(\n curve.parameter_values,\n curve.measurements,\n xlab=curve.parameter_name,\n xscale=curve.parameter_scale,\n ylab = \"Cross Entropy\",\n)\n\n```\n\nSee also [`ImageClassifier`](@ref).\n""" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nEnsembleModel(model,\n atomic_weights=Float64[],\n bagging_fraction=0.8,\n n=100,\n rng=GLOBAL_RNG,\n acceleration=CPU1(),\n out_of_bag_measure=[])\n```\n\nCreate a model for training an ensemble of `n` clones of `model`, with optional bagging. Ensembling is useful if `fit!(machine(atom, data...))` does not create identical models on repeated calls (ie, is a stochastic model, such as a decision tree with randomized node selection criteria), or if `bagging_fraction` is set to a value less than 1.0, or both.\n\nHere the atomic `model` must support targets with scitype `AbstractVector{<:Finite}` (single-target classifiers) or `AbstractVector{<:Continuous}` (single-target regressors).\n\nIf `rng` is an integer, then `MersenneTwister(rng)` is the random number generator used for bagging. 
Otherwise some `AbstractRNG` object is expected.\n\nThe atomic predictions are optionally weighted according to the vector `atomic_weights` (to allow for external optimization) except in the case that `model` is a `Deterministic` classifier, in which case `atomic_weights` are ignored.\n\nThe ensemble model is `Deterministic` or `Probabilistic`, according to the corresponding supertype of `atom`. In the case of deterministic classifiers (`target_scitype(atom) <: Abstract{<:Finite}`), the predictions are majority votes, and for regressors (`target_scitype(atom)<: AbstractVector{<:Continuous}`) they are ordinary averages. Probabilistic predictions are obtained by averaging the atomic probability distribution/mass functions; in particular, for regressors, the ensemble prediction on each input pattern has the type `MixtureModel{VF,VS,D}` from the Distributions.jl package, where `D` is the type of predicted distribution for `atom`.\n\nSpecify `acceleration=CPUProcesses()` for distributed computing, or `CPUThreads()` for multithreading.\n\nIf a single measure or non-empty vector of measures is specified by `out_of_bag_measure`, then out-of-bag estimates of performance are written to the training report (call `report` on the trained machine wrapping the ensemble model).\n\n*Important:* If per-observation or class weights `w` (not to be confused with atomic weights) are specified when constructing a machine for the ensemble model, as in `mach = machine(ensemble_model, X, y, w)`, then `w` is used by any measures specified in `out_of_bag_measure` that support them.\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/alan-turing-institute/MLJFlux.jl" -":package_name" = "MLJFlux" -":name" = "NeuralNetworkBinaryClassifier" +":package_url" = "https://github.com/JuliaAI/MLJEnsembles.jl" +":package_name" = "MLJEnsembles" +":name" = "EnsembleModel" ":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" 
":supports_online" = "`false`" -":implemented_methods" = [":predict"] -":deep_properties" = "`(:optimiser, :builder)`" +":implemented_methods" = [] +":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Binary}`" -":supports_training_losses" = "`true`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" - -[MLJFlux.NeuralNetworkRegressor] -":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" -":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" -":package_license" = "MIT" -":prediction_type" = ":deterministic" -":load_path" = "MLJFlux.NeuralNetworkRegressor" -":hyperparameters" = "`(:builder, :optimiser, :loss, :epochs, :batch_size, :lambda, :alpha, :rng, :optimiser_changes_trigger_retraining, :acceleration, :embedding_dims)`" -":is_pure_julia" = "`true`" -":human_name" = "neural network regressor" -":is_supervised" = 
"`true`" -":iteration_parameter" = ":epochs" -":docstring" = """```julia\nNeuralNetworkRegressor\n```\n\nA model type for constructing a neural network regressor, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nNeuralNetworkRegressor = @load NeuralNetworkRegressor pkg=MLJFlux\n```\n\nDo `model = NeuralNetworkRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `NeuralNetworkRegressor(builder=...)`.\n\n`NeuralNetworkRegressor` is for training a data-dependent Flux.jl neural network to predict a `Continuous` target, given a table of `Continuous` features. Users provide a recipe for constructing the network, based on properties of the data that is encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\nIn addition to features with `Continuous` scientific element type, this model supports categorical features in the input table. If present, such features are embedded into dense vectors by the use of an additional `EntityEmbedderLayer` layer after the input, as described in Entity Embeddings of Categorical Variables by Cheng Guo, Felix Berkhahn arXiv, 2016.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` provides input features and is either: (i) a `Matrix` with `Continuous` element scitype (typically `Float32`); or (ii) a table of input features (eg, a `DataFrame`) whose columns have `Continuous`, `Multiclass` or `OrderedFactor` element scitype; check column scitypes with `schema(X)`. If any `Multiclass` or `OrderedFactor` features appear, the constructed network will use an `EntityEmbedderLayer` layer to transform them into dense vectors. 
If `X` is a `Matrix`, it is assumed that columns correspond to features and rows corresponding to observations.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `builder=MLJFlux.Linear(σ=Flux.relu)`: An MLJFlux builder that constructs a neural network. Possible `builders` include: `MLJFlux.Linear`, `MLJFlux.Short`, and `MLJFlux.MLP`. See MLJFlux documentation for more on builders, and the example below for using the `@builder` convenience macro.\n * `optimiser::Optimisers.Adam()`: An Optimisers.jl optimiser. The optimiser performs the updating of the weights of the network. To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of `10` between `1` and `1e-7`.\n * `loss=Flux.mse`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). For a regression task, natural loss functions are:\n\n * `Flux.mse`\n * `Flux.mae`\n * `Flux.msle`\n * `Flux.huber_loss`\n\n Currently MLJ measures are not supported as loss functions here.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete the training dataset.\n * `batch_size::int=1`: the batch size to be used for training, representing the number of samples per update of the network weights. Typically, batch size is between `8` and `512`. Increasing batch size may accelerate training if `acceleration=CUDALibs()` and a GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`. 
Note the history reports unpenalized losses.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training. The default is `Random.default_rng()`.\n * `optimizer_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. For Training on GPU, use `CUDALibs()`.\n * `embedding_dims`: a `Dict` whose keys are names of categorical features, given as symbols, and whose values are numbers representing the desired dimensionality of the entity embeddings of such features: an integer value of `7`, say, sets the embedding dimensionality to `7`; a float value of `0.5`, say, sets the embedding dimensionality to `ceil(0.5 * c)`, where `c` is the number of feature levels. Unspecified feature dimensionality defaults to `min(c - 1, 10)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n * `transform(mach, Xnew)`: Assuming `Xnew` has the same schema as `X`, transform the categorical features of `Xnew` into dense `Continuous` vectors using the `MLJFlux.EntityEmbedderLayer` layer present in the network. 
Does nothing in case the model was trained on an input `X` that lacks categorical features.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalized if `lambda != 0`) in historical order, of length `epochs + 1`. The first element is the pre-training loss.\n\n# Examples\n\nIn this example we build a regression model for the Boston house price dataset.\n\n```julia\nusing MLJ\nimport MLJFlux\nusing Flux\nimport Optimisers\n```\n\nFirst, we load in the data: The `:MEDV` column becomes the target vector `y`, and all remaining columns go into a table `X`, with the exception of `:CHAS`:\n\n```julia\ndata = OpenML.load(531); # Loads from https://www.openml.org/d/531\ny, X = unpack(data, ==(:MEDV), !=(:CHAS); rng=123);\n\nscitype(y)\nschema(X)\n```\n\nSince MLJFlux models do not handle ordered factors, we'll treat `:RAD` as `Continuous`:\n\n```julia\nX = coerce(X, :RAD=>Continuous)\n```\n\nSplitting off a test set:\n\n```julia\n(X, Xtest), (y, ytest) = partition((X, y), 0.7, multi=true);\n```\n\nNext, we can define a `builder`, making use of a convenience macro to do so. In the following `@builder` call, `n_in` is a proxy for the number input features (which will be known at `fit!` time) and `rng` is a proxy for a RNG (which will be passed from the `rng` field of `model` defined below). We also have the parameter `n_out` which is the number of output features. 
As we are doing single target regression, the value passed will always be `1`, but the builder we define will also work for [`MultitargetNeuralNetworkRegressor`](@ref).\n\n```julia\nbuilder = MLJFlux.@builder begin\n init=Flux.glorot_uniform(rng)\n Chain(\n Dense(n_in, 64, relu, init=init),\n Dense(64, 32, relu, init=init),\n Dense(32, n_out, init=init),\n )\nend\n```\n\nInstantiating a model:\n\n```julia\nNeuralNetworkRegressor = @load NeuralNetworkRegressor pkg=MLJFlux\nmodel = NeuralNetworkRegressor(\n builder=builder,\n rng=123,\n epochs=20\n)\n```\n\nWe arrange for standardization of the the target by wrapping our model in `TransformedTargetModel`, and standardization of the features by inserting the wrapped model in a pipeline:\n\n```julia\npipe = Standardizer |> TransformedTargetModel(model, transformer=Standardizer)\n```\n\nIf we fit with a high verbosity (>1), we will see the losses during training. We can also see the losses in the output of `report(mach)`.\n\n```julia\nmach = machine(pipe, X, y)\nfit!(mach, verbosity=2)\n\n# first element initial loss, 2:end per epoch training losses\nreport(mach).transformed_target_model_deterministic.model.training_losses\n```\n\n## Experimenting with learning rate\n\nWe can visually compare how the learning rate affects the predictions:\n\n```julia\nusing Plots\n\nrates = rates = [5e-5, 1e-4, 0.005, 0.001, 0.05]\nplt=plot()\n\nforeach(rates) do η\n pipe.transformed_target_model_deterministic.model.optimiser = Optimisers.Adam(η)\n fit!(mach, force=true, verbosity=0)\n losses =\n report(mach).transformed_target_model_deterministic.model.training_losses[3:end]\n plot!(1:length(losses), losses, label=η)\nend\n\nplt\n\npipe.transformed_target_model_deterministic.model.optimiser.eta = Optimisers.Adam(0.0001)\n```\n\nWith the learning rate fixed, we compute a CV estimate of the performance (using all data bound to `mach`) and compare this with performance on the test set:\n\n```julia\n# CV estimate, based on `(X, 
y)`:\nevaluate!(mach, resampling=CV(nfolds=5), measure=l2)\n\n# loss for `(Xtest, test)`:\nfit!(mach) # train on `(X, y)`\nyhat = predict(mach, Xtest)\nl2(yhat, ytest)\n```\n\nThese losses, for the pipeline model, refer to the target on the original, unstandardized, scale.\n\nFor implementing stopping criterion and other iteration controls, refer to examples linked from the MLJFlux documentation.\n\nSee also [`MultitargetNeuralNetworkRegressor`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/alan-turing-institute/MLJFlux.jl" -":package_name" = "MLJFlux" -":name" = "NeuralNetworkRegressor" -":target_in_fit" = "`true`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [":predict"] -":deep_properties" = "`(:optimiser, :builder)`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":supports_training_losses" = "`true`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":input_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`true`" diff --git a/src/registry/Project.toml b/src/registry/Project.toml index 1e96fd3..e379b24 100644 --- a/src/registry/Project.toml +++ b/src/registry/Project.toml @@ -27,6 +27,7 @@ MLJTestInterface = "72560011-54dd-4dc2-94f3-c5de45b75ecd" MLJText = "5e27fcf9-6bac-46ba-8580-b5712f3d6387" MLJTransforms = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" MLJTuning = "03970b2e-30c4-11ea-3135-d1576263f10f" +MLJWrappers = 
"b5d0f7f3-9870-4c70-ba08-cb780c37e63f" MLJXGBoostInterface = "54119dfa-1dab-4055-a167-80440f4f7a91" Maxnet = "81f79f80-22f2-4e41-ab86-00c11cf0f26f" NearestNeighborModels = "636a865e-7cf4-491e-846c-de09b730eb36"