diff --git a/Project.toml b/Project.toml index 335380e..348052c 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MLJModels" uuid = "d491faf4-2d78-11e9-2867-c94bc002c0b7" authors = ["Anthony D. Blaom "] -version = "0.18.6" +version = "0.18.7" [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" diff --git a/src/registry/Metadata.toml b/src/registry/Metadata.toml index 82e0b6c..ef8a525 100644 --- a/src/registry/Metadata.toml +++ b/src/registry/Metadata.toml @@ -739,227 +739,597 @@ ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":constructor" = "`nothing`" -[MLJFlux.EntityEmbedder] -":is_wrapper" = "`true`" -":hyperparameter_types" = "`(\"Union{MLJFlux.MLJFluxDeterministic, MLJFlux.MLJFluxProbabilistic}\",)`" -":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" -":hyperparameter_ranges" = "`(nothing,)`" +[MLJTransforms.Standardizer] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Union{Function, AbstractVector{Symbol}}\", \"Bool\", \"Bool\", \"Bool\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "unknown" +":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "MLJFlux.EntityEmbedder" -":hyperparameters" = "`(:model,)`" +":load_path" = "MLJTransforms.Standardizer" +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :count)`" ":is_pure_julia" = "`true`" -":human_name" 
= "entity embedder" +":human_name" = "standardizer" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nEntityEmbedder(; model=supervised_mljflux_model)\n```\n\nWrapper for a MLJFlux supervised model, to convert it to a transformer. Such transformers are still presented a target variable in training, but they behave as transformers in MLJ pipelines. They are entity embedding transformers, in the sense of the article, \"Entity Embeddings of Categorical Variables\" by Cheng Guo, Felix Berkhahn.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```julia\nmach = machine(embed_model, X, y)\n```\n\nHere:\n\n * `embed_model` is an instance of `EntityEmbedder`, which wraps a supervised MLJFlux model, `model`, which must be an instance of one of these: `MLJFlux.NeuralNetworkClassifier`, `NeuralNetworkBinaryClassifier`, `MLJFlux.NeuralNetworkRegressor`,`MLJFlux.MultitargetNeuralNetworkRegressor`.\n * `X` is any table of input features supported by the model being wrapped. Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. 
Use `schema(X)` to check scitypes.\n * `y` is the target, which can be any `AbstractVector` supported by the model being wrapped.\n\nTrain the machine using `fit!(mach)`.\n\n# Examples\n\nIn the following example we wrap a `NeuralNetworkClassifier` as an `EntityEmbedder`, so that it can be used to supply continuously encoded features to a nearest neighbor model, which does not support categorical features.\n\n## Simple Example\n\n```julia\nusing MLJ\n\n# Setup some data\nN = 400\nX = (\n a = rand(Float32, N),\n b = categorical(rand(\"abcde\", N)),\n c = categorical(rand(\"ABCDEFGHIJ\", N), ordered = true),\n)\n\ny = categorical(rand(\"YN\", N));\n\n# Initiate model\nEntityEmbedder = @load EntityEmbedder pkg=MLJFlux\n\n# Flux model to do learn the entity embeddings:\nNeuralNetworkClassifier = @load NeuralNetworkClassifier pkg=MLJFlux\n\n# Instantiate the models:\nclf = NeuralNetworkClassifier(embedding_dims=Dict(:b => 2, :c => 3))\nemb = EntityEmbedder(clf)\n\n# Train and transform the data using the embedder:\nmach = machine(emb, X, y)\nfit!(mach)\nXnew = transform(mach, X)\n\n# Compare schemas before and after transformation\nschema(X)\nschema(Xnew)\n```\n\n## Using with Downstream Models (Pipeline)\n\n```julia\nusing MLJ\n\n# Setup some data\nN = 400\nX = (\n a = rand(Float32, N),\n b = categorical(rand(\"abcde\", N)),\n c = categorical(rand(\"ABCDEFGHIJ\", N), ordered = true),\n)\n\ny = categorical(rand(\"YN\", N));\n\n# Initiate model\nEntityEmbedder = @load EntityEmbedder pkg=MLJFlux\n\n# Flux model to do learn the entity embeddings:\nNeuralNetworkClassifier = @load NeuralNetworkClassifier pkg=MLJFlux\n\n# Other supervised model type, requiring `Continuous` features:\nKNNClassifier = @load KNNClassifier pkg=NearestNeighborModels\n\n# Instantiate the models:\nclf = NeuralNetworkClassifier(embedding_dims=Dict(:b => 2, :c => 3))\nemb = EntityEmbedder(clf)\n\n# Now construct the pipeline:\npipe = emb |> KNNClassifier()\n\n# And train it to make predictions:\nmach 
= machine(pipe, X, y)\nfit!(mach)\npredict(mach, X)[1:3]\n```\n\nIt is to be emphasized that the `NeuralNertworkClassifier` is only being used to learn entity embeddings, not to make predictions, which here are made by `KNNClassifier()`.\n\nSee also [`NeuralNetworkClassifier`, `NeuralNetworkRegressor`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/FluxML/MLJFlux.jl" -":package_name" = "MLJFlux" -":name" = "EntityEmbedder" -":target_in_fit" = "`true`" +":docstring" = """```julia\nStandardizer\n```\n\nA model type for constructing a standardizer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nStandardizer = @load Standardizer pkg=MLJTransforms\n```\n\nDo `model = Standardizer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `Standardizer(features=...)`.\n\nUse this model to standardize (whiten) a `Continuous` vector, or relevant columns of a table. The rescalings applied by this transformer to new data are always those learned during the training phase, which are generally different from what would actually standardize the new data.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table, or any abstract vector with `Continuous` element scitype (any abstract float vector). 
Features in a table without `Continuous` scitype will be ignored; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: one of the following, with the behavior indicated below:\n\n * `[]` (empty, the default): standardize all features (columns) having `Continuous` element scitype\n * non-empty vector of feature names (symbols): standardize only the `Continuous` features in the vector (if `ignore=false`) or `Continuous` features *not* named in the vector (`ignore=true`).\n * function or other callable: standardize a feature if the callable returns `true` on its name. For example, `Standardizer(features = name -> name in [:x1, :x3], ignore = true, count=true)` has the same effect as `Standardizer(features = [:x1, :x3], ignore = true, count=true)`, namely to standardize all `Continuous` and `Count` features, with the exception of `:x1` and `:x3`.\n\n Note this behavior is further modified if the `ordered_factor` or `count` flags are set to `true`; see below\n * `ignore=false`: whether to ignore or standardize specified `features`, as explained above\n * `ordered_factor=false`: if `true`, standardize any `OrderedFactor` feature wherever a `Continuous` feature would be standardized, as described above\n * `count=false`: if `true`, standardize any `Count` feature wherever a `Continuous` feature would be standardized, as described above\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with relevant features standardized according to the rescalings learned during fitting of `mach`.\n * `inverse_transform(mach, Z)`: apply the inverse transformation to `Z`, so that `inverse_transform(mach, transform(mach, Xnew))` is approximately the same as `Xnew`; unavailable if `ordered_factor` or `count` flags were set to `true`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_fit` - the names of features that will be standardized\n * `means` - the corresponding 
untransformed mean values\n * `stds` - the corresponding untransformed standard deviations\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features_fit`: the names of features that will be standardized\n\n# Examples\n\n```julia\nusing MLJ\n\nX = (ordinal1 = [1, 2, 3],\n ordinal2 = coerce([:x, :y, :x], OrderedFactor),\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = coerce([\"Your father\", \"he\", \"is\"], Multiclass));\n\njulia> schema(X)\n┌──────────┬──────────────────┐\n│ names │ scitypes │\n├──────────┼──────────────────┤\n│ ordinal1 │ Count │\n│ ordinal2 │ OrderedFactor{2} │\n│ ordinal3 │ Continuous │\n│ ordinal4 │ Continuous │\n│ nominal │ Multiclass{3} │\n└──────────┴──────────────────┘\n\nstand1 = Standardizer();\n\njulia> transform(fit!(machine(stand1, X)), X)\n(ordinal1 = [1, 2, 3],\n ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x],\n ordinal3 = [-1.0, 0.0, 1.0],\n ordinal4 = [1.0, 0.0, -1.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n\nstand2 = Standardizer(features=[:ordinal3, ], ignore=true, count=true);\n\njulia> transform(fit!(machine(stand2, X)), X)\n(ordinal1 = [-1.0, 0.0, 1.0],\n ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x],\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [1.0, 0.0, -1.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n```\n\nSee also [`OneHotEncoder`](@ref), [`ContinuousEncoder`](@ref).\n""" +":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "Standardizer" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":training_losses", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", 
":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" -":supports_training_losses" = "`true`" +":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" - -[MLJFlux.MultitargetNeuralNetworkRegressor] +":input_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" -":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" + +[MLJTransforms.UnivariateTimeTypeToContinuous] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Union{Nothing, Dates.TimeType}\", \"Dates.Period\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.ScientificTimeType}}`" +":output_scitype" = 
"`AbstractVector{ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" -":prediction_type" = ":deterministic" -":load_path" = "MLJFlux.MultitargetNeuralNetworkRegressor" -":hyperparameters" = "`(:builder, :optimiser, :loss, :epochs, :batch_size, :lambda, :alpha, :rng, :optimiser_changes_trigger_retraining, :acceleration, :embedding_dims)`" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.UnivariateTimeTypeToContinuous" +":hyperparameters" = "`(:zero_time, :step)`" ":is_pure_julia" = "`true`" -":human_name" = "multitarget neural network regressor" -":is_supervised" = "`true`" -":iteration_parameter" = ":epochs" -":docstring" = """```julia\nMultitargetNeuralNetworkRegressor\n```\n\nA model type for constructing a multitarget neural network regressor, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nMultitargetNeuralNetworkRegressor = @load MultitargetNeuralNetworkRegressor pkg=MLJFlux\n```\n\nDo `model = MultitargetNeuralNetworkRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetNeuralNetworkRegressor(builder=...)`.\n\n`MultitargetNeuralNetworkRegressor` is for training a data-dependent Flux.jl neural network to predict a multi-valued `Continuous` target, represented as a table, given a table of `Continuous` features. Users provide a recipe for constructing the network, based on properties of the data that is encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\nIn addition to features with `Continuous` scientific element type, this model supports categorical features in the input table. 
If present, such features are embedded into dense vectors by the use of an additional `EntityEmbedderLayer` layer after the input, as described in Entity Embeddings of Categorical Variables by Cheng Guo, Felix Berkhahn arXiv, 2016.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` provides input features and is either: (i) a `Matrix` with `Continuous` element scitype (typically `Float32`); or (ii) a table of input features (eg, a `DataFrame`) whose columns have `Continuous`, `Multiclass` or `OrderedFactor` element scitype; check column scitypes with `schema(X)`. If any `Multiclass` or `OrderedFactor` features appear, the constructed network will use an `EntityEmbedderLayer` layer to transform them into dense vectors. If `X` is a `Matrix`, it is assumed that columns correspond to features and rows corresponding to observations.\n\n * `y` is the target, which can be any table or matrix of output targets whose element scitype is `Continuous`; check column scitypes with `schema(y)`. If `y` is a `Matrix`, it is assumed to have columns corresponding to variables and rows corresponding to observations.\n\n# Hyper-parameters\n\n * `builder=MLJFlux.Linear(σ=Flux.relu)`: An MLJFlux builder that constructs a neural network. Possible `builders` include: `Linear`, `Short`, and `MLP`. See MLJFlux documentation for more on builders, and the example below for using the `@builder` convenience macro.\n * `optimiser::Optimisers.Adam()`: An Optimisers.jl optimiser. The optimiser performs the updating of the weights of the network. To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of `10` between `1` and `1e-7`.\n * `loss=Flux.mse`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. 
Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). For a regression task, natural loss functions are:\n\n * `Flux.mse`\n * `Flux.mae`\n * `Flux.msle`\n * `Flux.huber_loss`\n\n Currently MLJ measures are not supported as loss functions here.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete the training dataset.\n * `batch_size::int=1`: the batch size to be used for training, representing the number of samples per update of the network weights. Typically, batch size is between `8` and `512`. Increassing batch size may accelerate training if `acceleration=CUDALibs()` and a GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`. Note the history reports unpenalized losses.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training. The default is `Random.default_rng()`.\n * `optimizer_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. For Training on GPU, use `CUDALibs()`.\n * `embedding_dims`: a `Dict` whose keys are names of categorical features, given as symbols, and whose values are numbers representing the desired dimensionality of the entity embeddings of such features: an integer value of `7`, say, sets the embedding dimensionality to `7`; a float value of `0.5`, say, sets the embedding dimensionality to `ceil(0.5 * c)`, where `c` is the number of feature levels. 
Unspecified feature dimensionality defaults to `min(c - 1, 10)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same scitype as `X` above. Predictions are deterministic.\n * `transform(mach, Xnew)`: Assuming `Xnew` has the same schema as `X`, transform the categorical features of `Xnew` into dense `Continuous` vectors using the `MLJFlux.EntityEmbedderLayer` layer present in the network. Does nothing in case the model was trained on an input `X` that lacks categorical features.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalised if `lambda != 0`) in historical order, of length `epochs + 1`. The first element is the pre-training loss.\n\n# Examples\n\nIn this example we apply a multi-target regression model to synthetic data:\n\n```julia\nusing MLJ\nimport MLJFlux\nusing Flux\nimport Optimisers\n```\n\nFirst, we generate some synthetic data (needs MLJBase 0.20.16 or higher):\n\n```julia\nX, y = make_regression(100, 9; n_targets = 2) # both tables\nschema(y)\nschema(X)\n```\n\nSplitting off a test set:\n\n```julia\n(X, Xtest), (y, ytest) = partition((X, y), 0.7, multi=true);\n```\n\nNext, we can define a `builder`, making use of a convenience macro to do so. 
In the following `@builder` call, `n_in` is a proxy for the number input features and `n_out` the number of target variables (both known at `fit!` time), while `rng` is a proxy for a RNG (which will be passed from the `rng` field of `model` defined below).\n\n```julia\nbuilder = MLJFlux.@builder begin\n init=Flux.glorot_uniform(rng)\n Chain(\n Dense(n_in, 64, relu, init=init),\n Dense(64, 32, relu, init=init),\n Dense(32, n_out, init=init),\n )\nend\n```\n\nInstantiating the regression model:\n\n```julia\nMultitargetNeuralNetworkRegressor = @load MultitargetNeuralNetworkRegressor\nmodel = MultitargetNeuralNetworkRegressor(builder=builder, rng=123, epochs=20)\n```\n\nWe will arrange for standardization of the the target by wrapping our model in `TransformedTargetModel`, and standardization of the features by inserting the wrapped model in a pipeline:\n\n```julia\npipe = Standardizer |> TransformedTargetModel(model, transformer=Standardizer)\n```\n\nIf we fit with a high verbosity (>1), we will see the losses during training. 
We can also see the losses in the output of `report(mach)`\n\n```julia\nmach = machine(pipe, X, y)\nfit!(mach, verbosity=2)\n\n# first element initial loss, 2:end per epoch training losses\nreport(mach).transformed_target_model_deterministic.model.training_losses\n```\n\nFor experimenting with learning rate, see the [`NeuralNetworkRegressor`](@ref) example.\n\n```julia\npipe.transformed_target_model_deterministic.model.optimiser = Optimisers.Adam(0.0001)\n```\n\nWith the learning rate fixed, we can now compute a CV estimate of the performance (using all data bound to `mach`) and compare this with performance on the test set:\n\n```julia\n\n# CV estimate, based on `(X, y)`:\nevaluate!(mach, resampling=CV(nfolds=5), measure=multitarget_l2)\n\n# loss for `(Xtest, test)`:\nfit!(mach) # trains on all data `(X, y)`\nyhat = predict(mach, Xtest)\nmultitarget_l2(yhat, ytest)\n```\n\nSee also [`NeuralNetworkRegressor`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/alan-turing-institute/MLJFlux.jl" -":package_name" = "MLJFlux" -":name" = "MultitargetNeuralNetworkRegressor" -":target_in_fit" = "`true`" +":human_name" = "single variable transformer that creates continuous representations of temporally typed data" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nUnivariateTimeTypeToContinuous\n```\n\nA model type for constructing a single variable transformer that creates continuous representations of temporally typed data, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nUnivariateTimeTypeToContinuous = @load UnivariateTimeTypeToContinuous pkg=MLJTransforms\n```\n\nDo `model = UnivariateTimeTypeToContinuous()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateTimeTypeToContinuous(zero_time=...)`.\n\nUse this model to convert vectors with a `TimeType` element type to vectors of `Float64` type (`Continuous` element scitype).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector whose element type is a subtype of `Dates.TimeType`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `zero_time`: the time that is to correspond to 0.0 under transformations, with the type coinciding with the training data element type. If unspecified, the earliest time encountered in training is used.\n * `step::Period=Hour(24)`: time interval to correspond to one unit under transformation\n\n# Operations\n\n * `transform(mach, xnew)`: apply the encoding inferred when `mach` was fit\n\n# Fitted parameters\n\n`fitted_params(mach).fitresult` is the tuple `(zero_time, step)` actually used in transformations, which may differ from the user-specified hyper-parameters.\n\n# Example\n\n```julia\nusing MLJ\nusing Dates\n\nx = [Date(2001, 1, 1) + Day(i) for i in 0:4]\n\nencoder = UnivariateTimeTypeToContinuous(zero_time=Date(2000, 1, 1),\n step=Week(1))\n\nmach = machine(encoder, x)\nfit!(mach)\njulia> transform(mach, x)\n5-element Vector{Float64}:\n 52.285714285714285\n 52.42857142857143\n 52.57142857142857\n 52.714285714285715\n 52.857142\n```\n""" +":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.ScientificTimeType}`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "UnivariateTimeTypeToContinuous" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":predict"] -":deep_properties" = "`(:optimiser, :builder)`" -":predict_scitype" = 
"`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" -":target_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" -":supports_training_losses" = "`true`" +":implemented_methods" = [":clean!", ":fit", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" - -[MLJFlux.NeuralNetworkClassifier] +":input_scitype" = "`AbstractVector{<:ScientificTypesBase.ScientificTimeType}`" +":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" -":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" + +[MLJTransforms.OneHotEncoder] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Vector{Symbol}\", \"Bool\", \"Bool\", \"Bool\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = 
"`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" -":prediction_type" = ":probabilistic" -":load_path" = "MLJFlux.NeuralNetworkClassifier" -":hyperparameters" = "`(:builder, :finaliser, :optimiser, :loss, :epochs, :batch_size, :lambda, :alpha, :rng, :optimiser_changes_trigger_retraining, :acceleration, :embedding_dims)`" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.OneHotEncoder" +":hyperparameters" = "`(:features, :drop_last, :ordered_factor, :ignore)`" ":is_pure_julia" = "`true`" -":human_name" = "neural network classifier" -":is_supervised" = "`true`" -":iteration_parameter" = ":epochs" -":docstring" = """```julia\nNeuralNetworkClassifier\n```\n\nA model type for constructing a neural network classifier, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nNeuralNetworkClassifier = @load NeuralNetworkClassifier pkg=MLJFlux\n```\n\nDo `model = NeuralNetworkClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `NeuralNetworkClassifier(builder=...)`.\n\n`NeuralNetworkClassifier` is for training a data-dependent Flux.jl neural network for making probabilistic predictions of a `Multiclass` or `OrderedFactor` target, given a table of `Continuous` features. 
Users provide a recipe for constructing the network, based on properties of the data that is encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\nIn addition to features with `Continuous` scientific element type, this model supports categorical features in the input table. If present, such features are embedded into dense vectors by the use of an additional `EntityEmbedderLayer` layer after the input, as described in Entity Embeddings of Categorical Variables by Cheng Guo, Felix Berkhahn arXiv, 2016.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` provides input features and is either: (i) a `Matrix` with `Continuous` element scitype (typically `Float32`); or (ii) a table of input features (eg, a `DataFrame`) whose columns have `Continuous`, `Multiclass` or `OrderedFactor` element scitype; check column scitypes with `schema(X)`. If any `Multiclass` or `OrderedFactor` features appear, the constructed network will use an `EntityEmbedderLayer` layer to transform them into dense vectors. If `X` is a `Matrix`, it is assumed that columns correspond to features and rows corresponding to observations.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Multiclass` or `OrderedFactor`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `builder=MLJFlux.Short()`: An MLJFlux builder that constructs a neural network. Possible `builders` include: `MLJFlux.Linear`, `MLJFlux.Short`, and `MLJFlux.MLP`. See MLJFlux.jl documentation for examples of user-defined builders. See also `finaliser` below.\n * `optimiser::Optimisers.Adam()`: An Optimisers.jl optimiser. The optimiser performs the updating of the weights of the network. 
To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of `10` between `1` and `1e-7`.\n * `loss=Flux.crossentropy`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). For a classification task, the most natural loss functions are:\n\n * `Flux.crossentropy`: Standard multiclass classification loss, also known as the log loss.\n * `Flux.logitcrossentopy`: Mathematically equal to crossentropy, but numerically more stable than finalising the outputs with `softmax` and then calculating crossentropy. You will need to specify `finaliser=identity` to remove MLJFlux's default softmax finaliser, and understand that the output of `predict` is then unnormalized (no longer probabilistic).\n * `Flux.tversky_loss`: Used with imbalanced data to give more weight to false negatives.\n * `Flux.focal_loss`: Used with highly imbalanced data. Weights harder examples more than easier examples.\n\n Currently MLJ measures are not supported values of `loss`.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete the training dataset.\n * `batch_size::int=1`: the batch size to be used for training, representing the number of samples per update of the network weights.] Typically, batch size is between `8` and `512`. Increassing batch size may accelerate training if `acceleration=CUDALibs()` and a GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`. Note the history reports unpenalized losses.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. 
A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training. The default is `Random.default_rng()`.\n * `optimizer_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. For Training on GPU, use `CUDALibs()`.\n * `finaliser=Flux.softmax`: The final activation function of the neural network (applied after the network defined by `builder`). Defaults to `Flux.softmax`.\n * `embedding_dims`: a `Dict` whose keys are names of categorical features, given as symbols, and whose values are numbers representing the desired dimensionality of the entity embeddings of such features: an integer value of `7`, say, sets the embedding dimensionality to `7`; a float value of `0.5`, say, sets the embedding dimensionality to `ceil(0.5 * c)`, where `c` is the number of feature levels. Unspecified feature dimensionality defaults to `min(c - 1, 10)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n * `transform(mach, Xnew)`: Assuming `Xnew` has the same schema as `X`, transform the categorical features of `Xnew` into dense `Continuous` vectors using the `MLJFlux.EntityEmbedderLayer` layer present in the network. 
Does nothing in case the model was trained on an input `X` that lacks categorical features.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network. This includes the final layer specified by `finaliser` (eg, `softmax`).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalised if `lambda != 0`) in historical order, of length `epochs + 1`. The first element is the pre-training loss.\n\n# Examples\n\nIn this example we build a classification model using the Iris dataset. This is a very basic example, using a default builder and no standardization. For a more advanced illustration, see [`NeuralNetworkRegressor`](@ref) or [`ImageClassifier`](@ref), and examples in the MLJFlux.jl documentation.\n\n```julia\nusing MLJ\nusing Flux\nimport RDatasets\nimport Optimisers\n```\n\nFirst, we can load the data:\n\n```julia\niris = RDatasets.dataset(\"datasets\", \"iris\");\ny, X = unpack(iris, ==(:Species), rng=123); # a vector and a table\nNeuralNetworkClassifier = @load NeuralNetworkClassifier pkg=MLJFlux\nclf = NeuralNetworkClassifier()\n```\n\nNext, we can train the model:\n\n```julia\nmach = machine(clf, X, y)\nfit!(mach)\n```\n\nWe can train the model in an incremental fashion, altering the learning rate as we go, provided `optimizer_changes_trigger_retraining` is `false` (the default). 
Here, we also change the number of (total) iterations:\n\n```julia\nclf.optimiser = Optimisers.Adam(clf.optimiser.eta * 2)\nclf.epochs = clf.epochs + 5\n\nfit!(mach, verbosity=2) # trains 5 more epochs\n```\n\nWe can inspect the mean training loss using the `cross_entropy` function:\n\n```julia\ntraining_loss = cross_entropy(predict(mach, X), y)\n```\n\nAnd we can access the Flux chain (model) using `fitted_params`:\n\n```julia\nchain = fitted_params(mach).chain\n```\n\nFinally, we can see how the out-of-sample performance changes over time, using MLJ's `learning_curve` function:\n\n```julia\nr = range(clf, :epochs, lower=1, upper=200, scale=:log10)\ncurve = learning_curve(clf, X, y,\n range=r,\n resampling=Holdout(fraction_train=0.7),\n measure=cross_entropy)\nusing Plots\nplot(curve.parameter_values,\n curve.measurements,\n xlab=curve.parameter_name,\n xscale=curve.parameter_scale,\n ylab = \"Cross Entropy\")\n\n```\n\nSee also [`ImageClassifier`](@ref), [`NeuralNetworkBinaryClassifier`](@ref).\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/alan-turing-institute/MLJFlux.jl" -":package_name" = "MLJFlux" -":name" = "NeuralNetworkClassifier" -":target_in_fit" = "`true`" +":human_name" = "one-hot encoder" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nOneHotEncoder\n```\n\nA model type for constructing a one-hot encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nOneHotEncoder = @load OneHotEncoder pkg=MLJTransforms\n```\n\nDo `model = OneHotEncoder()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `OneHotEncoder(features=...)`.\n\nUse this model to one-hot encode the `Multiclass` and `OrderedFactor` features (columns) of some table, leaving other columns unchanged.\n\nNew data to be transformed may lack features present in the fit data, but no *new* features can be present.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo ensure *all* features are transformed into `Continuous` features, or dropped, use [`ContinuousEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table. Columns can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded. Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of symbols (feature names). If empty (default) then all `Multiclass` and `OrderedFactor` features are encoded. Otherwise, encoding is further restricted to the specified features (`ignore=false`) or the unspecified features (`ignore=true`). This default behavior can be modified by the `ordered_factor` flag.\n * `ordered_factor=true`: when `false`, `OrderedFactor` features are left untouched.\n * `drop_last=false`: whether to drop the column corresponding to the final class of encoded features. 
For example, a three-class feature is spawned into three new features if `drop_last=false`, but just two features otherwise.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `all_features`: names of all features encountered in training\n * `fitted_levels_given_feature`: dictionary of the levels associated with each feature encoded, keyed on the feature name\n * `ref_name_pairs_given_feature`: dictionary of pairs `r => ftr` (such as `0x00000001 => :grad__A`) where `r` is a CategoricalArrays.jl reference integer representing a level, and `ftr` the corresponding new feature name; the dictionary is keyed on the names of features that are encoded\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features_to_be_encoded`: names of input features to be encoded\n * `new_features`: names of all output features\n\n# Example\n\n```julia\nusing MLJ\n\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n height=[1.85, 1.67, 1.5, 1.67],\n n_devices=[3, 2, 4, 3])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names │ scitypes │\n├───────────┼──────────────────┤\n│ name │ Multiclass{4} │\n│ grade │ OrderedFactor{3} │\n│ height │ Continuous │\n│ n_devices │ Count │\n└───────────┴──────────────────┘\n\nhot = OneHotEncoder(drop_last=true)\nmach = fit!(machine(hot, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names │ scitypes │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John │ Continuous │\n│ name__Lee │ Continuous │\n│ grade__A │ Continuous │\n│ grade__B │ Continuous │\n│ height │ Continuous │\n│ n_devices │ Count │\n└──────────────┴────────────┘\n```\n\nSee also [`ContinuousEncoder`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "OneHotEncoder" +":target_in_fit" = 
"`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":predict"] -":deep_properties" = "`(:optimiser, :builder)`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":supports_training_losses" = "`true`" +":implemented_methods" = [":fit", ":fitted_params", ":transform", ":OneHotEncoder"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" +":is_wrapper" = "`false`" + +[MLJTransforms.ContinuousEncoder] ":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Bool\", \"Bool\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.ContinuousEncoder" +":hyperparameters" = "`(:drop_last, :one_hot_ordered_factors)`" +":is_pure_julia" = "`true`" +":human_name" = "continuous encoder" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nContinuousEncoder\n```\n\nA model type for constructing 
a continuous encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nContinuousEncoder = @load ContinuousEncoder pkg=MLJTransforms\n```\n\nDo `model = ContinuousEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ContinuousEncoder(drop_last=...)`.\n\nUse this model to arrange all features (columns) of a table to have `Continuous` element scitype, by applying the following protocol to each feature `ftr`:\n\n * If `ftr` is already `Continuous`, retain it.\n * If `ftr` is `Multiclass`, one-hot encode it.\n * If `ftr` is `OrderedFactor`, replace it with `coerce(ftr, Continuous)` (vector of floating point integers), unless `one_hot_ordered_factors=true` is specified, in which case one-hot encode it.\n * If `ftr` is `Count`, replace it with `coerce(ftr, Continuous)`.\n * If `ftr` has some other element scitype, or was not observed in fitting the encoder, drop it from the table.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo selectively one-hot-encode categorical features (without dropping features) use [`OneHotEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table. Features can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded. Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `drop_last=true`: whether to drop the column corresponding to the final class of one-hot encoded features. 
For example, a three-class feature is spawned into three new features if `drop_last=false`, but just two features otherwise.\n * `one_hot_ordered_factors=false`: whether to one-hot any feature with `OrderedFactor` element scitype, or to instead coerce it directly to a (single) `Continuous` feature using the order\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_to_keep`: names of features that will not be dropped from the table\n * `one_hot_encoder`: the `OneHotEncoder` model instance for handling the one-hot encoding\n * `one_hot_encoder_fitresult`: the fitted parameters of the `OneHotEncoder` model\n\n# Report\n\n * `features_to_keep`: names of input features that will not be dropped from the table\n * `new_features`: names of all output features\n\n# Example\n\n```julia\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n height=[1.85, 1.67, 1.5, 1.67],\n n_devices=[3, 2, 4, 3],\n comments=[\"the force\", \"be\", \"with you\", \"too\"])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names │ scitypes │\n├───────────┼──────────────────┤\n│ name │ Multiclass{4} │\n│ grade │ OrderedFactor{3} │\n│ height │ Continuous │\n│ n_devices │ Count │\n│ comments │ Textual │\n└───────────┴──────────────────┘\n\nencoder = ContinuousEncoder(drop_last=true)\nmach = fit!(machine(encoder, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names │ scitypes │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John │ Continuous │\n│ name__Lee │ Continuous │\n│ grade │ Continuous │\n│ height │ Continuous │\n│ n_devices │ Continuous │\n└──────────────┴────────────┘\n\njulia> setdiff(schema(X).names, report(mach).features_to_keep) # dropped features\n1-element Vector{Symbol}:\n :comments\n\n```\n\nSee also [`OneHotEncoder`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = 
"https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "ContinuousEncoder" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":fit", ":fitted_params", ":transform", ":ContinuousEncoder"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":is_wrapper" = "`false`" -[MLJFlux.ImageClassifier] +[MLJTransforms.FrequencyEncoder] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Bool\", \"Type\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "unknown" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.FrequencyEncoder" +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :normalize, :output_type)`" +":is_pure_julia" = "`true`" +":human_name" = "frequency encoder" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nFrequencyEncoder\n```\n\nA model type for constructing a frequency encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nFrequencyEncoder = @load FrequencyEncoder pkg=MLJTransforms\n```\n\nDo `model = FrequencyEncoder()` to construct an 
instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FrequencyEncoder(features=...)`.\n\n`FrequencyEncoder` implements frequency encoding which replaces the categorical values in the specified categorical features with their (normalized or raw) frequencies of occurrence in the dataset. \n\n# Training data\n\nIn MLJ (or MLJBase) bind an unsupervised instance `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features=[]`: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded.\n * `ignore=true`: Whether to exclude or include the features given in `features`\n * `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them\n * `normalize=false`: Whether to use normalized frequencies that sum to 1 over category values or to use raw counts.\n * `output_type=Float32`: The type of the output values. The default is `Float32`, but you can set it to `Float64` or any other type that can hold the frequency values.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply frequency encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. 
Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `statistic_given_feat_val`: A dictionary that maps each level for each column in a subset of the categorical features of X into its frequency.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",] \nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",] \nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\n\n# Check scitype coercions:\nschema(X)\n\nencoder = FrequencyEncoder(ordered_factor = false, normalize=true)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia > Xnew\n (A = [2, 1, 2, 2, 2],\n B = [1.0, 2.0, 3.0, 4.0, 5.0],\n C = [4, 4, 4, 1, 4],\n D = [3, 2, 3, 2, 3],\n E = CategoricalArrays.CategoricalValue{Int64, UInt32}[1, 2, 3, 4, 5],)\n```\n\nSee also [`TargetEncoder`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "FrequencyEncoder" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":fit", ":fitted_params", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = 
"`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\")`" -":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" + +[MLJTransforms.TargetEncoder] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Real\", \"Union{Real, Symbol}\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Image}, AbstractVector{<:ScientificTypesBase.Multiclass}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table, ScientificTypesBase.Unknown}`" +":output_scitype" = "`ScientificTypesBase.Table`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":package_license" = "MIT" -":prediction_type" = ":probabilistic" -":load_path" = "MLJFlux.ImageClassifier" -":hyperparameters" = "`(:builder, :finaliser, :optimiser, :loss, :epochs, :batch_size, :lambda, :alpha, :rng, :optimiser_changes_trigger_retraining, :acceleration)`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "unknown" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.TargetEncoder" +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :lambda, :m)`" ":is_pure_julia" = "`true`" -":human_name" = "image classifier" -":is_supervised" = "`true`" -":iteration_parameter" = ":epochs" -":docstring" = """```julia\nImageClassifier\n```\n\nA model type for constructing a image classifier, based on 
[MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nImageClassifier = @load ImageClassifier pkg=MLJFlux\n```\n\nDo `model = ImageClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ImageClassifier(builder=...)`.\n\n`ImageClassifier` classifies images using a neural network adapted to the type of images provided (color or gray scale). Predictions are probabilistic. Users provide a recipe for constructing the network, based on properties of the image encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any `AbstractVector` of images with `ColorImage` or `GrayImage` scitype; check the scitype with `scitype(X)` and refer to ScientificTypes.jl documentation on coercing typical image formats into an appropriate type.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Multiclass`; check the scitype with `scitype(y)`.\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `builder`: An MLJFlux builder that constructs the neural network. The fallback builds a depth-16 VGG architecture adapted to the image size and number of target classes, with no batch normalization; see the Metalhead.jl documentation for details. See the example below for a user-specified builder. A convenience macro `@builder` is also available. See also `finaliser` below.\n * `optimiser::Optimisers.Adam()`: An Optimisers.jl optimiser. The optimiser performs the updating of the weights of the network. 
To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of `10` between `1` and `1e-7`.\n * `loss=Flux.crossentropy`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). For a classification task, the most natural loss functions are:\n\n * `Flux.crossentropy`: Standard multiclass classification loss, also known as the log loss.\n * `Flux.logitcrossentopy`: Mathematically equal to crossentropy, but numerically more stable than finalising the outputs with `softmax` and then calculating crossentropy. You will need to specify `finaliser=identity` to remove MLJFlux's default softmax finaliser, and understand that the output of `predict` is then unnormalized (no longer probabilistic).\n * `Flux.tversky_loss`: Used with imbalanced data to give more weight to false negatives.\n * `Flux.focal_loss`: Used with highly imbalanced data. Weights harder examples more than easier examples.\n\n Currently MLJ measures are not supported values of `loss`.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete the training dataset.\n * `batch_size::int=1`: the batch size to be used for training, representing the number of samples per update of the network weights. Typically, batch size is between 8 and\n\n 512. Increassing batch size may accelerate training if `acceleration=CUDALibs()` and a\n\n GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`. Note the history reports unpenalized losses.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. 
A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training. The default is `Random.default_rng()`.\n * `optimizer_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. For Training on GPU, use `CUDALibs()`.\n * `finaliser=Flux.softmax`: The final activation function of the neural network (applied after the network defined by `builder`). Defaults to `Flux.softmax`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network. This includes the final layer specified by `finaliser` (eg, `softmax`).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalised if `lambda != 0`) in historical order, of length `epochs + 1`. 
The first element is the pre-training loss.\n\n# Examples\n\nIn this example we use MLJFlux and a custom builder to classify the MNIST image dataset.\n\n```julia\nusing MLJ\nusing Flux\nimport MLJFlux\nimport Optimisers\nimport MLJIteration # for `skip` control\n```\n\nFirst we want to download the MNIST dataset, and unpack into images and labels:\n\n```julia\nimport MLDatasets: MNIST\ndata = MNIST(split=:train)\nimages, labels = data.features, data.targets\n```\n\nIn MLJ, integers cannot be used for encoding categorical data, so we must coerce them into the `Multiclass` scitype:\n\n```julia\nlabels = coerce(labels, Multiclass);\n```\n\nAbove `images` is a single array but MLJFlux requires the images to be a vector of individual image arrays:\n\n```julia\nimages = coerce(images, GrayImage);\nimages[1]\n```\n\nWe start by defining a suitable `builder` object. This is a recipe for building the neural network. Our builder will work for images of any (constant) size, whether they be color or black and white (ie, single or multi-channel). The architecture always consists of six alternating convolution and max-pool layers, and a final dense layer; the filter size and the number of channels after each convolution layer is customizable.\n\n```julia\nimport MLJFlux\n\nstruct MyConvBuilder\n filter_size::Int\n channels1::Int\n channels2::Int\n channels3::Int\nend\n\nmake2d(x::AbstractArray) = reshape(x, :, size(x)[end])\n\nfunction MLJFlux.build(b::MyConvBuilder, rng, n_in, n_out, n_channels)\n k, c1, c2, c3 = b.filter_size, b.channels1, b.channels2, b.channels3\n mod(k, 2) == 1 || error(\"`filter_size` must be odd. 
\")\n p = div(k - 1, 2) # padding to preserve image size\n init = Flux.glorot_uniform(rng)\n front = Chain(\n Conv((k, k), n_channels => c1, pad=(p, p), relu, init=init),\n MaxPool((2, 2)),\n Conv((k, k), c1 => c2, pad=(p, p), relu, init=init),\n MaxPool((2, 2)),\n Conv((k, k), c2 => c3, pad=(p, p), relu, init=init),\n MaxPool((2 ,2)),\n make2d)\n d = Flux.outputsize(front, (n_in..., n_channels, 1)) |> first\n return Chain(front, Dense(d, n_out, init=init))\nend\n```\n\nIt is important to note that in our `build` function, there is no final `softmax`. This is applied by default in all MLJFlux classifiers (override this using the `finaliser` hyperparameter).\n\nNow that our builder is defined, we can instantiate the actual MLJFlux model. If you have a GPU, you can substitute in `acceleration=CUDALibs()` below to speed up training.\n\n```julia\nImageClassifier = @load ImageClassifier pkg=MLJFlux\nclf = ImageClassifier(builder=MyConvBuilder(3, 16, 32, 32),\n batch_size=50,\n epochs=10,\n rng=123)\n```\n\nYou can add Flux options such as `optimiser` and `loss` in the snippet above. 
Currently, `loss` must be a flux-compatible loss, and not an MLJ measure.\n\nNext, we can bind the model with the data in a machine, and train using the first 500 images:\n\n```julia\nmach = machine(clf, images, labels);\nfit!(mach, rows=1:500, verbosity=2);\nreport(mach)\nchain = fitted_params(mach)\nFlux.params(chain)[2]\n```\n\nWe can tack on 20 more epochs by modifying the `epochs` field, and iteratively fit some more:\n\n```julia\nclf.epochs = clf.epochs + 20\nfit!(mach, rows=1:500, verbosity=2);\n```\n\nWe can also make predictions and calculate an out-of-sample loss estimate, using any MLJ measure (loss/score):\n\n```julia\npredicted_labels = predict(mach, rows=501:1000);\ncross_entropy(predicted_labels, labels[501:1000])\n```\n\nThe preceding `fit!`/`predict`/evaluate workflow can be alternatively executed as follows:\n\n```julia\nevaluate!(mach,\n resampling=Holdout(fraction_train=0.5),\n measure=cross_entropy,\n rows=1:1000,\n verbosity=0)\n```\n\nSee also [`NeuralNetworkClassifier`](@ref).\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/alan-turing-institute/MLJFlux.jl" -":package_name" = "MLJFlux" -":name" = "ImageClassifier" +":human_name" = "target encoder" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nTargetEncoder\n```\n\nA model type for constructing a target encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nTargetEncoder = @load TargetEncoder pkg=MLJTransforms\n```\n\nDo `model = TargetEncoder()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `TargetEncoder(features=...)`.\n\n`TargetEncoder` implements target encoding as defined in [1] to encode categorical variables into continuous ones using statistics from the target variable.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous` or `Count` for regression problems and `Multiclass` or `OrderedFactor` for classification problems; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features=[]`: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded.\n\n * `ignore=true`: Whether to exclude or include the features given in `features`\n\n * `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them\n\n * `lambda`: Shrinkage hyperparameter used to mix between posterior and prior statistics as described in [1]\n * `m`: An integer hyperparameter to compute shrinkage as described in [1]. If `m=:auto` then m will be computed using empirical Bayes estimation as described in [1]\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply target encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. 
Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `task`: Whether the task is `Classification` or `Regression`\n * `y_statistic_given_feat_level`: A dictionary with the necessary statistics to encode each categorical feature. It maps each level in each categorical feature to a statistic computed over the target.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",]\nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",]\nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Define the target variable\ny = [\"c1\", \"c2\", \"c3\", \"c1\", \"c2\",]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\ny = coerce(y, Multiclass)\n\nencoder = TargetEncoder(ordered_factor = false, lambda = 1.0, m = 0,)\nmach = fit!(machine(encoder, X, y))\nXnew = transform(mach, X)\n\njulia > schema(Xnew)\n┌───────┬──────────────────┬─────────────────────────────────┐\n│ names │ scitypes │ types │\n├───────┼──────────────────┼─────────────────────────────────┤\n│ A_1 │ Continuous │ Float64 │\n│ A_2 │ Continuous │ Float64 │\n│ A_3 │ Continuous │ Float64 │\n│ B │ Continuous │ Float64 │\n│ C_1 │ Continuous │ Float64 │\n│ C_2 │ Continuous │ Float64 │\n│ C_3 │ Continuous │ Float64 │\n│ D_1 │ Continuous │ Float64 │\n│ D_2 │ Continuous │ Float64 │\n│ D_3 │ Continuous │ Float64 │\n│ E │ OrderedFactor{5} │ CategoricalValue{Int64, UInt32} │\n└───────┴──────────────────┴─────────────────────────────────┘\n```\n\n# Reference\n\n[1] Micci-Barreca, Daniele. 
“A preprocessing scheme for high-cardinality categorical attributes in classification and prediction problems” SIGKDD Explor. Newsl. 3, 1 (July 2001), 27–32.\n\nSee also [`OneHotEncoder`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "TargetEncoder" ":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":predict"] -":deep_properties" = "`(:optimiser, :builder)`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Multiclass}}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Multiclass}`" -":supports_training_losses" = "`true`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" +":is_wrapper" = "`false`" + +[MLJTransforms.UnivariateBoxCoxTransformer] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Bool\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{AbstractVector{ScientificTypesBase.Continuous}}`" +":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.UnivariateBoxCoxTransformer" +":hyperparameters" = "`(:n, :shift)`" +":is_pure_julia" = "`true`" +":human_name" = "single variable Box-Cox transformer" 
+":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nUnivariateBoxCoxTransformer\n```\n\nA model type for constructing a single variable Box-Cox transformer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nUnivariateBoxCoxTransformer = @load UnivariateBoxCoxTransformer pkg=MLJTransforms\n```\n\nDo `model = UnivariateBoxCoxTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateBoxCoxTransformer(n=...)`.\n\nBox-Cox transformations attempt to make data look more normally distributed. This can improve performance and assist in the interpretation of models which suppose that data is generated by a normal distribution.\n\nA Box-Cox transformation (with shift) is of the form\n\n```julia\nx -> ((x + c)^λ - 1)/λ\n```\n\nfor some constant `c` and real `λ`, unless `λ = 0`, in which case the above is replaced with\n\n```julia\nx -> log(x + c)\n```\n\nGiven user-specified hyper-parameters `n::Integer` and `shift::Bool`, the present implementation learns the parameters `c` and `λ` from the training data as follows: If `shift=true` and zeros are encountered in the data, then `c` is set to `0.2` times the data mean. If there are no zeros, then no shift is applied. 
Finally, `n` different values of `λ` between `-0.4` and `3` are considered, with `λ` fixed to the value maximizing normality of the transformed data.\n\n*Reference:* [Wikipedia entry for power transform](https://en.wikipedia.org/wiki/Power_transform).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with element scitype `Continuous`; check the scitype with `scitype(x)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `n=171`: number of values of the exponent `λ` to try\n * `shift=false`: whether to include a preliminary constant translation in transformations, in the presence of zeros\n\n# Operations\n\n * `transform(mach, xnew)`: apply the Box-Cox transformation learned when fitting `mach`\n * `inverse_transform(mach, z)`: reconstruct the vector `x` whose transformation learned by `mach` is `z`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `λ`: the learned Box-Cox exponent\n * `c`: the learned shift\n\n# Examples\n\n```julia\nusing MLJ\nusing UnicodePlots\nusing Random\nRandom.seed!(123)\n\ntransf = UnivariateBoxCoxTransformer()\n\nx = randn(1000).^2\n\nmach = machine(transf, x)\nfit!(mach)\n\nz = transform(mach, x)\n\njulia> histogram(x)\n ┌ ┐\n [ 0.0, 2.0) ┤███████████████████████████████████ 848\n [ 2.0, 4.0) ┤████▌ 109\n [ 4.0, 6.0) ┤█▍ 33\n [ 6.0, 8.0) ┤▍ 7\n [ 8.0, 10.0) ┤▏ 2\n [10.0, 12.0) ┤ 0\n [12.0, 14.0) ┤▏ 1\n └ ┘\n Frequency\n\njulia> histogram(z)\n ┌ ┐\n [-5.0, -4.0) ┤█▎ 8\n [-4.0, -3.0) ┤████████▊ 64\n [-3.0, -2.0) ┤█████████████████████▊ 159\n [-2.0, -1.0) ┤█████████████████████████████▊ 216\n [-1.0, 0.0) ┤███████████████████████████████████ 254\n [ 0.0, 1.0) ┤█████████████████████████▊ 188\n [ 1.0, 2.0) ┤████████████▍ 90\n [ 2.0, 3.0) ┤██▊ 20\n [ 3.0, 4.0) ┤▎ 1\n └ ┘\n Frequency\n\n```\n""" +":inverse_transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`"
+":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "UnivariateBoxCoxTransformer" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform", ":UnivariateBoxCoxTransformer"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":is_wrapper" = "`false`" + +[MLJTransforms.InteractionTransformer] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, Vector{Symbol}}\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{}`" +":output_scitype" = "`ScientificTypesBase.Table`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Static`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.InteractionTransformer" +":hyperparameters" = "`(:order, :features)`" +":is_pure_julia" = "`true`" +":human_name" = "interaction transformer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nInteractionTransformer\n```\n\nA model type for constructing a interaction transformer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nInteractionTransformer = @load InteractionTransformer pkg=MLJTransforms\n```\n\nDo `model = InteractionTransformer()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `InteractionTransformer(order=...)`.\n\nGenerates all polynomial interaction terms up to the given order for the subset of chosen columns. Any column that contains elements with scitype `<:Infinite` is a valid basis to generate interactions. If `features` is not specified, all such columns with scitype `<:Infinite` in the table are used as a basis.\n\nIn MLJ or MLJBase, you can transform features `X` with the single call\n\n```julia\ntransform(machine(model), X)\n```\n\nSee also the example below.\n\n# Hyper-parameters\n\n * `order`: Maximum order of interactions to be generated.\n * `features`: Restricts interaction generation to those columns\n\n# Operations\n\n * `transform(machine(model), X)`: Generates polynomial interaction terms out of table `X` using the hyper-parameters specified in `model`.\n\n# Example\n\n```julia\nusing MLJ\n\nX = (\n A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"]\n)\nit = InteractionTransformer(order=3)\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],\n A_C = [7, 16, 27],\n B_C = [28, 40, 54],\n A_B_C = [28, 80, 162],)\n\nit = InteractionTransformer(order=2, features=[:A, :B])\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],)\n\n```\n""" +":inverse_transform_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "InteractionTransformer" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`"
+":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":transform_scitype" = "`ScientificTypesBase.Table`" +":is_wrapper" = "`false`" + +[MLJTransforms.UnivariateDiscretizer] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\",)`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing,)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.OrderedFactor}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.UnivariateDiscretizer" +":hyperparameters" = "`(:n_classes,)`" +":is_pure_julia" = "`true`" +":human_name" = "single variable discretizer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nUnivariateDiscretizer\n```\n\nA model type for constructing a single variable discretizer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nUnivariateDiscretizer = @load UnivariateDiscretizer pkg=MLJTransforms\n```\n\nDo `model = UnivariateDiscretizer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateDiscretizer(n_classes=...)`.\n\nDiscretization converts a `Continuous` vector into an `OrderedFactor` vector. In particular, the output is a `CategoricalVector` (whose reference type is optimized).\n\nThe transformation is chosen so that the vector on which the transformer is fit has, in transformed form, an approximately uniform distribution of values. 
Specifically, if `n_classes` is the level of discretization, then `2*n_classes - 1` ordered quantiles are computed, the odd quantiles being used for transforming (discretization) and the even quantiles for inverse transforming.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with `Continuous` element scitype; check scitype with `scitype(x)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `n_classes`: number of discrete classes in the output\n\n# Operations\n\n * `transform(mach, xnew)`: discretize `xnew` according to the discretization learned when fitting `mach`\n * `inverse_transform(mach, z)`: attempt to reconstruct from `z` a vector that transforms to give `z`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach).fitresult` include:\n\n * `odd_quantiles`: quantiles used for transforming (length is `n_classes - 1`)\n * `even_quantiles`: quantiles used for inverse transforming (length is `n_classes`)\n\n# Example\n\n```julia\nusing MLJ\nusing Random\nRandom.seed!(123)\n\ndiscretizer = UnivariateDiscretizer(n_classes=100)\nmach = machine(discretizer, randn(1000))\nfit!(mach)\n\njulia> x = rand(5)\n5-element Vector{Float64}:\n 0.8585244609846809\n 0.37541692370451396\n 0.6767070590395461\n 0.9208844241267105\n 0.7064611415680901\n\njulia> z = transform(mach, x)\n5-element CategoricalArrays.CategoricalArray{UInt8,1,UInt8}:\n 0x52\n 0x42\n 0x4d\n 0x54\n 0x4e\n\nx_approx = inverse_transform(mach, z)\njulia> x - x_approx\n5-element Vector{Float64}:\n 0.008224506144777322\n 0.012731354778359405\n 0.0056265330571125816\n 0.005738175684445124\n 0.006835652575801987\n```\n""" +":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "UnivariateDiscretizer" +":target_in_fit" = "`false`"
+":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform", ":UnivariateDiscretizer"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":transform_scitype" = "`AbstractVector{<:ScientificTypesBase.OrderedFactor}`" +":is_wrapper" = "`false`" + +[MLJTransforms.CardinalityReducer] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Real\", \"Dict{T} where T<:Type\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "unknown" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.CardinalityReducer" +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :min_frequency, :label_for_infrequent)`" +":is_pure_julia" = "`true`" +":human_name" = "cardinality reducer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nCardinalityReducer\n```\n\nA model type for constructing a cardinality reducer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nCardinalityReducer = @load CardinalityReducer pkg=MLJTransforms\n```\n\nDo `model = CardinalityReducer()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `CardinalityReducer(features=...)`.\n\n`CardinalityReducer` maps any level of a categorical feature that occurs with frequency `< min_frequency` into a new level (e.g., \"Other\"). This is useful when some categorical features have high cardinality and many levels are infrequent. This assumes that the categorical features have raw types that are in `Union{AbstractString, Char, Number}`.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features=[]`: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded.\n\n * `ignore=true`: Whether to exclude or include the features given in `features`\n\n * `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them\n\n * `min_frequency::Real=3`: Any level of a categorical feature that occurs with frequency < `min_frequency` will be mapped to a new level. Could be an integer or a float which decides whether raw counts or normalized frequencies are used.\n * `label_for_infrequent::Dict{<:Type, <:Any}()= Dict( AbstractString => \"Other\", Char => 'O', )`: A dictionary where the possible values for keys are the types in `Char`, `AbstractString`, and `Number` and each value signifies the new level to map into given a column raw super type.
By default, if the raw type of the column subtypes `AbstractString` then the new value is `\"Other\"` and if the raw type subtypes `Char` then the new value is `'O'` and if the raw type subtypes `Number` then the new value is the lowest value in the column - 1.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply cardinality reduction to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `new_cat_given_col_val`: A dictionary that maps each level in a categorical feature to a new level (either itself or the new level specified in `label_for_infrequent`)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nimport StatsBase.proportionmap\nusing MLJ\n\n# Define categorical features\nA = [ [\"a\" for i in 1:100]..., \"b\", \"b\", \"b\", \"c\", \"d\"]\nB = [ [0 for i in 1:100]..., 1, 2, 3, 4, 4]\n\n# Combine into a named tuple\nX = (A = A, B = B)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Multiclass\n)\n\nencoder = CardinalityReducer(ordered_factor = false, min_frequency=3)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia> proportionmap(Xnew.A)\nDict{CategoricalArrays.CategoricalValue{String, UInt32}, Float64} with 3 entries:\n \"Other\" => 0.0190476\n \"b\" => 0.0285714\n \"a\" => 0.952381\n\njulia> proportionmap(Xnew.B)\nDict{CategoricalArrays.CategoricalValue{Int64, UInt32}, Float64} with 2 entries:\n 0 => 0.952381\n -1 => 0.047619\n```\n\nSee also [`FrequencyEncoder`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = 
"CardinalityReducer" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":fit", ":fitted_params", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" +":is_wrapper" = "`false`" + +[MLJTransforms.OrdinalEncoder] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Type\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "unknown" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.OrdinalEncoder" +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :output_type)`" +":is_pure_julia" = "`true`" +":human_name" = "ordinal encoder" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nOrdinalEncoder\n```\n\nA model type for constructing a ordinal encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nOrdinalEncoder = @load OrdinalEncoder pkg=MLJTransforms\n```\n\nDo `model = OrdinalEncoder()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `OrdinalEncoder(features=...)`.\n\n`OrdinalEncoder` implements ordinal encoding which replaces the categorical values in the specified categorical features with integers (ordered arbitrarily). This will create an implicit ordering between categories which may not be a proper modelling assumption.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features=[]`: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded.\n * `ignore=true`: Whether to exclude or include the features given in `features`\n * `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them\n * `output_type`: The numerical concrete type of the encoded features. Default is `Float32`.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply ordinal encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table.
Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `index_given_feat_level`: A dictionary that maps each level for each column in a subset of the categorical features of X into an integer.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",] \nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",] \nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\n\n# Check scitype coercion:\nschema(X)\n\nencoder = OrdinalEncoder(ordered_factor = false)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia > Xnew\n (A = [2, 1, 2, 3, 3],\n B = [1.0, 2.0, 3.0, 4.0, 5.0],\n C = [1, 1, 1, 2, 1],\n D = [2, 1, 2, 1, 2],\n E = CategoricalArrays.CategoricalValue{Int64, UInt32}[1, 2, 3, 4, 5],)\n```\n\nSee also [`TargetEncoder`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "OrdinalEncoder" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":fit", ":fitted_params", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table`" 
+":transform_scitype" = "`ScientificTypesBase.Table`" +":is_wrapper" = "`false`" + +[MLJTransforms.FillImputer] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Vector{Symbol}\", \"Function\", \"Function\", \"Function\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.FillImputer" +":hyperparameters" = "`(:features, :continuous_fill, :count_fill, :finite_fill)`" +":is_pure_julia" = "`true`" +":human_name" = "fill imputer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nFillImputer\n```\n\nA model type for constructing a fill imputer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nFillImputer = @load FillImputer pkg=MLJTransforms\n```\n\nDo `model = FillImputer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FillImputer(features=...)`.\n\nUse this model to impute `missing` values in tabular data. A fixed \"filler\" value is learned from the training data, one for each column of the table.\n\nFor imputing missing values in a vector, use [`UnivariateFillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose features each have element scitypes `Union{Missing, T}`, where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`. 
Check scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of names of features (symbols) for which imputation is to be attempted; default is empty, which is interpreted as \"impute all\".\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_seen_in_fit`: the names of features (columns) encountered during training\n * `univariate_transformer`: the univariate model applied to determine the fillers (its fields contain the functions defining the filler computations)\n * `filler_given_feature`: dictionary of filler values, keyed on feature (column) names\n\n# Examples\n\n```julia\nusing MLJ\nimputer = FillImputer()\n\nX = (a = [1.0, 2.0, missing, 3.0, missing],\n b = coerce([\"y\", \"n\", \"y\", missing, \"y\"], Multiclass),\n c = [1, 1, 2, missing, 3])\n\nschema(X)\njulia> schema(X)\n┌───────┬───────────────────────────────┐\n│ names │ scitypes │\n├───────┼───────────────────────────────┤\n│ a │ Union{Missing, Continuous} │\n│ b │ Union{Missing, Multiclass{2}} │\n│ c │ Union{Missing, Count} │\n└───────┴───────────────────────────────┘\n\nmach = machine(imputer, X)\nfit!(mach)\n\njulia> fitted_params(mach).filler_given_feature\n(filler =
2.0,)\n\njulia> fitted_params(mach).filler_given_feature\nDict{Symbol, Any} with 3 entries:\n :a => 2.0\n :b => \"y\"\n :c => 2\n\njulia> transform(mach, X)\n(a = [1.0, 2.0, 2.0, 3.0, 2.0],\n b = CategoricalValue{String, UInt32}[\"y\", \"n\", \"y\", \"y\", \"y\"],\n c = [1, 1, 2, 2, 3],)\n```\n\nSee also [`UnivariateFillImputer`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "FillImputer" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":fit", ":fitted_params", ":transform", ":FillImputer"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" +":is_wrapper" = "`false`" + +[MLJTransforms.MissingnessEncoder] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Dict{T} where T<:Type\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "unknown" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.MissingnessEncoder" +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :label_for_missing)`" +":is_pure_julia" = "`true`" +":human_name" = "missingness encoder" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nMissingnessEncoder\n```\n\nA model 
type for constructing a missingness encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nMissingnessEncoder = @load MissingnessEncoder pkg=MLJTransforms\n```\n\nDo `model = MissingnessEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MissingnessEncoder(features=...)`.\n\n`MissingnessEncoder` maps any missing level of a categorical feature into a new level (e.g., \"Missing\"). By this, missingness will be treated as a new level by any subsequent model. This assumes that the categorical features have raw types that are in `Char`, `AbstractString`, and `Number`.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features=[]`: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded.\n\n * `ignore=true`: Whether to exclude or include the features given in `features`\n\n * `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them\n\n * `label_for_missing::Dict{<:Type, <:Any}()= Dict( AbstractString => \"missing\", Char => 'm', )`: A dictionary where the possible values for keys are the types in `Char`, `AbstractString`, and `Number` and where each value signifies the new level to map into given a column raw super type. 
By default, if the raw type of the column subtypes `AbstractString` then missing values will be replaced with `\"missing\"` and if the raw type subtypes `Char` then the new value is `'m'` and if the raw type subtypes `Number` then the new value is the lowest value in the column - 1.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply missingness encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `label_for_missing_given_feature`: A dictionary that for each column, maps `missing` into some value according to `label_for_missing`\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nimport StatsBase.proportionmap\nusing MLJ\n\n# Define a table with missing values\nXm = (\n A = categorical([\"Ben\", \"John\", missing, missing, \"Mary\", \"John\", missing]),\n B = [1.85, 1.67, missing, missing, 1.5, 1.67, missing],\n C= categorical([7, 5, missing, missing, 10, 0, missing]),\n D = [23, 23, 44, 66, 14, 23, 11],\n E = categorical([missing, 'g', 'r', missing, 'r', 'g', 'p'])\n)\n\nencoder = MissingnessEncoder()\nmach = fit!(machine(encoder, Xm))\nXnew = transform(mach, Xm)\n\njulia> Xnew\n(A = [\"Ben\", \"John\", \"missing\", \"missing\", \"Mary\", \"John\", \"missing\"],\n B = Union{Missing, Float64}[1.85, 1.67, missing, missing, 1.5, 1.67, missing],\n C = [7, 5, -1, -1, 10, 0, -1],\n D = [23, 23, 44, 66, 14, 23, 11],\n E = ['m', 'g', 'r', 'm', 'r', 'g', 'p'],)\n\n```\n\nSee also [`CardinalityReducer`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "MissingnessEncoder" +":target_in_fit" = 
"`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":fit", ":fitted_params", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" +":is_wrapper" = "`false`" + +[MLJTransforms.ContrastEncoder] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Union{Symbol, AbstractVector{Symbol}}\", \"Any\", \"Bool\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "unknown" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.ContrastEncoder" +":hyperparameters" = "`(:features, :ignore, :mode, :buildmatrix, :ordered_factor)`" +":is_pure_julia" = "`true`" +":human_name" = "contrast encoder" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nContrastEncoder\n```\n\nA model type for constructing a contrast encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nContrastEncoder = @load ContrastEncoder pkg=MLJTransforms\n```\n\nDo `model = ContrastEncoder()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `ContrastEncoder(features=...)`.\n\n`ContrastEncoder` implements the following contrast encoding methods for categorical features: dummy, sum, backward/forward difference, and Helmert coding. More generally, users can specify a custom contrast or hypothesis matrix, and each feature can be encoded using a different method.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features=[]`: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded.\n\n * `mode=:dummy`: The type of encoding to use. Can be one of `:contrast`, `:dummy`, `:sum`, `:backward_diff`, `:forward_diff`, `:helmert` or `:hypothesis`. If `ignore=false` (features to be encoded are listed explicitly in `features`), then this can be a vector of the same length as `features` to specify a different contrast encoding scheme for each feature\n * `buildmatrix=nothing`: A function or other callable with signature `buildmatrix(colname,k)`, where `colname` is the name of the feature levels and `k` is its length, and which returns a contrast or hypothesis matrix with row/column ordering consistent with the ordering of `levels(col)`. 
Only relevant if `mode` is `:contrast` or `:hypothesis`.\n * `ignore=true`: Whether to exclude or include the features given in `features`\n\n * `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply contrast encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `vector_given_value_given_feature`: A dictionary that maps each level for each column in a subset of the categorical features of X into its encoding vector.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical dataset\nX = (\n name = categorical([\"Ben\", \"John\", \"Mary\", \"John\"]),\n height = [1.85, 1.67, 1.5, 1.67],\n favnum = categorical([7, 5, 10, 1]),\n age = [23, 23, 14, 23],\n)\n\n# Check scitype coercions:\nschema(X)\n\nencoder = ContrastEncoder(\n features = [:name, :favnum],\n ignore = false,\n mode = [:dummy, :helmert],\n)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia> Xnew\n (name_John = [1.0, 0.0, 0.0, 0.0],\n name_Mary = [0.0, 1.0, 0.0, 1.0],\n height = [1.85, 1.67, 1.5, 1.67],\n favnum_5 = [0.0, 1.0, 0.0, -1.0],\n favnum_7 = [2.0, -1.0, 0.0, -1.0],\n favnum_10 = [-1.0, -1.0, 3.0, -1.0],\n age = [23, 23, 14, 23],)\n```\n\nSee also [`OneHotEncoder`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "ContrastEncoder" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":fit", ":fitted_params", ":transform"] +":deep_properties" = 
"`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Image}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" - -[MLJFlux.NeuralNetworkBinaryClassifier] +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" -":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" + +[MLJTransforms.UnivariateStandardizer] +":constructor" = "`nothing`" +":hyperparameter_types" = "`()`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`()`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Binary}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Infinite}}`" +":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" -":prediction_type" = ":probabilistic" -":load_path" = "MLJFlux.NeuralNetworkBinaryClassifier" -":hyperparameters" = "`(:builder, :finaliser, :optimiser, 
:loss, :epochs, :batch_size, :lambda, :alpha, :rng, :optimiser_changes_trigger_retraining, :acceleration, :embedding_dims)`" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.UnivariateStandardizer" +":hyperparameters" = "`()`" ":is_pure_julia" = "`true`" -":human_name" = "neural network binary classifier" -":is_supervised" = "`true`" -":iteration_parameter" = ":epochs" -":docstring" = """```julia\nNeuralNetworkBinaryClassifier\n```\n\nA model type for constructing a neural network binary classifier, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nNeuralNetworkBinaryClassifier = @load NeuralNetworkBinaryClassifier pkg=MLJFlux\n```\n\nDo `model = NeuralNetworkBinaryClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `NeuralNetworkBinaryClassifier(builder=...)`.\n\n`NeuralNetworkBinaryClassifier` is for training a data-dependent Flux.jl neural network for making probabilistic predictions of a binary (`Multiclass{2}` or `OrderedFactor{2}`) target, given a table of `Continuous` features. Users provide a recipe for constructing the network, based on properties of the data that is encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\nIn addition to features with `Continuous` scientific element type, this model supports categorical features in the input table. 
If present, such features are embedded into dense vectors by the use of an additional `EntityEmbedderLayer` layer after the input, as described in Entity Embeddings of Categorical Variables by Cheng Guo, Felix Berkhahn arXiv, 2016.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` provides input features and is either: (i) a `Matrix` with `Continuous` element scitype (typically `Float32`); or (ii) a table of input features (eg, a `DataFrame`) whose columns have `Continuous`, `Multiclass` or `OrderedFactor` element scitype; check column scitypes with `schema(X)`. If any `Multiclass` or `OrderedFactor` features appear, the constructed network will use an `EntityEmbedderLayer` layer to transform them into dense vectors. If `X` is a `Matrix`, it is assumed that columns correspond to features and rows corresponding to observations.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Multiclass{2}` or `OrderedFactor{2}`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `builder=MLJFlux.Short()`: An MLJFlux builder that constructs a neural network. Possible `builders` include: `MLJFlux.Linear`, `MLJFlux.Short`, and `MLJFlux.MLP`. See MLJFlux.jl documentation for examples of user-defined builders. See also `finaliser` below.\n * `optimiser::Flux.Adam()`: A `Flux.Optimise` optimiser. The optimiser performs the updating of the weights of the network. For further reference, see [the Flux optimiser documentation](https://fluxml.ai/Flux.jl/stable/training/optimisers/). To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of `10` between `1` and `1e-7`.\n * `loss=Flux.binarycrossentropy`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. 
Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). For a classification task, the most natural loss functions are:\n\n * `Flux.binarycrossentropy`: Standard binary classification loss, also known as the log loss.\n * `Flux.logitbinarycrossentropy`: Mathematically equal to crossentropy, but numerically more stable than finalising the outputs with `σ` and then calculating crossentropy. You will need to specify `finaliser=identity` to remove MLJFlux's default sigmoid finaliser, and understand that the output of `predict` is then unnormalized (no longer probabilistic).\n * `Flux.tversky_loss`: Used with imbalanced data to give more weight to false negatives.\n * `Flux.binary_focal_loss`: Used with highly imbalanced data. Weights harder examples more than easier examples.\n\n Currently MLJ measures are not supported values of `loss`.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete the training dataset.\n * `batch_size::int=1`: the batch size to be used for training, representing the number of samples per update of the network weights. Typically, batch size is between `8` and `512`. Increassing batch size may accelerate training if `acceleration=CUDALibs()` and a GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training.\n * `optimizer_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. 
If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. For Training on GPU, use `CUDALibs()`.\n * `finaliser=Flux.σ`: The final activation function of the neural network (applied after the network defined by `builder`). Defaults to `Flux.σ`.\n * `embedding_dims`: a `Dict` whose keys are names of categorical features, given as symbols, and whose values are numbers representing the desired dimensionality of the entity embeddings of such features: an integer value of `7`, say, sets the embedding dimensionality to `7`; a float value of `0.5`, say, sets the embedding dimensionality to `ceil(0.5 * c)`, where `c` is the number of feature levels. Unspecified feature dimensionality defaults to `min(c - 1, 10)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n * `transform(mach, Xnew)`: Assuming `Xnew` has the same schema as `X`, transform the categorical features of `Xnew` into dense `Continuous` vectors using the `MLJFlux.EntityEmbedderLayer` layer present in the network. Does nothing in case the model was trained on an input `X` that lacks categorical features.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network. This includes the final layer specified by `finaliser` (eg, `softmax`).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalised if `lambda != 0`) in historical order, of length `epochs + 1`. 
The first element is the pre-training loss.\n\n# Examples\n\nIn this example we build a classification model using the Iris dataset. This is a very basic example, using a default builder and no standardization. For a more advanced illustration, see [`NeuralNetworkRegressor`](@ref) or [`ImageClassifier`](@ref), and examples in the MLJFlux.jl documentation.\n\n```julia\nusing MLJ, Flux\nimport Optimisers\nimport RDatasets\n```\n\nFirst, we can load the data:\n\n```julia\nmtcars = RDatasets.dataset(\"datasets\", \"mtcars\");\ny, X = unpack(mtcars, ==(:VS), in([:MPG, :Cyl, :Disp, :HP, :WT, :QSec]));\n```\n\nNote that `y` is a vector and `X` a table.\n\n```julia\ny = categorical(y) # classifier takes catogorical input\nX_f32 = Float32.(X) # To match floating point type of the neural network layers\nNeuralNetworkBinaryClassifier = @load NeuralNetworkBinaryClassifier pkg=MLJFlux\nbclf = NeuralNetworkBinaryClassifier()\n```\n\nNext, we can train the model:\n\n```julia\nmach = machine(bclf, X_f32, y)\nfit!(mach)\n```\n\nWe can train the model in an incremental fashion, altering the learning rate as we go, provided `optimizer_changes_trigger_retraining` is `false` (the default). 
Here, we also change the number of (total) iterations:\n\n```julia-repl\njulia> bclf.optimiser\nAdam(0.001, (0.9, 0.999), 1.0e-8)\n```\n\n```julia\nbclf.optimiser = Optimisers.Adam(eta = bclf.optimiser.eta * 2)\nbclf.epochs = bclf.epochs + 5\n\nfit!(mach, verbosity=2) # trains 5 more epochs\n```\n\nWe can inspect the mean training loss using the `cross_entropy` function:\n\n```julia\ntraining_loss = cross_entropy(predict(mach, X_f32), y)\n```\n\nAnd we can access the Flux chain (model) using `fitted_params`:\n\n```julia\nchain = fitted_params(mach).chain\n```\n\nFinally, we can see how the out-of-sample performance changes over time, using MLJ's `learning_curve` function:\n\n```julia\nr = range(bclf, :epochs, lower=1, upper=200, scale=:log10)\ncurve = learning_curve(\n bclf,\n X_f32,\n y,\n range=r,\n resampling=Holdout(fraction_train=0.7),\n measure=cross_entropy,\n)\nusing Plots\nplot(\n curve.parameter_values,\n curve.measurements,\n xlab=curve.parameter_name,\n xscale=curve.parameter_scale,\n ylab = \"Cross Entropy\",\n)\n\n```\n\nSee also [`ImageClassifier`](@ref).\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/alan-turing-institute/MLJFlux.jl" -":package_name" = "MLJFlux" -":name" = "NeuralNetworkBinaryClassifier" -":target_in_fit" = "`true`" +":human_name" = "single variable discretizer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nUnivariateStandardizer()\n```\n\nTransformer type for standardizing (whitening) single variable data.\n\nThis model may be deprecated in the future. 
Consider using [`Standardizer`](@ref), which handles both tabular *and* univariate data.\n""" +":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.Infinite}`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "UnivariateStandardizer" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":predict"] -":deep_properties" = "`(:optimiser, :builder)`" +":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform"] +":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Binary}`" -":supports_training_losses" = "`true`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" - -[MLJFlux.NeuralNetworkRegressor] +":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Infinite}`" +":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" -":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" + +[MLJTransforms.UnivariateFillImputer] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Function\", \"Function\", 
\"Function\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}}`" +":output_scitype" = "`Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Finite}}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" -":prediction_type" = ":deterministic" -":load_path" = "MLJFlux.NeuralNetworkRegressor" -":hyperparameters" = "`(:builder, :optimiser, :loss, :epochs, :batch_size, :lambda, :alpha, :rng, :optimiser_changes_trigger_retraining, :acceleration, :embedding_dims)`" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.UnivariateFillImputer" +":hyperparameters" = "`(:continuous_fill, :count_fill, :finite_fill)`" ":is_pure_julia" = "`true`" -":human_name" = "neural network regressor" -":is_supervised" = "`true`" -":iteration_parameter" = ":epochs" -":docstring" = """```julia\nNeuralNetworkRegressor\n```\n\nA model type for constructing a neural network regressor, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nNeuralNetworkRegressor = @load NeuralNetworkRegressor pkg=MLJFlux\n```\n\nDo `model = 
NeuralNetworkRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `NeuralNetworkRegressor(builder=...)`.\n\n`NeuralNetworkRegressor` is for training a data-dependent Flux.jl neural network to predict a `Continuous` target, given a table of `Continuous` features. Users provide a recipe for constructing the network, based on properties of the data that is encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\nIn addition to features with `Continuous` scientific element type, this model supports categorical features in the input table. If present, such features are embedded into dense vectors by the use of an additional `EntityEmbedderLayer` layer after the input, as described in Entity Embeddings of Categorical Variables by Cheng Guo, Felix Berkhahn arXiv, 2016.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` provides input features and is either: (i) a `Matrix` with `Continuous` element scitype (typically `Float32`); or (ii) a table of input features (eg, a `DataFrame`) whose columns have `Continuous`, `Multiclass` or `OrderedFactor` element scitype; check column scitypes with `schema(X)`. If any `Multiclass` or `OrderedFactor` features appear, the constructed network will use an `EntityEmbedderLayer` layer to transform them into dense vectors. If `X` is a `Matrix`, it is assumed that columns correspond to features and rows corresponding to observations.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `builder=MLJFlux.Linear(σ=Flux.relu)`: An MLJFlux builder that constructs a neural network. Possible `builders` include: `MLJFlux.Linear`, `MLJFlux.Short`, and `MLJFlux.MLP`. 
See MLJFlux documentation for more on builders, and the example below for using the `@builder` convenience macro.\n * `optimiser::Optimisers.Adam()`: An Optimisers.jl optimiser. The optimiser performs the updating of the weights of the network. To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of `10` between `1` and `1e-7`.\n * `loss=Flux.mse`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). For a regression task, natural loss functions are:\n\n * `Flux.mse`\n * `Flux.mae`\n * `Flux.msle`\n * `Flux.huber_loss`\n\n Currently MLJ measures are not supported as loss functions here.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete the training dataset.\n * `batch_size::int=1`: the batch size to be used for training, representing the number of samples per update of the network weights. Typically, batch size is between `8` and `512`. Increasing batch size may accelerate training if `acceleration=CUDALibs()` and a GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`. Note the history reports unpenalized losses.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training. The default is `Random.default_rng()`.\n * `optimizer_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. 
If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. For Training on GPU, use `CUDALibs()`.\n * `embedding_dims`: a `Dict` whose keys are names of categorical features, given as symbols, and whose values are numbers representing the desired dimensionality of the entity embeddings of such features: an integer value of `7`, say, sets the embedding dimensionality to `7`; a float value of `0.5`, say, sets the embedding dimensionality to `ceil(0.5 * c)`, where `c` is the number of feature levels. Unspecified feature dimensionality defaults to `min(c - 1, 10)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n * `transform(mach, Xnew)`: Assuming `Xnew` has the same schema as `X`, transform the categorical features of `Xnew` into dense `Continuous` vectors using the `MLJFlux.EntityEmbedderLayer` layer present in the network. Does nothing in case the model was trained on an input `X` that lacks categorical features.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalized if `lambda != 0`) in historical order, of length `epochs + 1`. 
The first element is the pre-training loss.\n\n# Examples\n\nIn this example we build a regression model for the Boston house price dataset.\n\n```julia\nusing MLJ\nimport MLJFlux\nusing Flux\nimport Optimisers\n```\n\nFirst, we load in the data: The `:MEDV` column becomes the target vector `y`, and all remaining columns go into a table `X`, with the exception of `:CHAS`:\n\n```julia\ndata = OpenML.load(531); # Loads from https://www.openml.org/d/531\ny, X = unpack(data, ==(:MEDV), !=(:CHAS); rng=123);\n\nscitype(y)\nschema(X)\n```\n\nSince MLJFlux models do not handle ordered factors, we'll treat `:RAD` as `Continuous`:\n\n```julia\nX = coerce(X, :RAD=>Continuous)\n```\n\nSplitting off a test set:\n\n```julia\n(X, Xtest), (y, ytest) = partition((X, y), 0.7, multi=true);\n```\n\nNext, we can define a `builder`, making use of a convenience macro to do so. In the following `@builder` call, `n_in` is a proxy for the number input features (which will be known at `fit!` time) and `rng` is a proxy for a RNG (which will be passed from the `rng` field of `model` defined below). We also have the parameter `n_out` which is the number of output features. 
As we are doing single target regression, the value passed will always be `1`, but the builder we define will also work for [`MultitargetNeuralNetworkRegressor`](@ref).\n\n```julia\nbuilder = MLJFlux.@builder begin\n init=Flux.glorot_uniform(rng)\n Chain(\n Dense(n_in, 64, relu, init=init),\n Dense(64, 32, relu, init=init),\n Dense(32, n_out, init=init),\n )\nend\n```\n\nInstantiating a model:\n\n```julia\nNeuralNetworkRegressor = @load NeuralNetworkRegressor pkg=MLJFlux\nmodel = NeuralNetworkRegressor(\n builder=builder,\n rng=123,\n epochs=20\n)\n```\n\nWe arrange for standardization of the the target by wrapping our model in `TransformedTargetModel`, and standardization of the features by inserting the wrapped model in a pipeline:\n\n```julia\npipe = Standardizer |> TransformedTargetModel(model, transformer=Standardizer)\n```\n\nIf we fit with a high verbosity (>1), we will see the losses during training. We can also see the losses in the output of `report(mach)`.\n\n```julia\nmach = machine(pipe, X, y)\nfit!(mach, verbosity=2)\n\n# first element initial loss, 2:end per epoch training losses\nreport(mach).transformed_target_model_deterministic.model.training_losses\n```\n\n## Experimenting with learning rate\n\nWe can visually compare how the learning rate affects the predictions:\n\n```julia\nusing Plots\n\nrates = rates = [5e-5, 1e-4, 0.005, 0.001, 0.05]\nplt=plot()\n\nforeach(rates) do η\n pipe.transformed_target_model_deterministic.model.optimiser = Optimisers.Adam(η)\n fit!(mach, force=true, verbosity=0)\n losses =\n report(mach).transformed_target_model_deterministic.model.training_losses[3:end]\n plot!(1:length(losses), losses, label=η)\nend\n\nplt\n\npipe.transformed_target_model_deterministic.model.optimiser.eta = Optimisers.Adam(0.0001)\n```\n\nWith the learning rate fixed, we compute a CV estimate of the performance (using all data bound to `mach`) and compare this with performance on the test set:\n\n```julia\n# CV estimate, based on `(X, 
y)`:\nevaluate!(mach, resampling=CV(nfolds=5), measure=l2)\n\n# loss for `(Xtest, test)`:\nfit!(mach) # train on `(X, y)`\nyhat = predict(mach, Xtest)\nl2(yhat, ytest)\n```\n\nThese losses, for the pipeline model, refer to the target on the original, unstandardized, scale.\n\nFor implementing stopping criterion and other iteration controls, refer to examples linked from the MLJFlux documentation.\n\nSee also [`MultitargetNeuralNetworkRegressor`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/alan-turing-institute/MLJFlux.jl" -":package_name" = "MLJFlux" -":name" = "NeuralNetworkRegressor" -":target_in_fit" = "`true`" +":human_name" = "single variable fill imputer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nUnivariateFillImputer\n```\n\nA model type for constructing a single variable fill imputer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nUnivariateFillImputer = @load UnivariateFillImputer pkg=MLJTransforms\n```\n\nDo `model = UnivariateFillImputer()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateFillImputer(continuous_fill=...)`.\n\nUse this model to imputing `missing` values in a vector with a fixed value learned from the non-missing values of training vector.\n\nFor imputing missing values in tabular data, use [`FillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with element scitype `Union{Missing, T}` where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`; check scitype using `scitype(x)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, xnew)`: return `xnew` with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `filler`: the fill value to be imputed in all new data\n\n# Examples\n\n```julia\nusing MLJ\nimputer = UnivariateFillImputer()\n\nx_continuous = [1.0, 2.0, missing, 3.0]\nx_multiclass = coerce([\"y\", \"n\", \"y\", missing, \"y\"], Multiclass)\nx_count = [1, 1, 1, 2, missing, 3, 3]\n\nmach = machine(imputer, x_continuous)\nfit!(mach)\n\njulia> fitted_params(mach)\n(filler = 2.0,)\n\njulia> transform(mach, [missing, missing, 101.0])\n3-element Vector{Float64}:\n 
2.0\n 2.0\n 101.0\n\nmach2 = machine(imputer, x_multiclass) |> fit!\n\njulia> transform(mach2, x_multiclass)\n5-element CategoricalArray{String,1,UInt32}:\n \"y\"\n \"n\"\n \"y\"\n \"y\"\n \"y\"\n\nmach3 = machine(imputer, x_count) |> fit!\n\njulia> transform(mach3, [missing, missing, 5])\n3-element Vector{Int64}:\n 2\n 2\n 5\n```\n\nFor imputing tabular data, use [`FillImputer`](@ref).\n""" +":inverse_transform_scitype" = "`Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "UnivariateFillImputer" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":predict"] -":deep_properties" = "`(:optimiser, :builder)`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":supports_training_losses" = "`true`" +":implemented_methods" = [":fit", ":fitted_params", ":transform", ":UnivariateFillImputer"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}`" 
+":transform_scitype" = "`Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":is_wrapper" = "`false`" [CatBoost.CatBoostRegressor] ":constructor" = "`nothing`" @@ -5624,41 +5994,41 @@ ":is_wrapper" = "`true`" [MLJWrappers.Transformer] -":constructor" = "`MLJWrappers.Transformer`" -":hyperparameter_types" = "`(\"MLJModelInterface.Model\",)`" -":package_uuid" = "b5d0f7f3-9870-4c70-ba08-cb780c37e63f" -":hyperparameter_ranges" = "`(nothing,)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" +":input_scitype" = "`ScientificTypesBase.Unknown`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":target_in_fit" = "`true`" +":is_pure_julia" = "`true`" +":package_name" = "MLJWrappers" ":package_license" = "unknown" -":prediction_type" = ":unknown" ":load_path" = "MLJWrappers.Transformer" -":hyperparameters" = "`(:model,)`" -":is_pure_julia" = "`true`" -":human_name" = "transformer" -":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nTransformer(supervised_model)\n```\n\nWrap `supervised_model` so that it is treated as a transformer in MLJ pipelines. It is assumed that `supervised_model isa Supervised` and that `transform` is implemented for the model type.\n\nFor `Supervised` models in an MLJ pipeline, it is the output of `predict` that is propagated by default to the next model in the pipeline. 
By wrapping in `Transform`, the output of `transform` is propagated instead.\n\nThe original hyperparameters of `supervised_model` are nested hyperparameters in `Transformer(supervised_model)`, but in most other respects the latter behaves like `supervised_model`.\n\n# Example\n\nBelow `reducer` is a supervised model implementing `transform` which selects features using Recursive Feature Elimination. Because it is supervised, we obtain an error when we follow it with another supervised model in a pipeline:\n\n```julia\nusing MLJ\nRandomForestClassifier = @load RandomForestClassifier pkg=DecisionTree\nKNNClassifier = @load KNNClassifier pkg=NearestNeighborModels\n\nreducer = RecursiveFeatureElimination(RandomForestClassifier(), n_features=2)\nreducer |> KNNClassifier()\n# ERROR: ArgumentError: More than one supervised model in a pipeline is not permitted\n```\n\nThe following, however, works as expected, passing the reduced training features to the K-nearest neighbor classifier, when `pipe` is trained.\n\n```julia\npipe = Transformer(reducer) |> KNNClassifier()\n```\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_uuid" = "b5d0f7f3-9870-4c70-ba08-cb780c37e63f" ":package_url" = "https://github.com/JuliaAI/Transformer.jl" -":package_name" = "MLJWrappers.jl" -":name" = "Transformer" -":target_in_fit" = "`true`" +":is_wrapper" = "`true`" +":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" +":docstring" = """```julia\nTransformer(supervised_model)\n```\n\nWrap `supervised_model` so that it is treated as a transformer in MLJ pipelines. It is assumed that `supervised_model isa Supervised` and that `transform` is implemented for the model type.\n\nFor `Supervised` models in an MLJ pipeline, it is the output of `predict` that is propagated by default to the next model in the pipeline. 
By wrapping in `Transform`, the output of `transform` is propagated instead.\n\nThe original hyperparameters of `supervised_model` are nested hyperparameters in `Transformer(supervised_model)`, but in most other respects the latter behaves like `supervised_model`.\n\n# Example\n\nBelow `reducer` is a supervised model implementing `transform` which selects features using Recursive Feature Elimination. Because it is supervised, we obtain an error when we follow it with another supervised model in a pipeline:\n\n```julia\nusing MLJ\nRandomForestClassifier = @load RandomForestClassifier pkg=DecisionTree\nKNNClassifier = @load KNNClassifier pkg=NearestNeighborModels\n\nreducer = RecursiveFeatureElimination(RandomForestClassifier(), n_features=2)\nreducer |> KNNClassifier()\n# ERROR: ArgumentError: More than one supervised model in a pipeline is not permitted\n```\n\nThe following, however, works as expected, passing the reduced training features to the K-nearest neighbor classifier, when `pipe` is trained.\n\n```julia\npipe = Transformer(reducer) |> KNNClassifier()\n```\n""" +":name" = "Transformer" +":human_name" = "transformer" +":tags" = [] +":is_supervised" = "`false`" +":prediction_type" = ":unknown" +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":implemented_methods" = [":fit", ":fitted_params", ":reformat", ":training_losses", ":transform", ":update"] -":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":hyperparameters" = "`(:model,)`" +":hyperparameter_types" = "`(\"MLJModelInterface.Model\",)`" +":hyperparameter_ranges" = "`(nothing,)`" +":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" -":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`true`" +":deep_properties" = "`()`" 
+":reporting_operations" = "`()`" +":constructor" = "`MLJWrappers.Transformer`" [Imbalance.RandomOversampler] ":constructor" = "`nothing`" @@ -6105,77 +6475,77 @@ ":is_wrapper" = "`true`" [FeatureSelection.FeatureSelector] -":input_scitype" = "`ScientificTypesBase.Table`" -":output_scitype" = "`ScientificTypesBase.Table`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Union{Function, Vector{Symbol}}\", \"Bool\")`" +":package_uuid" = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6" +":hyperparameter_ranges" = "`(nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "FeatureSelection" +":output_scitype" = "`ScientificTypesBase.Table`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" +":prediction_type" = ":unknown" ":load_path" = "FeatureSelection.FeatureSelector" -":package_uuid" = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6" +":hyperparameters" = "`(:features, :ignore)`" +":is_pure_julia" = "`true`" +":human_name" = "feature selector" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nFeatureSelector\n```\n\nA model type for constructing a feature selector, based on [FeatureSelection.jl](https://github.com/JuliaAI/FeatureSelection.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nFeatureSelector = @load FeatureSelector pkg=FeatureSelection\n```\n\nDo `model = FeatureSelector()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `FeatureSelector(features=...)`.\n\nUse this model to select features (columns) of a table, usually as part of a model `Pipeline`.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any table of input features, where \"table\" is in the sense of Tables.jl\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: one of the following, with the behavior indicated:\n\n * `[]` (empty, the default): filter out all features (columns) which were not encountered in training\n * non-empty vector of feature names (symbols): keep only the specified features (`ignore=false`) or keep only unspecified features (`ignore=true`)\n * function or other callable: keep a feature if the callable returns `true` on its name. For example, specifying `FeatureSelector(features = name -> name in [:x1, :x3], ignore = true)` has the same effect as `FeatureSelector(features = [:x1, :x3], ignore = true)`, namely to select all features, with the exception of `:x1` and `:x3`.\n * `ignore`: whether to ignore or keep specified `features`, as explained above\n\n# Operations\n\n * `transform(mach, Xnew)`: select features from the table `Xnew` as specified by the model, taking features seen during training into account, if relevant\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_to_keep`: the features that will be selected\n\n# Example\n\n```julia\nusing MLJ\n\nX = (ordinal1 = [1, 2, 3],\n ordinal2 = coerce([\"x\", \"y\", \"x\"], OrderedFactor),\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = coerce([\"Your father\", \"he\", \"is\"], Multiclass));\n\nselector = FeatureSelector(features=[:ordinal3, ], ignore=true);\n\njulia> transform(fit!(machine(selector, X)), X)\n(ordinal1 = [1, 2, 3],\n ordinal2 = CategoricalValue{Symbol,UInt32}[\"x\", \"y\", 
\"x\"],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n\n```\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" ":package_url" = "https://github.com/JuliaAI/FeatureSelection.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "FeatureSelection" +":name" = "FeatureSelector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```julia\nFeatureSelector\n```\n\nA model type for constructing a feature selector, based on [FeatureSelection.jl](https://github.com/JuliaAI/FeatureSelection.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nFeatureSelector = @load FeatureSelector pkg=FeatureSelection\n```\n\nDo `model = FeatureSelector()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FeatureSelector(features=...)`.\n\nUse this model to select features (columns) of a table, usually as part of a model `Pipeline`.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any table of input features, where \"table\" is in the sense of Tables.jl\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: one of the following, with the behavior indicated:\n\n * `[]` (empty, the default): filter out all features (columns) which were not encountered in training\n * non-empty vector of feature names (symbols): keep only the specified features (`ignore=false`) or keep only unspecified features (`ignore=true`)\n * function or other callable: keep a feature if the callable returns `true` on its name. 
For example, specifying `FeatureSelector(features = name -> name in [:x1, :x3], ignore = true)` has the same effect as `FeatureSelector(features = [:x1, :x3], ignore = true)`, namely to select all features, with the exception of `:x1` and `:x3`.\n * `ignore`: whether to ignore or keep specified `features`, as explained above\n\n# Operations\n\n * `transform(mach, Xnew)`: select features from the table `Xnew` as specified by the model, taking features seen during training into account, if relevant\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_to_keep`: the features that will be selected\n\n# Example\n\n```julia\nusing MLJ\n\nX = (ordinal1 = [1, 2, 3],\n ordinal2 = coerce([\"x\", \"y\", \"x\"], OrderedFactor),\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = coerce([\"Your father\", \"he\", \"is\"], Multiclass));\n\nselector = FeatureSelector(features=[:ordinal3, ], ignore=true);\n\njulia> transform(fit!(machine(selector, X)), X)\n(ordinal1 = [1, 2, 3],\n ordinal2 = CategoricalValue{Symbol,UInt32}[\"x\", \"y\", \"x\"],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n\n```\n""" -":name" = "FeatureSelector" -":human_name" = "feature selector" -":tags" = [] -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] -":hyperparameters" = "`(:features, :ignore)`" -":hyperparameter_types" = "`(\"Union{Function, Vector{Symbol}}\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" 
-":reporting_operations" = "`()`" +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" ":constructor" = "`nothing`" [FeatureSelection.RecursiveFeatureElimination] -":input_scitype" = "`ScientificTypesBase.Unknown`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`true`" +":hyperparameter_types" = "`(\"MLJModelInterface.Supervised\", \"Float64\", \"Float64\")`" +":package_uuid" = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "FeatureSelection" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" +":prediction_type" = ":probabilistic" ":load_path" = "FeatureSelection.RecursiveFeatureElimination" -":package_uuid" = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6" +":hyperparameters" = "`(:model, :n_features, :step)`" +":is_pure_julia" = "`true`" +":human_name" = "probabilistic recursive feature elimination" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nRecursiveFeatureElimination(model; n_features=0, step=1)\n```\n\nThis model implements a recursive feature elimination algorithm for feature selection. 
It recursively removes features, training a base model on the remaining features and evaluating their importance until the desired number of features is selected.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `rfe_model` to data with\n\n```julia\nmach = machine(rfe_model, X, y)\n```\n\nOR, if the base model supports weights, as\n\n```julia\nmach = machine(rfe_model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of the scitype as that required by the base model; check column scitypes with `schema(X)` and column scitypes required by base model with `input_scitype(basemodel)`.\n * `y` is the target, which can be any table of responses whose element scitype is `Continuous` or `Finite` depending on the `target_scitype` required by the base model; check the scitype with `scitype(y)`.\n * `w` is the observation weights which can either be `nothing`(default) or an `AbstractVector` whoose element scitype is `Count` or `Continuous`. This is different from `weights` kernel which is an hyperparameter to the model, see below.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * model: A base model with a `fit` method that provides information on feature feature importance (i.e `reports_feature_importances(model) == true`)\n * n_features::Real = 0: The number of features to select. If `0`, half of the features are selected. If a positive integer, the parameter is the absolute number of features to select. If a real number between 0 and 1, it is the fraction of features to select.\n * step::Real=1: If the value of step is at least 1, it signifies the quantity of features to eliminate in each iteration. 
Conversely, if step falls strictly within the range of 0.0 to 1.0, it denotes the proportion (rounded down) of features to remove during each iteration.\n\n# Operations\n\n * `transform(mach, X)`: transform the input table `X` into a new table containing only columns corresponding to features accepted by the RFE algorithm.\n * `predict(mach, X)`: transform the input table `X` into a new table same as in `transform(mach, X)` above and predict using the fitted base model on the transformed table.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_left`: names of features remaining after recursive feature elimination.\n * `model_fitresult`: fitted parameters of the base model.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `scores`: dictionary of scores for each feature in the training dataset. The model deems highly scored variables more significant.\n * `model_report`: report for the fitted base model.\n\n# Examples\n\nThe following example assumes you have MLJDecisionTreeInterface in the active package ennvironment.\n\n```julia\nusing MLJ\n\nRandomForestRegressor = @load RandomForestRegressor pkg=DecisionTree\n\n# Creates a dataset where the target only depends on the first 5 columns of the input table.\nA = rand(50, 10);\ny = 10 .* sin.(\n pi .* A[:, 1] .* A[:, 2]\n ) + 20 .* (A[:, 3] .- 0.5).^ 2 .+ 10 .* A[:, 4] .+ 5 * A[:, 5];\nX = MLJ.table(A);\n\n# fit a rfe model:\nrf = RandomForestRegressor()\nselector = RecursiveFeatureElimination(rf, n_features=2)\nmach = machine(selector, X, y)\nfit!(mach)\n\n# view the feature importances\nfeature_importances(mach)\n\n# predict using the base model trained on the reduced feature set:\nXnew = MLJ.table(rand(50, 10));\npredict(mach, Xnew)\n\n# transform data with all features to the reduced feature set:\ntransform(mach, Xnew)\n```\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/JuliaAI/FeatureSelection.jl" -":is_wrapper" = 
"`true`" -":supports_weights" = "`false`" +":package_name" = "FeatureSelection" +":name" = "RecursiveFeatureElimination" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```julia\nRecursiveFeatureElimination(model; n_features=0, step=1)\n```\n\nThis model implements a recursive feature elimination algorithm for feature selection. It recursively removes features, training a base model on the remaining features and evaluating their importance until the desired number of features is selected.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `rfe_model` to data with\n\n```julia\nmach = machine(rfe_model, X, y)\n```\n\nOR, if the base model supports weights, as\n\n```julia\nmach = machine(rfe_model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of the scitype as that required by the base model; check column scitypes with `schema(X)` and column scitypes required by base model with `input_scitype(basemodel)`.\n * `y` is the target, which can be any table of responses whose element scitype is `Continuous` or `Finite` depending on the `target_scitype` required by the base model; check the scitype with `scitype(y)`.\n * `w` is the observation weights which can either be `nothing`(default) or an `AbstractVector` whoose element scitype is `Count` or `Continuous`. This is different from `weights` kernel which is an hyperparameter to the model, see below.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * model: A base model with a `fit` method that provides information on feature feature importance (i.e `reports_feature_importances(model) == true`)\n * n_features::Real = 0: The number of features to select. If `0`, half of the features are selected. If a positive integer, the parameter is the absolute number of features to select. 
If a real number between 0 and 1, it is the fraction of features to select.\n * step::Real=1: If the value of step is at least 1, it signifies the quantity of features to eliminate in each iteration. Conversely, if step falls strictly within the range of 0.0 to 1.0, it denotes the proportion (rounded down) of features to remove during each iteration.\n\n# Operations\n\n * `transform(mach, X)`: transform the input table `X` into a new table containing only columns corresponding to features accepted by the RFE algorithm.\n * `predict(mach, X)`: transform the input table `X` into a new table same as in `transform(mach, X)` above and predict using the fitted base model on the transformed table.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_left`: names of features remaining after recursive feature elimination.\n * `model_fitresult`: fitted parameters of the base model.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `scores`: dictionary of scores for each feature in the training dataset. 
The model deems highly scored variables more significant.\n * `model_report`: report for the fitted base model.\n\n# Examples\n\nThe following example assumes you have MLJDecisionTreeInterface in the active package ennvironment.\n\n```julia\nusing MLJ\n\nRandomForestRegressor = @load RandomForestRegressor pkg=DecisionTree\n\n# Creates a dataset where the target only depends on the first 5 columns of the input table.\nA = rand(50, 10);\ny = 10 .* sin.(\n pi .* A[:, 1] .* A[:, 2]\n ) + 20 .* (A[:, 3] .- 0.5).^ 2 .+ 10 .* A[:, 4] .+ 5 * A[:, 5];\nX = MLJ.table(A);\n\n# fit a rfe model:\nrf = RandomForestRegressor()\nselector = RecursiveFeatureElimination(rf, n_features=2)\nmach = machine(selector, X, y)\nfit!(mach)\n\n# view the feature importances\nfeature_importances(mach)\n\n# predict using the base model trained on the reduced feature set:\nXnew = MLJ.table(rand(50, 10));\npredict(mach, Xnew)\n\n# transform data with all features to the reduced feature set:\ntransform(mach, Xnew)\n```\n""" -":name" = "RecursiveFeatureElimination" -":human_name" = "probabilistic recursive feature elimination" -":tags" = [] -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [] -":hyperparameters" = "`(:model, :n_features, :step)`" -":hyperparameter_types" = "`(\"MLJModelInterface.Supervised\", \"Float64\", \"Float64\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`RecursiveFeatureElimination`" 
[EvoLinear.EvoLinearRegressor] @@ -7235,233 +7605,11 @@ ":human_name" = "lof detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nLOFDetector(n_neighbors = 5,\n algorithm = \"auto\",\n leaf_size = 30,\n metric = \"minkowski\",\n p = 2,\n metric_params = nothing,\n n_jobs = 1,\n novelty = true)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lof](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lof)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":package_name" = "OutlierDetectionPython" -":name" = "LOFDetector" -":target_in_fit" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] -":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" - -[OutlierDetectionPython.PCADetector] -":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"String\", \"Real\", \"Union{Integer, String}\", \"Bool\", \"Bool\", \"Union{Nothing, Integer}\")`" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" 
-":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" -":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.PCADetector" -":hyperparameters" = "`(:n_components, :n_selected_components, :copy, :whiten, :svd_solver, :tol, :iterated_power, :standardization, :weighted, :random_state)`" -":is_pure_julia" = "`false`" -":human_name" = "pca detector" -":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nPCADetector(n_components = nothing,\n n_selected_components = nothing,\n copy = true,\n whiten = false,\n svd_solver = \"auto\",\n tol = 0.0\n iterated_power = \"auto\",\n standardization = true,\n weighted = true,\n random_state = nothing)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.pca](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.pca)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":package_name" = "OutlierDetectionPython" -":name" = "PCADetector" -":target_in_fit" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] -":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, 
ScientificTypesBase.OrderedFactor{2}}}`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" - -[OutlierDetectionPython.INNEDetector] -":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Integer\", \"Union{Real, String}\", \"Union{Nothing, Integer}\")`" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" -":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.INNEDetector" -":hyperparameters" = "`(:n_estimators, :max_samples, :random_state)`" -":is_pure_julia" = "`false`" -":human_name" = "inne detector" -":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nINNEDetector(n_estimators=200,\n max_samples=\"auto\",\n random_state=None)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.inne](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.inne)\n""" -":inverse_transform_scitype" = 
"`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":package_name" = "OutlierDetectionPython" -":name" = "INNEDetector" -":target_in_fit" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] -":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" - -[OutlierDetectionPython.OCSVMDetector] -":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"String\", \"Integer\", \"Union{Real, String}\", \"Real\", \"Real\", \"Real\", \"Bool\", \"Integer\", \"Bool\", \"Integer\")`" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" -":package_license" 
= "MIT" -":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.OCSVMDetector" -":hyperparameters" = "`(:kernel, :degree, :gamma, :coef0, :tol, :nu, :shrinking, :cache_size, :verbose, :max_iter)`" -":is_pure_julia" = "`false`" -":human_name" = "ocsvm detector" -":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nOCSVMDetector(kernel = \"rbf\",\n degree = 3,\n gamma = \"auto\",\n coef0 = 0.0,\n tol = 0.001,\n nu = 0.5,\n shrinking = true,\n cache_size = 200,\n verbose = false,\n max_iter = -1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ocsvm](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ocsvm)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":package_name" = "OutlierDetectionPython" -":name" = "OCSVMDetector" -":target_in_fit" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] -":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" - -[OutlierDetectionPython.ECODDetector] -":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Any\",)`" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing,)`" -":reporting_operations" 
= "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" -":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.ECODDetector" -":hyperparameters" = "`(:n_jobs,)`" -":is_pure_julia" = "`false`" -":human_name" = "ecod detector" -":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nECODDetector(n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ecod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ecod)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":package_name" = "OutlierDetectionPython" -":name" = "ECODDetector" -":target_in_fit" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] -":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = 
"`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" - -[OutlierDetectionPython.SODDetector] -":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Integer\", \"Integer\", \"Real\")`" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" -":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.SODDetector" -":hyperparameters" = "`(:n_neighbors, :ref_set, :alpha)`" -":is_pure_julia" = "`false`" -":human_name" = "sod detector" -":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nSODDetector(n_neighbors = 5,\n ref_set = 10,\n alpha = 0.8)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sod)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":package_name" = "OutlierDetectionPython" -":name" = "SODDetector" -":target_in_fit" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] -":deep_properties" = "`()`" -":predict_scitype" = 
"`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" - -[OutlierDetectionPython.LODADetector] -":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Integer\", \"Integer\")`" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" -":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.LODADetector" -":hyperparameters" = "`(:n_bins, :n_random_cuts)`" -":is_pure_julia" = "`false`" -":human_name" = "loda detector" -":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nLODADetector(n_bins = 10,\n n_random_cuts = 100)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loda](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loda)\n""" +":docstring" = """```julia\nLOFDetector(n_neighbors = 5,\n algorithm 
= \"auto\",\n leaf_size = 30,\n metric = \"minkowski\",\n p = 2,\n metric_params = nothing,\n n_jobs = 1,\n novelty = true)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lof](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lof)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" -":name" = "LODADetector" +":name" = "LOFDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -7476,11 +7624,11 @@ ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -[OutlierDetectionPython.KDEDetector] +[OutlierDetectionPython.PCADetector] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Real\", \"String\", \"Integer\", \"String\", \"Any\")`" +":hyperparameter_types" = "`(\"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"String\", \"Real\", \"Union{Integer, String}\", \"Bool\", \"Bool\", \"Union{Nothing, Integer}\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" ":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" @@ -7488,17 +7636,17 @@ 
":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.KDEDetector" -":hyperparameters" = "`(:bandwidth, :algorithm, :leaf_size, :metric, :metric_params)`" +":load_path" = "OutlierDetectionPython.PCADetector" +":hyperparameters" = "`(:n_components, :n_selected_components, :copy, :whiten, :svd_solver, :tol, :iterated_power, :standardization, :weighted, :random_state)`" ":is_pure_julia" = "`false`" -":human_name" = "kde detector" +":human_name" = "pca detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nKDEDetector(bandwidth=1.0,\n algorithm=\"auto\",\n leaf_size=30,\n metric=\"minkowski\",\n metric_params=None)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.kde](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.kde)\n""" +":docstring" = """```julia\nPCADetector(n_components = nothing,\n n_selected_components = nothing,\n copy = true,\n whiten = false,\n svd_solver = \"auto\",\n tol = 0.0\n iterated_power = \"auto\",\n standardization = true,\n weighted = true,\n random_state = nothing)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.pca](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.pca)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" -":name" = "KDEDetector" +":name" = "PCADetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -7513,11 +7661,11 @@ ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -[OutlierDetectionPython.CDDetector] +[OutlierDetectionPython.INNEDetector] 
":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"PythonCall.Py\",)`" +":hyperparameter_types" = "`(\"Integer\", \"Union{Real, String}\", \"Union{Nothing, Integer}\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing,)`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" ":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" @@ -7525,17 +7673,17 @@ ":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.CDDetector" -":hyperparameters" = "`(:model,)`" +":load_path" = "OutlierDetectionPython.INNEDetector" +":hyperparameters" = "`(:n_estimators, :max_samples, :random_state)`" ":is_pure_julia" = "`false`" -":human_name" = "cd detector" +":human_name" = "inne detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nCDDetector(whitening = true,\n rule_of_thumb = false)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cd](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cd)\n""" +":docstring" = """```julia\nINNEDetector(n_estimators=200,\n max_samples=\"auto\",\n random_state=None)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.inne](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.inne)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = 
"https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" -":name" = "CDDetector" +":name" = "INNEDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -7550,11 +7698,11 @@ ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -[OutlierDetectionPython.KNNDetector] +[OutlierDetectionPython.OCSVMDetector] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Integer\", \"String\", \"Real\", \"String\", \"Integer\", \"String\", \"Union{Nothing, Integer}\", \"Any\", \"Integer\")`" +":hyperparameter_types" = "`(\"String\", \"Integer\", \"Union{Real, String}\", \"Real\", \"Real\", \"Real\", \"Bool\", \"Integer\", \"Bool\", \"Integer\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" ":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" @@ -7562,17 +7710,17 @@ ":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.KNNDetector" -":hyperparameters" = "`(:n_neighbors, :method, :radius, :algorithm, :leaf_size, :metric, :p, :metric_params, :n_jobs)`" +":load_path" = 
"OutlierDetectionPython.OCSVMDetector" +":hyperparameters" = "`(:kernel, :degree, :gamma, :coef0, :tol, :nu, :shrinking, :cache_size, :verbose, :max_iter)`" ":is_pure_julia" = "`false`" -":human_name" = "knn detector" +":human_name" = "ocsvm detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nKNNDetector(n_neighbors = 5,\n method = \"largest\",\n radius = 1.0,\n algorithm = \"auto\",\n leaf_size = 30,\n metric = \"minkowski\",\n p = 2,\n metric_params = nothing,\n n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.knn](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.knn)\n""" +":docstring" = """```julia\nOCSVMDetector(kernel = \"rbf\",\n degree = 3,\n gamma = \"auto\",\n coef0 = 0.0,\n tol = 0.001,\n nu = 0.5,\n shrinking = true,\n cache_size = 200,\n verbose = false,\n max_iter = -1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ocsvm](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ocsvm)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" -":name" = "KNNDetector" +":name" = "OCSVMDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -7587,11 +7735,11 @@ ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -[OutlierDetectionPython.GMMDetector] +[OutlierDetectionPython.ECODDetector] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Integer\", \"String\", \"Real\", \"Real\", \"Integer\", \"Integer\", \"String\", \"Union{Nothing, Integer}\", \"Bool\")`" +":hyperparameter_types" = "`(\"Any\",)`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" 
-":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing,)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" ":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" @@ -7599,17 +7747,17 @@ ":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.GMMDetector" -":hyperparameters" = "`(:n_components, :covariance_type, :tol, :reg_covar, :max_iter, :n_init, :init_params, :random_state, :warm_start)`" +":load_path" = "OutlierDetectionPython.ECODDetector" +":hyperparameters" = "`(:n_jobs,)`" ":is_pure_julia" = "`false`" -":human_name" = "gmm detector" +":human_name" = "ecod detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nGMMDetector(n_components=1,\n covariance_type=\"full\",\n tol=0.001,\n reg_covar=1e-06,\n max_iter=100,\n n_init=1,\n init_params=\"kmeans\",\n weights_init=None,\n means_init=None,\n precisions_init=None,\n random_state=None,\n warm_start=False)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.gmm](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.gmm)\n""" +":docstring" = """```julia\nECODDetector(n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ecod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ecod)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = 
"https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" -":name" = "GMMDetector" +":name" = "ECODDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -7624,11 +7772,11 @@ ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -[OutlierDetectionPython.COFDetector] +[OutlierDetectionPython.SODDetector] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Integer\", \"String\")`" +":hyperparameter_types" = "`(\"Integer\", \"Integer\", \"Real\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" ":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" @@ -7636,17 +7784,17 @@ ":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.COFDetector" -":hyperparameters" = "`(:n_neighbors, :method)`" +":load_path" = "OutlierDetectionPython.SODDetector" +":hyperparameters" = "`(:n_neighbors, :ref_set, :alpha)`" ":is_pure_julia" = "`false`" -":human_name" = "cof detector" +":human_name" = "sod detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nCOFDetector(n_neighbors = 5,\n 
method=\"fast\")\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cof](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cof)\n""" +":docstring" = """```julia\nSODDetector(n_neighbors = 5,\n ref_set = 10,\n alpha = 0.8)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sod)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" -":name" = "COFDetector" +":name" = "SODDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -7661,11 +7809,11 @@ ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -[OutlierDetectionPython.CBLOFDetector] +[OutlierDetectionPython.LODADetector] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Integer\", \"Real\", \"Real\", \"Bool\", \"Union{Nothing, Integer}\", \"Integer\")`" +":hyperparameter_types" = "`(\"Integer\", \"Integer\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing, nothing)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" ":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" @@ -7673,17 +7821,17 @@ ":abstract_type" = 
"`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.CBLOFDetector" -":hyperparameters" = "`(:n_clusters, :alpha, :beta, :use_weights, :random_state, :n_jobs)`" +":load_path" = "OutlierDetectionPython.LODADetector" +":hyperparameters" = "`(:n_bins, :n_random_cuts)`" ":is_pure_julia" = "`false`" -":human_name" = "cblof detector" +":human_name" = "loda detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nCBLOFDetector(n_clusters = 8,\n alpha = 0.9,\n beta = 5,\n use_weights = false,\n random_state = nothing,\n n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cblof](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cblof)\n""" +":docstring" = """```julia\nLODADetector(n_bins = 10,\n n_random_cuts = 100)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loda](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loda)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" -":name" = "CBLOFDetector" +":name" = "LODADetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -7698,11 +7846,11 @@ ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -[OutlierDetectionPython.LOCIDetector] +[OutlierDetectionPython.KDEDetector] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Real\", \"Real\")`" +":hyperparameter_types" = "`(\"Real\", \"String\", \"Integer\", \"String\", \"Any\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing)`" +":hyperparameter_ranges" = 
"`(nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" ":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" @@ -7710,17 +7858,17 @@ ":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.LOCIDetector" -":hyperparameters" = "`(:alpha, :k)`" +":load_path" = "OutlierDetectionPython.KDEDetector" +":hyperparameters" = "`(:bandwidth, :algorithm, :leaf_size, :metric, :metric_params)`" ":is_pure_julia" = "`false`" -":human_name" = "loci detector" +":human_name" = "kde detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nLOCIDetector(alpha = 0.5,\n k = 3)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loci](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loci)\n""" +":docstring" = """```julia\nKDEDetector(bandwidth=1.0,\n algorithm=\"auto\",\n leaf_size=30,\n metric=\"minkowski\",\n metric_params=None)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.kde](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.kde)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" -":name" = "LOCIDetector" +":name" = "KDEDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -7735,11 +7883,11 @@ 
":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -[OutlierDetectionPython.LMDDDetector] +[OutlierDetectionPython.CDDetector] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Integer\", \"String\", \"Union{Nothing, Integer}\")`" +":hyperparameter_types" = "`(\"PythonCall.Py\",)`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing,)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" ":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" @@ -7747,17 +7895,17 @@ ":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.LMDDDetector" -":hyperparameters" = "`(:n_iter, :dis_measure, :random_state)`" +":load_path" = "OutlierDetectionPython.CDDetector" +":hyperparameters" = "`(:model,)`" ":is_pure_julia" = "`false`" -":human_name" = "lmdd detector" +":human_name" = "cd detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nLMDDDetector(n_iter = 50,\n dis_measure = \"aad\",\n random_state = nothing)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lmdd](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lmdd)\n""" +":docstring" = """```julia\nCDDetector(whitening = true,\n rule_of_thumb = 
false)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cd](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cd)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" -":name" = "LMDDDetector" +":name" = "CDDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -7772,11 +7920,11 @@ ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -[OutlierDetectionPython.RODDetector] +[OutlierDetectionPython.KNNDetector] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Bool\",)`" +":hyperparameter_types" = "`(\"Integer\", \"String\", \"Real\", \"String\", \"Integer\", \"String\", \"Union{Nothing, Integer}\", \"Any\", \"Integer\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing,)`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" ":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" @@ -7784,17 +7932,17 @@ ":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.RODDetector" -":hyperparameters" = "`(:parallel_execution,)`" +":load_path" = 
"OutlierDetectionPython.KNNDetector" +":hyperparameters" = "`(:n_neighbors, :method, :radius, :algorithm, :leaf_size, :metric, :p, :metric_params, :n_jobs)`" ":is_pure_julia" = "`false`" -":human_name" = "rod detector" +":human_name" = "knn detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nRODDetector(parallel_execution = false)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.rod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.rod)\n""" +":docstring" = """```julia\nKNNDetector(n_neighbors = 5,\n method = \"largest\",\n radius = 1.0,\n algorithm = \"auto\",\n leaf_size = 30,\n metric = \"minkowski\",\n p = 2,\n metric_params = nothing,\n n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.knn](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.knn)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" -":name" = "RODDetector" +":name" = "KNNDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -7809,554 +7957,443 @@ ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -[SelfOrganizingMaps.SelfOrganizingMap] +[OutlierDetectionPython.GMMDetector] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Symbol\", \"Symbol\", \"Symbol\", \"Symbol\", \"Distances.PreMetric\", \"Int64\")`" -":package_uuid" = "ba4b7379-301a-4be0-bee6-171e4e152787" +":hyperparameter_types" = "`(\"Integer\", \"String\", \"Real\", \"Real\", \"Integer\", \"Integer\", \"String\", \"Union{Nothing, Integer}\", \"Bool\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" 
":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "SelfOrganizingMaps.SelfOrganizingMap" -":hyperparameters" = "`(:k, :η, :σ², :grid_type, :η_decay, :σ_decay, :neighbor_function, :matching_distance, :Nepochs)`" -":is_pure_julia" = "`true`" -":human_name" = "self organizing map" +":load_path" = "OutlierDetectionPython.GMMDetector" +":hyperparameters" = "`(:n_components, :covariance_type, :tol, :reg_covar, :max_iter, :n_init, :init_params, :random_state, :warm_start)`" +":is_pure_julia" = "`false`" +":human_name" = "gmm detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nSelfOrganizingMap\n```\n\nA model type for constructing a self organizing map, based on [SelfOrganizingMaps.jl](https://github.com/john-waczak/SelfOrganizingMaps.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nSelfOrganizingMap = @load SelfOrganizingMap pkg=SelfOrganizingMaps\n```\n\nDo `model = 
SelfOrganizingMap()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SelfOrganizingMap(k=...)`.\n\nSelfOrganizingMaps implements [Kohonen's Self Organizing Map](https://ieeexplore.ieee.org/abstract/document/58325?casa_token=pGue0TD38nAAAAAA:kWFkvMJQKgYOTJjJx-_bRx8n_tnWEpau2QeoJ1gJt0IsywAuvkXYc0o5ezdc2mXfCzoEZUQXSQ), Proceedings of the IEEE; Kohonen, T.; (1990):\"The self-organizing map\"\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with mach = machine(model, X) where\n\n * `X`: an `AbstractMatrix` or `Table` of input features whose columns are of scitype `Continuous.`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `k=10`: Number of nodes along once side of SOM grid. There are `k²` total nodes.\n * `η=0.5`: Learning rate. Scales adjust made to winning node and its neighbors during each round of training.\n * `σ²=0.05`: The (squared) neighbor radius. Used to determine scale for neighbor node adjustments.\n * `grid_type=:rectangular` Node grid geometry. One of `(:rectangular, :hexagonal, :spherical)`.\n * `η_decay=:exponential` Learning rate schedule function. One of `(:exponential, :asymptotic)`\n * `σ_decay=:exponential` Neighbor radius schedule function. One of `(:exponential, :asymptotic, :none)`\n * `neighbor_function=:gaussian` Kernel function used to make adjustment to neighbor weights. Scale is set by `σ²`. One of `(:gaussian, :mexican_hat)`.\n * `matching_distance=euclidean` Distance function from `Distances.jl` used to determine winning node.\n * `Nepochs=1` Number of times to repeat training on the shuffled dataset.\n\n# Operations\n\n * `transform(mach, Xnew)`: returns the coordinates of the winning SOM node for each instance of `Xnew`. For SOM of grid*type `:rectangular` and `:hexagonal`, these are cartesian coordinates. 
For grid*type `:spherical`, these are the latitude and longitude in radians.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coords`: The coordinates of each of the SOM nodes (points in the domain of the map) with shape (k², 2)\n * `weights`: Array of weight vectors for the SOM nodes (corresponding points in the map's range) of shape (k², input dimension)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `classes`: the index of the winning node for each instance of the training data X interpreted as a class label\n\n# Examples\n\n```julia\nusing MLJ\nsom = @load SelfOrganizingMap pkg=SelfOrganizingMaps\nmodel = som()\nX, y = make_regression(50, 3) # synthetic data\nmach = machine(model, X) |> fit!\nX̃ = transform(mach, X)\n\nrpt = report(mach)\nclasses = rpt.classes\n```\n""" -":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" -":package_url" = "https://github.com/john-waczak/SelfOrganizingMaps.jl" -":package_name" = "SelfOrganizingMaps" -":name" = "SelfOrganizingMap" +":docstring" = """```julia\nGMMDetector(n_components=1,\n covariance_type=\"full\",\n tol=0.001,\n reg_covar=1e-06,\n max_iter=100,\n n_init=1,\n init_params=\"kmeans\",\n weights_init=None,\n means_init=None,\n precisions_init=None,\n random_state=None,\n warm_start=False)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.gmm](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.gmm)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "GMMDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] +":implemented_methods" = 
[":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":is_wrapper" = "`false`" - -[SymbolicRegression.SRTestRegressor] -":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Union{Nothing, VersionNumber}\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, SymbolicRegression.CoreModule.ExpressionSpecModule.AbstractExpressionSpec}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Type{<:DynamicExpressions.OperatorEnumModule.AbstractOperatorEnum}}\", \"Union{Nothing, SymbolicRegression.CoreModule.MutationWeightsModule.AbstractMutationWeights, NamedTuple, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"Bool\", \"Symbol\", \"Union{Nothing, Function, SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping}\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Bool\", \"Union{AbstractString, 
Optim.AbstractOptimizer}\", \"Int64\", \"AbstractFloat\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"IO\", \"Bool\", \"Bool\", \"Union{Nothing, ADTypes.AbstractADType, Symbol}\", \"Bool\", \"Any\", \"Union{Nothing, Integer}\", \"Integer\", \"Union{Nothing, Bool}\", \"Union{Nothing, String}\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Bool\", \"AbstractString\", \"Bool\", \"Union{Nothing, Type{<:DynamicExpressions.ExpressionModule.AbstractExpression}}\", \"Union{Nothing, NamedTuple}\", \"Union{Nothing, Type{<:DynamicExpressions.NodeModule.AbstractExpressionNode}}\", \"Union{Nothing, AbstractString}\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Vector{Symbol}}\", \"Union{Nothing, SymbolicRegression.LoggingModule.AbstractSRLogger}\", \"Bool\", \"Union{Nothing, String}\", \"Type\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" -":package_uuid" = "8254be44-1295-4e6a-a16d-46603ac705cb" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" -":package_license" = "Apache-2.0" -":prediction_type" = ":deterministic" -":load_path" = "SymbolicRegression.MLJInterfaceModule.SRTestRegressor" -":hyperparameters" = "`(:defaults, :binary_operators, :unary_operators, :maxsize, :maxdepth, :expression_spec, :populations, :population_size, :ncycles_per_iteration, :elementwise_loss, :loss_function, :loss_function_expression, :dimensional_constraint_penalty, :parsimony, :constraints, :nested_constraints, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :warmup_maxsize_by, :adaptive_parsimony_scaling, :operator_enum_constructor, :mutation_weights, :crossover_probability, :annealing, :alpha, :tournament_selection_n, :tournament_selection_p, :early_stop_condition, :batching, :batch_size, :dimensionless_constants_only, :loss_scale, :complexity_mapping, :use_frequency, :use_frequency_in_tournament, :should_simplify, :perturbation_factor, :probability_negate_constant, :skip_mutation_failures, :optimizer_algorithm, 
:optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_f_calls_limit, :optimizer_options, :should_optimize_constants, :migration, :hof_migration, :fraction_replaced, :fraction_replaced_hof, :topn, :timeout_in_seconds, :max_evals, :input_stream, :turbo, :bumper, :autodiff_backend, :deterministic, :seed, :verbosity, :print_precision, :progress, :output_directory, :save_to_file, :bin_constraints, :una_constraints, :terminal_width, :use_recorder, :recorder_file, :define_helper_functions, :expression_type, :expression_options, :node_type, :output_file, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :worker_imports, :logger, :runtests, :run_id, :loss_type, :selection_method, :dimensions_type)`" -":is_pure_julia" = "`true`" -":human_name" = "Symbolic Regression via Evolutionary Search" -":is_supervised" = "`true`" -":iteration_parameter" = "`nothing`" -":docstring" = """```\nSRTestRegressor\n```\n\nA model type for constructing a Symbolic Regression via Evolutionary Search, based on\n[SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nSRTestRegressor = @load SRTestRegressor pkg=SymbolicRegression\n```\n\nDo `model = SRTestRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`SRTestRegressor(defaults=...)`.\n# Hyper-parameters\n\n- `defaults = nothing`\n\n- `binary_operators = nothing`\n\n- `unary_operators = nothing`\n\n- `maxsize = nothing`\n\n- `maxdepth = nothing`\n\n- `expression_spec = nothing`\n\n- `populations = nothing`\n\n- `population_size = nothing`\n\n- `ncycles_per_iteration = nothing`\n\n- `elementwise_loss = nothing`\n\n- `loss_function = nothing`\n\n- `loss_function_expression = nothing`\n\n- `dimensional_constraint_penalty = nothing`\n\n- `parsimony = nothing`\n\n- `constraints = nothing`\n\n- `nested_constraints = nothing`\n\n- `complexity_of_operators = nothing`\n\n- `complexity_of_constants = nothing`\n\n- `complexity_of_variables = nothing`\n\n- `warmup_maxsize_by = nothing`\n\n- `adaptive_parsimony_scaling = nothing`\n\n- `operator_enum_constructor = nothing`\n\n- `mutation_weights = nothing`\n\n- `crossover_probability = nothing`\n\n- `annealing = nothing`\n\n- `alpha = nothing`\n\n- `tournament_selection_n = nothing`\n\n- `tournament_selection_p = nothing`\n\n- `early_stop_condition = nothing`\n\n- `batching = nothing`\n\n- `batch_size = nothing`\n\n- `dimensionless_constants_only = false`\n\n- `loss_scale = log`\n\n- `complexity_mapping = nothing`\n\n- `use_frequency = true`\n\n- `use_frequency_in_tournament = true`\n\n- `should_simplify = nothing`\n\n- `perturbation_factor = nothing`\n\n- `probability_negate_constant = nothing`\n\n- `skip_mutation_failures = true`\n\n- `optimizer_algorithm = Optim.BFGS{LineSearches.InitialStatic{Float64}, LineSearches.BackTracking{Float64, Int64}, Nothing, Nothing, Optim.Flat}(LineSearches.InitialStatic{Float64}(1.0, false), LineSearches.BackTracking{Float64, Int64}(0.0001, 0.5, 0.1, 1000, 3, Inf, nothing), nothing, nothing, Optim.Flat())`\n\n- `optimizer_nrestarts = 2`\n\n- `optimizer_probability = 0.14`\n\n- `optimizer_iterations = nothing`\n\n- `optimizer_f_calls_limit = nothing`\n\n- `optimizer_options 
= nothing`\n\n- `should_optimize_constants = true`\n\n- `migration = true`\n\n- `hof_migration = true`\n\n- `fraction_replaced = nothing`\n\n- `fraction_replaced_hof = nothing`\n\n- `topn = nothing`\n\n- `timeout_in_seconds = nothing`\n\n- `max_evals = nothing`\n\n- `input_stream = Base.DevNull()`\n\n- `turbo = false`\n\n- `bumper = false`\n\n- `autodiff_backend = nothing`\n\n- `deterministic = false`\n\n- `seed = nothing`\n\n- `verbosity = nothing`\n\n- `print_precision = 5`\n\n- `progress = nothing`\n\n- `output_directory = nothing`\n\n- `save_to_file = true`\n\n- `bin_constraints = nothing`\n\n- `una_constraints = nothing`\n\n- `terminal_width = nothing`\n\n- `use_recorder = false`\n\n- `recorder_file = pysr_recorder.json`\n\n- `define_helper_functions = true`\n\n- `expression_type = nothing`\n\n- `expression_options = nothing`\n\n- `node_type = nothing`\n\n- `output_file = nothing`\n\n- `fast_cycle = false`\n\n- `npopulations = nothing`\n\n- `npop = nothing`\n\n- `niterations = 1`\n\n- `parallelism = multithreading`\n\n- `numprocs = nothing`\n\n- `procs = nothing`\n\n- `addprocs_function = nothing`\n\n- `heap_size_hint_in_bytes = nothing`\n\n- `worker_imports = nothing`\n\n- `logger = nothing`\n\n- `runtests = true`\n\n- `run_id = nothing`\n\n- `loss_type = Nothing`\n\n- `selection_method = choose_best`\n\n- `dimensions_type = DynamicQuantities.SymbolicDimensions{DynamicQuantities.FRInt32}`\n\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/MilesCranmer/SymbolicRegression.jl" -":package_name" = "SymbolicRegression" -":name" = "SRTestRegressor" -":target_in_fit" = "`true`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [] -":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":supports_training_losses" = "`false`" 
-":supports_weights" = "`true`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" - -[SymbolicRegression.MultitargetSRTestRegressor] -":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Union{Nothing, VersionNumber}\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, SymbolicRegression.CoreModule.ExpressionSpecModule.AbstractExpressionSpec}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Type{<:DynamicExpressions.OperatorEnumModule.AbstractOperatorEnum}}\", \"Union{Nothing, SymbolicRegression.CoreModule.MutationWeightsModule.AbstractMutationWeights, NamedTuple, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"Bool\", \"Symbol\", \"Union{Nothing, Function, SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping}\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Bool\", \"Union{AbstractString, Optim.AbstractOptimizer}\", \"Int64\", \"AbstractFloat\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Bool\", \"Bool\", \"Bool\", 
\"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"IO\", \"Bool\", \"Bool\", \"Union{Nothing, ADTypes.AbstractADType, Symbol}\", \"Bool\", \"Any\", \"Union{Nothing, Integer}\", \"Integer\", \"Union{Nothing, Bool}\", \"Union{Nothing, String}\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Bool\", \"AbstractString\", \"Bool\", \"Union{Nothing, Type{<:DynamicExpressions.ExpressionModule.AbstractExpression}}\", \"Union{Nothing, NamedTuple}\", \"Union{Nothing, Type{<:DynamicExpressions.NodeModule.AbstractExpressionNode}}\", \"Union{Nothing, AbstractString}\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Vector{Symbol}}\", \"Union{Nothing, SymbolicRegression.LoggingModule.AbstractSRLogger}\", \"Bool\", \"Union{Nothing, String}\", \"Type\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" -":package_uuid" = "8254be44-1295-4e6a-a16d-46603ac705cb" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" -":package_license" = "Apache-2.0" -":prediction_type" = ":deterministic" -":load_path" = "SymbolicRegression.MLJInterfaceModule.MultitargetSRTestRegressor" -":hyperparameters" = "`(:defaults, :binary_operators, :unary_operators, :maxsize, :maxdepth, :expression_spec, :populations, :population_size, :ncycles_per_iteration, :elementwise_loss, :loss_function, :loss_function_expression, :dimensional_constraint_penalty, :parsimony, :constraints, :nested_constraints, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :warmup_maxsize_by, :adaptive_parsimony_scaling, :operator_enum_constructor, :mutation_weights, :crossover_probability, :annealing, :alpha, :tournament_selection_n, :tournament_selection_p, :early_stop_condition, :batching, :batch_size, :dimensionless_constants_only, :loss_scale, :complexity_mapping, :use_frequency, :use_frequency_in_tournament, :should_simplify, :perturbation_factor, :probability_negate_constant, :skip_mutation_failures, :optimizer_algorithm, :optimizer_nrestarts, 
:optimizer_probability, :optimizer_iterations, :optimizer_f_calls_limit, :optimizer_options, :should_optimize_constants, :migration, :hof_migration, :fraction_replaced, :fraction_replaced_hof, :topn, :timeout_in_seconds, :max_evals, :input_stream, :turbo, :bumper, :autodiff_backend, :deterministic, :seed, :verbosity, :print_precision, :progress, :output_directory, :save_to_file, :bin_constraints, :una_constraints, :terminal_width, :use_recorder, :recorder_file, :define_helper_functions, :expression_type, :expression_options, :node_type, :output_file, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :worker_imports, :logger, :runtests, :run_id, :loss_type, :selection_method, :dimensions_type)`" -":is_pure_julia" = "`true`" -":human_name" = "Multi-Target Symbolic Regression via Evolutionary Search" -":is_supervised" = "`true`" -":iteration_parameter" = "`nothing`" -":docstring" = """```\nMultitargetSRTestRegressor\n```\n\nA model type for constructing a Multi-Target Symbolic Regression via Evolutionary Search, based on\n[SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nMultitargetSRTestRegressor = @load MultitargetSRTestRegressor pkg=SymbolicRegression\n```\n\nDo `model = MultitargetSRTestRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`MultitargetSRTestRegressor(defaults=...)`.\n# Hyper-parameters\n\n- `defaults = nothing`\n\n- `binary_operators = nothing`\n\n- `unary_operators = nothing`\n\n- `maxsize = nothing`\n\n- `maxdepth = nothing`\n\n- `expression_spec = nothing`\n\n- `populations = nothing`\n\n- `population_size = nothing`\n\n- `ncycles_per_iteration = nothing`\n\n- `elementwise_loss = nothing`\n\n- `loss_function = nothing`\n\n- `loss_function_expression = nothing`\n\n- `dimensional_constraint_penalty = nothing`\n\n- `parsimony = nothing`\n\n- `constraints = nothing`\n\n- `nested_constraints = nothing`\n\n- `complexity_of_operators = nothing`\n\n- `complexity_of_constants = nothing`\n\n- `complexity_of_variables = nothing`\n\n- `warmup_maxsize_by = nothing`\n\n- `adaptive_parsimony_scaling = nothing`\n\n- `operator_enum_constructor = nothing`\n\n- `mutation_weights = nothing`\n\n- `crossover_probability = nothing`\n\n- `annealing = nothing`\n\n- `alpha = nothing`\n\n- `tournament_selection_n = nothing`\n\n- `tournament_selection_p = nothing`\n\n- `early_stop_condition = nothing`\n\n- `batching = nothing`\n\n- `batch_size = nothing`\n\n- `dimensionless_constants_only = false`\n\n- `loss_scale = log`\n\n- `complexity_mapping = nothing`\n\n- `use_frequency = true`\n\n- `use_frequency_in_tournament = true`\n\n- `should_simplify = nothing`\n\n- `perturbation_factor = nothing`\n\n- `probability_negate_constant = nothing`\n\n- `skip_mutation_failures = true`\n\n- `optimizer_algorithm = Optim.BFGS{LineSearches.InitialStatic{Float64}, LineSearches.BackTracking{Float64, Int64}, Nothing, Nothing, Optim.Flat}(LineSearches.InitialStatic{Float64}(1.0, false), LineSearches.BackTracking{Float64, Int64}(0.0001, 0.5, 0.1, 1000, 3, Inf, nothing), nothing, nothing, Optim.Flat())`\n\n- `optimizer_nrestarts = 2`\n\n- `optimizer_probability = 0.14`\n\n- `optimizer_iterations = nothing`\n\n- `optimizer_f_calls_limit = nothing`\n\n- 
`optimizer_options = nothing`\n\n- `should_optimize_constants = true`\n\n- `migration = true`\n\n- `hof_migration = true`\n\n- `fraction_replaced = nothing`\n\n- `fraction_replaced_hof = nothing`\n\n- `topn = nothing`\n\n- `timeout_in_seconds = nothing`\n\n- `max_evals = nothing`\n\n- `input_stream = Base.DevNull()`\n\n- `turbo = false`\n\n- `bumper = false`\n\n- `autodiff_backend = nothing`\n\n- `deterministic = false`\n\n- `seed = nothing`\n\n- `verbosity = nothing`\n\n- `print_precision = 5`\n\n- `progress = nothing`\n\n- `output_directory = nothing`\n\n- `save_to_file = true`\n\n- `bin_constraints = nothing`\n\n- `una_constraints = nothing`\n\n- `terminal_width = nothing`\n\n- `use_recorder = false`\n\n- `recorder_file = pysr_recorder.json`\n\n- `define_helper_functions = true`\n\n- `expression_type = nothing`\n\n- `expression_options = nothing`\n\n- `node_type = nothing`\n\n- `output_file = nothing`\n\n- `fast_cycle = false`\n\n- `npopulations = nothing`\n\n- `npop = nothing`\n\n- `niterations = 1`\n\n- `parallelism = multithreading`\n\n- `numprocs = nothing`\n\n- `procs = nothing`\n\n- `addprocs_function = nothing`\n\n- `heap_size_hint_in_bytes = nothing`\n\n- `worker_imports = nothing`\n\n- `logger = nothing`\n\n- `runtests = true`\n\n- `run_id = nothing`\n\n- `loss_type = Nothing`\n\n- `selection_method = choose_best`\n\n- `dimensions_type = DynamicQuantities.SymbolicDimensions{DynamicQuantities.FRInt32}`\n\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/MilesCranmer/SymbolicRegression.jl" -":package_name" = "SymbolicRegression" -":name" = "MultitargetSRTestRegressor" -":target_in_fit" = "`true`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [] -":deep_properties" = "`()`" -":predict_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":target_scitype" 
= "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" -":supports_weights" = "`true`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" - -[SymbolicRegression.MultitargetSRRegressor] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Union{Nothing, VersionNumber}\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, SymbolicRegression.CoreModule.ExpressionSpecModule.AbstractExpressionSpec}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Type{<:DynamicExpressions.OperatorEnumModule.AbstractOperatorEnum}}\", \"Union{Nothing, SymbolicRegression.CoreModule.MutationWeightsModule.AbstractMutationWeights, NamedTuple, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", 
\"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"Bool\", \"Symbol\", \"Union{Nothing, Function, SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping}\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Bool\", \"Union{AbstractString, Optim.AbstractOptimizer}\", \"Int64\", \"AbstractFloat\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"IO\", \"Bool\", \"Bool\", \"Union{Nothing, ADTypes.AbstractADType, Symbol}\", \"Bool\", \"Any\", \"Union{Nothing, Integer}\", \"Integer\", \"Union{Nothing, Bool}\", \"Union{Nothing, String}\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Bool\", \"AbstractString\", \"Bool\", \"Union{Nothing, Type{<:DynamicExpressions.ExpressionModule.AbstractExpression}}\", \"Union{Nothing, NamedTuple}\", \"Union{Nothing, Type{<:DynamicExpressions.NodeModule.AbstractExpressionNode}}\", \"Union{Nothing, AbstractString}\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Vector{Symbol}}\", \"Union{Nothing, SymbolicRegression.LoggingModule.AbstractSRLogger}\", \"Bool\", \"Union{Nothing, String}\", \"Type\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" -":package_uuid" = "8254be44-1295-4e6a-a16d-46603ac705cb" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" + +[OutlierDetectionPython.COFDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\", \"String\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, 
AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" -":package_license" = "Apache-2.0" -":prediction_type" = ":deterministic" -":load_path" = "SymbolicRegression.MLJInterfaceModule.MultitargetSRRegressor" -":hyperparameters" = "`(:defaults, :binary_operators, :unary_operators, :maxsize, :maxdepth, :expression_spec, :populations, :population_size, :ncycles_per_iteration, :elementwise_loss, :loss_function, :loss_function_expression, :dimensional_constraint_penalty, :parsimony, :constraints, :nested_constraints, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :warmup_maxsize_by, :adaptive_parsimony_scaling, :operator_enum_constructor, :mutation_weights, :crossover_probability, :annealing, :alpha, :tournament_selection_n, :tournament_selection_p, :early_stop_condition, :batching, :batch_size, :dimensionless_constants_only, :loss_scale, :complexity_mapping, :use_frequency, :use_frequency_in_tournament, :should_simplify, :perturbation_factor, :probability_negate_constant, :skip_mutation_failures, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_f_calls_limit, :optimizer_options, :should_optimize_constants, :migration, :hof_migration, :fraction_replaced, :fraction_replaced_hof, :topn, :timeout_in_seconds, :max_evals, :input_stream, :turbo, :bumper, :autodiff_backend, :deterministic, :seed, :verbosity, :print_precision, :progress, :output_directory, :save_to_file, :bin_constraints, :una_constraints, :terminal_width, :use_recorder, :recorder_file, :define_helper_functions, :expression_type, :expression_options, :node_type, :output_file, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :worker_imports, 
:logger, :runtests, :run_id, :loss_type, :selection_method, :dimensions_type)`" -":is_pure_julia" = "`true`" -":human_name" = "Multi-Target Symbolic Regression via Evolutionary Search" -":is_supervised" = "`true`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "OutlierDetectionPython.COFDetector" +":hyperparameters" = "`(:n_neighbors, :method)`" +":is_pure_julia" = "`false`" +":human_name" = "cof detector" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nMultitargetSRRegressor\n```\n\nA model type for constructing a Multi-Target Symbolic Regression via Evolutionary Search, based on [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nMultitargetSRRegressor = @load MultitargetSRRegressor pkg=SymbolicRegression\n```\n\nDo `model = MultitargetSRRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetSRRegressor(defaults=...)`.\n\nMulti-target Symbolic Regression regressor (`MultitargetSRRegressor`) conducts several searches for expressions that predict each target variable from a set of input variables. All data is assumed to be `Continuous`. The search is performed using an evolutionary algorithm. This algorithm is described in the paper https://arxiv.org/abs/2305.01582.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nOR\n\n```julia\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype\n\n`Continuous`; check column scitypes with `schema(X)`. Variable names in discovered expressions will be taken from the column names of `X`, if available. 
Units in columns of `X` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n\n * `y` is the target, which can be any table of target variables whose element scitype is `Continuous`; check the scitype with `schema(y)`. Units in columns of `y` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `w` is the observation weights which can either be `nothing` (default) or an `AbstractVector` whose element scitype is `Count` or `Continuous`. The same weights are used for all targets.\n\nTrain the machine using `fit!(mach)`, inspect the discovered expressions with `report(mach)`, and predict on new data with `predict(mach, Xnew)`. Note that unlike other regressors, symbolic regression stores a list of lists of trained models. The models chosen from each of these lists is defined by the function `selection_method` keyword argument, which by default balances accuracy and complexity. You can override this at prediction time by passing a named tuple with keys `data` and `idx`.\n\n# Hyper-parameters\n\n * `defaults`: What set of defaults to use for `Options`. The default, `nothing`, will simply take the default options from the current version of SymbolicRegression. However, you may also select the defaults from an earlier version, such as `v\"0.24.5\"`.\n * `binary_operators`: Vector of binary operators (functions) to use. Each operator should be defined for two input scalars, and one output scalar. All operators need to be defined over the entire real line (excluding infinity - these are stopped before they are input), or return `NaN` where not defined. For speed, define it so it takes two reals of the same type as input, and outputs the same type. For the SymbolicUtils simplification backend, you will need to define a generic method of the operator so it takes arbitrary types.\n * `operator_enum_constructor`: Constructor function to use for creating the operators enum. 
By default, OperatorEnum is used, but you can provide a different constructor like GenericOperatorEnum. The constructor must accept the keyword arguments 'binary*operators' and 'unary*operators'.\n * `unary_operators`: Same, but for unary operators (one input scalar, gives an output scalar).\n * `constraints`: Array of pairs specifying size constraints for each operator. The constraints for a binary operator should be a 2-tuple (e.g., `(-1, -1)`) and the constraints for a unary operator should be an `Int`. A size constraint is a limit to the size of the subtree in each argument of an operator. e.g., `[(^)=>(-1, 3)]` means that the `^` operator can have arbitrary size (`-1`) in its left argument, but a maximum size of `3` in its right argument. Default is no constraints.\n * `batching`: Whether to evolve based on small mini-batches of data, rather than the entire dataset.\n * `batch_size`: What batch size to use if using batching.\n * `elementwise_loss`: What elementwise loss function to use. Can be one of the following losses, or any other loss of type `SupervisedLoss`. You can also pass a function that takes a scalar target (left argument), and scalar predicted (right argument), and returns a scalar. This will be averaged over the predicted data. If weights are supplied, your function should take a third argument for the weight scalar. 
Included losses: Regression: - `LPDistLoss{P}()`, - `L1DistLoss()`, - `L2DistLoss()` (mean square), - `LogitDistLoss()`, - `HuberLoss(d)`, - `L1EpsilonInsLoss(ϵ)`, - `L2EpsilonInsLoss(ϵ)`, - `PeriodicLoss(c)`, - `QuantileLoss(τ)`, Classification: - `ZeroOneLoss()`, - `PerceptronLoss()`, - `L1HingeLoss()`, - `SmoothedL1HingeLoss(γ)`, - `ModifiedHuberLoss()`, - `L2MarginLoss()`, - `ExpLoss()`, - `SigmoidLoss()`, - `DWDMarginLoss(q)`.\n * `loss_function`: Alternatively, you may redefine the loss used as any function of `tree::AbstractExpressionNode{T}`, `dataset::Dataset{T}`, and `options::AbstractOptions`, so long as you output a non-negative scalar of type `T`. This is useful if you want to use a loss that takes into account derivatives, or correlations across the dataset. This also means you could use a custom evaluation for a particular expression. If you are using `batching=true`, then your function should accept a fourth argument `idx`, which is either `nothing` (indicating that the full dataset should be used), or a vector of indices to use for the batch. For example,\n\n ```\n function my_loss(tree, dataset::Dataset{T,L}, options)::L where {T,L}\n prediction, flag = eval_tree_array(tree, dataset.X, options)\n if !flag\n return L(Inf)\n end\n return sum((prediction .- dataset.y) .^ 2) / dataset.n\n end\n ```\n * `loss_function_expression`: Similar to `loss_function`, but takes `AbstractExpression` instead of `AbstractExpressionNode` as its first argument. Useful for `TemplateExpressionSpec`.\n * `loss_scale`: Determines how loss values are scaled when computing scores. Options are:\n\n * `:log` (default): Uses logarithmic scaling of loss ratios. This mode requires non-negative loss values and is ideal for traditional loss functions that are always positive.\n * `:linear`: Uses direct differences between losses. 
This mode handles any loss values (including negative) and is useful for custom loss functions, especially those based on likelihoods.\n * `expression_spec::AbstractExpressionSpec`: A specification of what types of expressions to use in the search. For example, `ExpressionSpec()` (default). You can also see `TemplateExpressionSpec` and `ParametricExpressionSpec` for specialized cases.\n * `populations`: How many populations of equations to use.\n * `population_size`: How many equations in each population.\n * `ncycles_per_iteration`: How many generations to consider per iteration.\n * `tournament_selection_n`: Number of expressions considered in each tournament.\n * `tournament_selection_p`: The fittest expression in a tournament is to be selected with probability `p`, the next fittest with probability `p*(1-p)`, and so forth.\n * `topn`: Number of equations to return to the host process, and to consider for the hall of fame.\n * `complexity_of_operators`: What complexity should be assigned to each operator, and the occurrence of a constant or variable. By default, this is 1 for all operators. Can be a real number as well, in which case the complexity of an expression will be rounded to the nearest integer. Input this in the form of, e.g., [(^) => 3, sin => 2].\n * `complexity_of_constants`: What complexity should be assigned to use of a constant. By default, this is 1.\n * `complexity_of_variables`: What complexity should be assigned to use of a variable, which can also be a vector indicating different per-variable complexity. By default, this is 1.\n * `complexity_mapping`: Alternatively, you can pass a function that takes the expression as input and returns the complexity. Make sure that this operates on `AbstractExpression` (and unpacks to `AbstractExpressionNode`), and returns an integer.\n * `alpha`: The probability of accepting an equation mutation during regularized evolution is given by exp(-delta_loss/(alpha * T)), where T goes from 1 to 0. 
Thus, alpha=infinite is the same as no annealing.\n * `maxsize`: Maximum size of equations during the search.\n * `maxdepth`: Maximum depth of equations during the search, by default this is set equal to the maxsize.\n * `parsimony`: A multiplicative factor for how much complexity is punished.\n * `dimensional_constraint_penalty`: An additive factor if the dimensional constraint is violated.\n * `dimensionless_constants_only`: Whether to only allow dimensionless constants.\n * `use_frequency`: Whether to use a parsimony that adapts to the relative proportion of equations at each complexity; this will ensure that there are a balanced number of equations considered for every complexity.\n * `use_frequency_in_tournament`: Whether to use the adaptive parsimony described above inside the score, rather than just at the mutation accept/reject stage.\n * `adaptive_parsimony_scaling`: How much to scale the adaptive parsimony term in the loss. Increase this if the search is spending too much time optimizing the most complex equations.\n * `turbo`: Whether to use `LoopVectorization.@turbo` to evaluate expressions. This can be significantly faster, but is only compatible with certain operators. *Experimental!*\n * `bumper`: Whether to use Bumper.jl for faster evaluation. *Experimental!*\n * `migration`: Whether to migrate equations between processes.\n * `hof_migration`: Whether to migrate equations from the hall of fame to processes.\n * `fraction_replaced`: What fraction of each population to replace with migrated equations at the end of each cycle.\n * `fraction_replaced_hof`: What fraction to replace with hall of fame equations at the end of each cycle.\n * `should_simplify`: Whether to simplify equations. If you pass a custom objective, this will be set to `false`.\n * `should_optimize_constants`: Whether to use an optimization algorithm to periodically optimize constants in equations.\n * `optimizer_algorithm`: Select algorithm to use for optimizing constants. 
Default is `Optim.BFGS(linesearch=LineSearches.BackTracking())`.\n * `optimizer_nrestarts`: How many different random starting positions to consider for optimization of constants.\n * `optimizer_probability`: Probability of performing optimization of constants at the end of a given iteration.\n * `optimizer_iterations`: How many optimization iterations to perform. This gets passed to `Optim.Options` as `iterations`. The default is 8.\n * `optimizer_f_calls_limit`: How many function calls to allow during optimization. This gets passed to `Optim.Options` as `f_calls_limit`. The default is `10_000`.\n * `optimizer_options`: General options for the constant optimization. For details we refer to the documentation on `Optim.Options` from the `Optim.jl` package. Options can be provided here as `NamedTuple`, e.g. `(iterations=16,)`, as a `Dict`, e.g. Dict(:x_tol => 1.0e-32,), or as an `Optim.Options` instance.\n * `autodiff_backend`: The backend to use for differentiation, which should be an instance of `AbstractADType` (see `ADTypes.jl`). Default is `nothing`, which means `Optim.jl` will estimate gradients (likely with finite differences). You can also pass a symbolic version of the backend type, such as `:Zygote` for Zygote, `:Enzyme`, etc. Most backends will not work, and many will never work due to incompatibilities, though support for some is gradually being added.\n * `perturbation_factor`: When mutating a constant, either multiply or divide by (1+perturbation_factor)^(rand()+1).\n * `probability_negate_constant`: Probability of negating a constant in the equation when mutating it.\n * `mutation_weights`: Relative probabilities of the mutations. The struct `MutationWeights` (or any `AbstractMutationWeights`) should be passed to these options. 
See its documentation on `MutationWeights` for the different weights.\n * `crossover_probability`: Probability of performing crossover.\n * `annealing`: Whether to use simulated annealing.\n * `warmup_maxsize_by`: Whether to slowly increase the max size from 5 up to `maxsize`. If nonzero, specifies the fraction through the search at which the maxsize should be reached.\n * `verbosity`: Whether to print debugging statements or not.\n * `print_precision`: How many digits to print when printing equations. By default, this is 5.\n * `output_directory`: The base directory to save output files to. Files will be saved in a subdirectory according to the run ID. By default, this is `./outputs`.\n * `save_to_file`: Whether to save equations to a file during the search.\n * `bin_constraints`: See `constraints`. This is the same, but specified for binary operators only (for example, if you have an operator that is both a binary and unary operator).\n * `una_constraints`: Likewise, for unary operators.\n * `seed`: What random seed to use. `nothing` uses no seed.\n * `progress`: Whether to use a progress bar output (`verbosity` will have no effect).\n * `early_stop_condition`: Float - whether to stop early if the mean loss gets below this value. Function - a function taking (loss, complexity) as arguments and returning true or false.\n * `timeout_in_seconds`: Float64 - the time in seconds after which to exit (as an alternative to the number of iterations).\n * `max_evals`: Int (or Nothing) - the maximum number of evaluations of expressions to perform.\n * `input_stream`: the stream to read user input from. By default, this is `stdin`. 
If you encounter issues with reading from `stdin`, like a hang, you can simply pass `devnull` to this argument.\n * `skip_mutation_failures`: Whether to simply skip over mutations that fail or are rejected, rather than to replace the mutated expression with the original expression and proceed normally.\n * `nested_constraints`: Specifies how many times a combination of operators can be nested. For example, `[sin => [cos => 0], cos => [cos => 2]]` specifies that `cos` may never appear within a `sin`, but `sin` can be nested with itself an unlimited number of times. The second term specifies that `cos` can be nested up to 2 times within a `cos`, so that `cos(cos(cos(x)))` is allowed (as well as any combination of `+` or `-` within it), but `cos(cos(cos(cos(x))))` is not allowed. When an operator is not specified, it is assumed that it can be nested an unlimited number of times. This requires that there is no operator which is used both in the unary operators and the binary operators (e.g., `-` could be both subtract, and negation). For binary operators, both arguments are treated the same way, and the max of each argument is constrained.\n * `deterministic`: Use a global counter for the birth time, rather than calls to `time()`. This gives perfect resolution, and is therefore deterministic. However, it is not thread safe, and must be used in serial mode.\n * `define_helper_functions`: Whether to define helper functions for constructing and evaluating trees.\n * `niterations::Int=10`: The number of iterations to perform the search. More iterations will improve the results.\n * `parallelism=:multithreading`: What parallelism mode to use. The options are `:multithreading`, `:multiprocessing`, and `:serial`. By default, multithreading will be used. Multithreading uses less memory, but multiprocessing can handle multi-node compute. If using `:multithreading` mode, the number of threads available to julia are used. 
If using `:multiprocessing`, `numprocs` processes will be created dynamically if `procs` is unset. If you have already allocated processes, pass them to the `procs` argument and they will be used. You may also pass a string instead of a symbol, like `\"multithreading\"`.\n * `numprocs::Union{Int, Nothing}=nothing`: The number of processes to use, if you want `equation_search` to set this up automatically. By default this will be `4`, but can be any number (you should pick a number <= the number of cores available).\n * `procs::Union{Vector{Int}, Nothing}=nothing`: If you have set up a distributed run manually with `procs = addprocs()` and `@everywhere`, pass the `procs` to this keyword argument.\n * `addprocs_function::Union{Function, Nothing}=nothing`: If using multiprocessing (`parallelism=:multithreading`), and are not passing `procs` manually, then they will be allocated dynamically using `addprocs`. However, you may also pass a custom function to use instead of `addprocs`. This function should take a single positional argument, which is the number of processes to use, as well as the `lazy` keyword argument. For example, if set up on a slurm cluster, you could pass `addprocs_function = addprocs_slurm`, which will set up slurm processes.\n * `heap_size_hint_in_bytes::Union{Int,Nothing}=nothing`: On Julia 1.9+, you may set the `--heap-size-hint` flag on Julia processes, recommending garbage collection once a process is close to the recommended size. This is important for long-running distributed jobs where each process has an independent memory, and can help avoid out-of-memory errors. By default, this is set to `Sys.free_memory() / numprocs`.\n * `worker_imports::Union{Vector{Symbol},Nothing}=nothing`: If you want to import additional modules on each worker, pass them here as a vector of symbols. 
By default some of the extensions will automatically be loaded when needed.\n * `runtests::Bool=true`: Whether to run (quick) tests before starting the search, to see if there will be any problems during the equation search related to the host environment.\n * `run_id::Union{String,Nothing}=nothing`: A unique identifier for the run. This will be used to store outputs from the run in the `outputs` directory. If not specified, a unique ID will be generated.\n * `loss_type::Type=Nothing`: If you would like to use a different type for the loss than for the data you passed, specify the type here. Note that if you pass complex data `::Complex{L}`, then the loss type will automatically be set to `L`.\n * `selection_method::Function`: Function to selection expression from the Pareto frontier for use in `predict`. See `SymbolicRegression.MLJInterfaceModule.choose_best` for an example. This function should return a single integer specifying the index of the expression to use. By default, this maximizes the score (a pound-for-pound rating) of expressions reaching the threshold of 1.5x the minimum loss. To override this at prediction time, you can pass a named tuple with keys `data` and `idx` to `predict`. See the Operations section for details.\n * `dimensions_type::AbstractDimensions`: The type of dimensions to use when storing the units of the data. By default this is `DynamicQuantities.SymbolicDimensions`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. The expression used for prediction is defined by the `selection_method` function, which can be seen by viewing `report(mach).best_idx`.\n * `predict(mach, (data=Xnew, idx=i))`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. 
By passing a named tuple with keys `data` and `idx`, you are able to specify the equation you wish to evaluate in `idx`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `best_idx::Vector{Int}`: The index of the best expression in each Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Vector{Node{T}}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). The outer vector is indexed by target variable, and the inner vector is ordered by increasing complexity. `T` is equal to the element type of the passed data.\n * `equation_strings::Vector{Vector{String}}`: The expressions discovered by the search, represented as strings for easy inspection.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `best_idx::Vector{Int}`: The index of the best expression in each Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Vector{Node{T}}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). The outer vector is indexed by target variable, and the inner vector is ordered by increasing complexity.\n * `equation_strings::Vector{Vector{String}}`: The expressions discovered by the search, represented as strings for easy inspection.\n * `complexities::Vector{Vector{Int}}`: The complexity of each expression in each Pareto frontier.\n * `losses::Vector{Vector{L}}`: The loss of each expression in each Pareto frontier, according to the loss function specified in the model. 
The type `L` is the loss type, which is usually the same as the element type of data passed (i.e., `T`), but can differ if complex data types are passed.\n * `scores::Vector{Vector{L}}`: A metric which considers both the complexity and loss of an expression, equal to the change in the log-loss divided by the change in complexity, relative to the previous expression along the Pareto frontier. A larger score aims to indicate an expression is more likely to be the true expression generating the data, but this is very problem-dependent and generally several other factors should be considered.\n\n# Examples\n\n```julia\nusing MLJ\nMultitargetSRRegressor = @load MultitargetSRRegressor pkg=SymbolicRegression\nX = (a=rand(100), b=rand(100), c=rand(100))\nY = (y1=(@. cos(X.c) * 2.1 - 0.9), y2=(@. X.a * X.b + X.c))\nmodel = MultitargetSRRegressor(binary_operators=[+, -, *], unary_operators=[exp], niterations=100)\nmach = machine(model, X, Y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equations used:\nr = report(mach)\nfor (output_index, (eq, i)) in enumerate(zip(r.equation_strings, r.best_idx))\n println(\"Equation used for \", output_index, \": \", eq[i])\nend\n```\n\nSee also [`SRRegressor`](@ref).\n""" +":docstring" = """```julia\nCOFDetector(n_neighbors = 5,\n method=\"fast\")\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cof](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cof)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/MilesCranmer/SymbolicRegression.jl" -":package_name" = "SymbolicRegression" -":name" = "MultitargetSRRegressor" -":target_in_fit" = "`true`" +":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "COFDetector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [] 
+":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":target_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" -":supports_weights" = "`true`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" - -[SymbolicRegression.SRRegressor] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Union{Nothing, VersionNumber}\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, SymbolicRegression.CoreModule.ExpressionSpecModule.AbstractExpressionSpec}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real, AbstractVector}\", 
\"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Type{<:DynamicExpressions.OperatorEnumModule.AbstractOperatorEnum}}\", \"Union{Nothing, SymbolicRegression.CoreModule.MutationWeightsModule.AbstractMutationWeights, NamedTuple, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"Bool\", \"Symbol\", \"Union{Nothing, Function, SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping}\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Bool\", \"Union{AbstractString, Optim.AbstractOptimizer}\", \"Int64\", \"AbstractFloat\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"IO\", \"Bool\", \"Bool\", \"Union{Nothing, ADTypes.AbstractADType, Symbol}\", \"Bool\", \"Any\", \"Union{Nothing, Integer}\", \"Integer\", \"Union{Nothing, Bool}\", \"Union{Nothing, String}\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Bool\", \"AbstractString\", \"Bool\", \"Union{Nothing, Type{<:DynamicExpressions.ExpressionModule.AbstractExpression}}\", \"Union{Nothing, NamedTuple}\", \"Union{Nothing, Type{<:DynamicExpressions.NodeModule.AbstractExpressionNode}}\", \"Union{Nothing, AbstractString}\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Vector{Symbol}}\", \"Union{Nothing, SymbolicRegression.LoggingModule.AbstractSRLogger}\", \"Bool\", \"Union{Nothing, String}\", \"Type\", \"Function\", 
\"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" -":package_uuid" = "8254be44-1295-4e6a-a16d-46603ac705cb" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" + +[OutlierDetectionPython.CBLOFDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\", \"Real\", \"Real\", \"Bool\", \"Union{Nothing, Integer}\", \"Integer\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" 
+":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" -":package_license" = "Apache-2.0" -":prediction_type" = ":deterministic" -":load_path" = "SymbolicRegression.MLJInterfaceModule.SRRegressor" -":hyperparameters" = "`(:defaults, :binary_operators, :unary_operators, :maxsize, :maxdepth, :expression_spec, :populations, :population_size, :ncycles_per_iteration, :elementwise_loss, :loss_function, :loss_function_expression, :dimensional_constraint_penalty, :parsimony, :constraints, :nested_constraints, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :warmup_maxsize_by, :adaptive_parsimony_scaling, :operator_enum_constructor, :mutation_weights, :crossover_probability, :annealing, :alpha, :tournament_selection_n, :tournament_selection_p, :early_stop_condition, :batching, :batch_size, :dimensionless_constants_only, :loss_scale, :complexity_mapping, :use_frequency, :use_frequency_in_tournament, :should_simplify, :perturbation_factor, :probability_negate_constant, :skip_mutation_failures, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_f_calls_limit, :optimizer_options, :should_optimize_constants, :migration, :hof_migration, :fraction_replaced, :fraction_replaced_hof, :topn, :timeout_in_seconds, :max_evals, :input_stream, :turbo, :bumper, :autodiff_backend, :deterministic, :seed, :verbosity, :print_precision, :progress, :output_directory, :save_to_file, :bin_constraints, :una_constraints, :terminal_width, 
:use_recorder, :recorder_file, :define_helper_functions, :expression_type, :expression_options, :node_type, :output_file, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :worker_imports, :logger, :runtests, :run_id, :loss_type, :selection_method, :dimensions_type)`" -":is_pure_julia" = "`true`" -":human_name" = "Symbolic Regression via Evolutionary Search" -":is_supervised" = "`true`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "OutlierDetectionPython.CBLOFDetector" +":hyperparameters" = "`(:n_clusters, :alpha, :beta, :use_weights, :random_state, :n_jobs)`" +":is_pure_julia" = "`false`" +":human_name" = "cblof detector" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nSRRegressor\n```\n\nA model type for constructing a Symbolic Regression via Evolutionary Search, based on [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\n```\n\nDo `model = SRRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SRRegressor(defaults=...)`.\n\nSingle-target Symbolic Regression regressor (`SRRegressor`) searches for symbolic expressions that predict a single target variable from a set of input variables. All data is assumed to be `Continuous`. The search is performed using an evolutionary algorithm. 
This algorithm is described in the paper https://arxiv.org/abs/2305.01582.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nOR\n\n```julia\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`. Variable names in discovered expressions will be taken from the column names of `X`, if available. Units in columns of `X` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`. Units in `y` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `w` is the observation weights which can either be `nothing` (default) or an `AbstractVector` whose element scitype is `Count` or `Continuous`.\n\nTrain the machine using `fit!(mach)`, inspect the discovered expressions with `report(mach)`, and predict on new data with `predict(mach, Xnew)`. Note that unlike other regressors, symbolic regression stores a list of trained models. The model chosen from this list is defined by the function `selection_method` keyword argument, which by default balances accuracy and complexity. You can override this at prediction time by passing a named tuple with keys `data` and `idx`.\n\n# Hyper-parameters\n\n * `defaults`: What set of defaults to use for `Options`. The default, `nothing`, will simply take the default options from the current version of SymbolicRegression. However, you may also select the defaults from an earlier version, such as `v\"0.24.5\"`.\n * `binary_operators`: Vector of binary operators (functions) to use. Each operator should be defined for two input scalars, and one output scalar. 
All operators need to be defined over the entire real line (excluding infinity - these are stopped before they are input), or return `NaN` where not defined. For speed, define it so it takes two reals of the same type as input, and outputs the same type. For the SymbolicUtils simplification backend, you will need to define a generic method of the operator so it takes arbitrary types.\n * `operator_enum_constructor`: Constructor function to use for creating the operators enum. By default, OperatorEnum is used, but you can provide a different constructor like GenericOperatorEnum. The constructor must accept the keyword arguments 'binary*operators' and 'unary*operators'.\n * `unary_operators`: Same, but for unary operators (one input scalar, gives an output scalar).\n * `constraints`: Array of pairs specifying size constraints for each operator. The constraints for a binary operator should be a 2-tuple (e.g., `(-1, -1)`) and the constraints for a unary operator should be an `Int`. A size constraint is a limit to the size of the subtree in each argument of an operator. e.g., `[(^)=>(-1, 3)]` means that the `^` operator can have arbitrary size (`-1`) in its left argument, but a maximum size of `3` in its right argument. Default is no constraints.\n * `batching`: Whether to evolve based on small mini-batches of data, rather than the entire dataset.\n * `batch_size`: What batch size to use if using batching.\n * `elementwise_loss`: What elementwise loss function to use. Can be one of the following losses, or any other loss of type `SupervisedLoss`. You can also pass a function that takes a scalar target (left argument), and scalar predicted (right argument), and returns a scalar. This will be averaged over the predicted data. If weights are supplied, your function should take a third argument for the weight scalar. 
Included losses: Regression: - `LPDistLoss{P}()`, - `L1DistLoss()`, - `L2DistLoss()` (mean square), - `LogitDistLoss()`, - `HuberLoss(d)`, - `L1EpsilonInsLoss(ϵ)`, - `L2EpsilonInsLoss(ϵ)`, - `PeriodicLoss(c)`, - `QuantileLoss(τ)`, Classification: - `ZeroOneLoss()`, - `PerceptronLoss()`, - `L1HingeLoss()`, - `SmoothedL1HingeLoss(γ)`, - `ModifiedHuberLoss()`, - `L2MarginLoss()`, - `ExpLoss()`, - `SigmoidLoss()`, - `DWDMarginLoss(q)`.\n * `loss_function`: Alternatively, you may redefine the loss used as any function of `tree::AbstractExpressionNode{T}`, `dataset::Dataset{T}`, and `options::AbstractOptions`, so long as you output a non-negative scalar of type `T`. This is useful if you want to use a loss that takes into account derivatives, or correlations across the dataset. This also means you could use a custom evaluation for a particular expression. If you are using `batching=true`, then your function should accept a fourth argument `idx`, which is either `nothing` (indicating that the full dataset should be used), or a vector of indices to use for the batch. For example,\n\n ```\n function my_loss(tree, dataset::Dataset{T,L}, options)::L where {T,L}\n prediction, flag = eval_tree_array(tree, dataset.X, options)\n if !flag\n return L(Inf)\n end\n return sum((prediction .- dataset.y) .^ 2) / dataset.n\n end\n ```\n * `loss_function_expression`: Similar to `loss_function`, but takes `AbstractExpression` instead of `AbstractExpressionNode` as its first argument. Useful for `TemplateExpressionSpec`.\n * `loss_scale`: Determines how loss values are scaled when computing scores. Options are:\n\n * `:log` (default): Uses logarithmic scaling of loss ratios. This mode requires non-negative loss values and is ideal for traditional loss functions that are always positive.\n * `:linear`: Uses direct differences between losses. 
This mode handles any loss values (including negative) and is useful for custom loss functions, especially those based on likelihoods.\n * `expression_spec::AbstractExpressionSpec`: A specification of what types of expressions to use in the search. For example, `ExpressionSpec()` (default). You can also see `TemplateExpressionSpec` and `ParametricExpressionSpec` for specialized cases.\n * `populations`: How many populations of equations to use.\n * `population_size`: How many equations in each population.\n * `ncycles_per_iteration`: How many generations to consider per iteration.\n * `tournament_selection_n`: Number of expressions considered in each tournament.\n * `tournament_selection_p`: The fittest expression in a tournament is to be selected with probability `p`, the next fittest with probability `p*(1-p)`, and so forth.\n * `topn`: Number of equations to return to the host process, and to consider for the hall of fame.\n * `complexity_of_operators`: What complexity should be assigned to each operator, and the occurrence of a constant or variable. By default, this is 1 for all operators. Can be a real number as well, in which case the complexity of an expression will be rounded to the nearest integer. Input this in the form of, e.g., [(^) => 3, sin => 2].\n * `complexity_of_constants`: What complexity should be assigned to use of a constant. By default, this is 1.\n * `complexity_of_variables`: What complexity should be assigned to use of a variable, which can also be a vector indicating different per-variable complexity. By default, this is 1.\n * `complexity_mapping`: Alternatively, you can pass a function that takes the expression as input and returns the complexity. Make sure that this operates on `AbstractExpression` (and unpacks to `AbstractExpressionNode`), and returns an integer.\n * `alpha`: The probability of accepting an equation mutation during regularized evolution is given by exp(-delta_loss/(alpha * T)), where T goes from 1 to 0. 
Thus, alpha=infinite is the same as no annealing.\n * `maxsize`: Maximum size of equations during the search.\n * `maxdepth`: Maximum depth of equations during the search, by default this is set equal to the maxsize.\n * `parsimony`: A multiplicative factor for how much complexity is punished.\n * `dimensional_constraint_penalty`: An additive factor if the dimensional constraint is violated.\n * `dimensionless_constants_only`: Whether to only allow dimensionless constants.\n * `use_frequency`: Whether to use a parsimony that adapts to the relative proportion of equations at each complexity; this will ensure that there are a balanced number of equations considered for every complexity.\n * `use_frequency_in_tournament`: Whether to use the adaptive parsimony described above inside the score, rather than just at the mutation accept/reject stage.\n * `adaptive_parsimony_scaling`: How much to scale the adaptive parsimony term in the loss. Increase this if the search is spending too much time optimizing the most complex equations.\n * `turbo`: Whether to use `LoopVectorization.@turbo` to evaluate expressions. This can be significantly faster, but is only compatible with certain operators. *Experimental!*\n * `bumper`: Whether to use Bumper.jl for faster evaluation. *Experimental!*\n * `migration`: Whether to migrate equations between processes.\n * `hof_migration`: Whether to migrate equations from the hall of fame to processes.\n * `fraction_replaced`: What fraction of each population to replace with migrated equations at the end of each cycle.\n * `fraction_replaced_hof`: What fraction to replace with hall of fame equations at the end of each cycle.\n * `should_simplify`: Whether to simplify equations. If you pass a custom objective, this will be set to `false`.\n * `should_optimize_constants`: Whether to use an optimization algorithm to periodically optimize constants in equations.\n * `optimizer_algorithm`: Select algorithm to use for optimizing constants. 
Default is `Optim.BFGS(linesearch=LineSearches.BackTracking())`.\n * `optimizer_nrestarts`: How many different random starting positions to consider for optimization of constants.\n * `optimizer_probability`: Probability of performing optimization of constants at the end of a given iteration.\n * `optimizer_iterations`: How many optimization iterations to perform. This gets passed to `Optim.Options` as `iterations`. The default is 8.\n * `optimizer_f_calls_limit`: How many function calls to allow during optimization. This gets passed to `Optim.Options` as `f_calls_limit`. The default is `10_000`.\n * `optimizer_options`: General options for the constant optimization. For details we refer to the documentation on `Optim.Options` from the `Optim.jl` package. Options can be provided here as `NamedTuple`, e.g. `(iterations=16,)`, as a `Dict`, e.g. Dict(:x_tol => 1.0e-32,), or as an `Optim.Options` instance.\n * `autodiff_backend`: The backend to use for differentiation, which should be an instance of `AbstractADType` (see `ADTypes.jl`). Default is `nothing`, which means `Optim.jl` will estimate gradients (likely with finite differences). You can also pass a symbolic version of the backend type, such as `:Zygote` for Zygote, `:Enzyme`, etc. Most backends will not work, and many will never work due to incompatibilities, though support for some is gradually being added.\n * `perturbation_factor`: When mutating a constant, either multiply or divide by (1+perturbation_factor)^(rand()+1).\n * `probability_negate_constant`: Probability of negating a constant in the equation when mutating it.\n * `mutation_weights`: Relative probabilities of the mutations. The struct `MutationWeights` (or any `AbstractMutationWeights`) should be passed to these options. 
See its documentation on `MutationWeights` for the different weights.\n * `crossover_probability`: Probability of performing crossover.\n * `annealing`: Whether to use simulated annealing.\n * `warmup_maxsize_by`: Whether to slowly increase the max size from 5 up to `maxsize`. If nonzero, specifies the fraction through the search at which the maxsize should be reached.\n * `verbosity`: Whether to print debugging statements or not.\n * `print_precision`: How many digits to print when printing equations. By default, this is 5.\n * `output_directory`: The base directory to save output files to. Files will be saved in a subdirectory according to the run ID. By default, this is `./outputs`.\n * `save_to_file`: Whether to save equations to a file during the search.\n * `bin_constraints`: See `constraints`. This is the same, but specified for binary operators only (for example, if you have an operator that is both a binary and unary operator).\n * `una_constraints`: Likewise, for unary operators.\n * `seed`: What random seed to use. `nothing` uses no seed.\n * `progress`: Whether to use a progress bar output (`verbosity` will have no effect).\n * `early_stop_condition`: Float - whether to stop early if the mean loss gets below this value. Function - a function taking (loss, complexity) as arguments and returning true or false.\n * `timeout_in_seconds`: Float64 - the time in seconds after which to exit (as an alternative to the number of iterations).\n * `max_evals`: Int (or Nothing) - the maximum number of evaluations of expressions to perform.\n * `input_stream`: the stream to read user input from. By default, this is `stdin`. 
If you encounter issues with reading from `stdin`, like a hang, you can simply pass `devnull` to this argument.\n * `skip_mutation_failures`: Whether to simply skip over mutations that fail or are rejected, rather than to replace the mutated expression with the original expression and proceed normally.\n * `nested_constraints`: Specifies how many times a combination of operators can be nested. For example, `[sin => [cos => 0], cos => [cos => 2]]` specifies that `cos` may never appear within a `sin`, but `sin` can be nested with itself an unlimited number of times. The second term specifies that `cos` can be nested up to 2 times within a `cos`, so that `cos(cos(cos(x)))` is allowed (as well as any combination of `+` or `-` within it), but `cos(cos(cos(cos(x))))` is not allowed. When an operator is not specified, it is assumed that it can be nested an unlimited number of times. This requires that there is no operator which is used both in the unary operators and the binary operators (e.g., `-` could be both subtract, and negation). For binary operators, both arguments are treated the same way, and the max of each argument is constrained.\n * `deterministic`: Use a global counter for the birth time, rather than calls to `time()`. This gives perfect resolution, and is therefore deterministic. However, it is not thread safe, and must be used in serial mode.\n * `define_helper_functions`: Whether to define helper functions for constructing and evaluating trees.\n * `niterations::Int=10`: The number of iterations to perform the search. More iterations will improve the results.\n * `parallelism=:multithreading`: What parallelism mode to use. The options are `:multithreading`, `:multiprocessing`, and `:serial`. By default, multithreading will be used. Multithreading uses less memory, but multiprocessing can handle multi-node compute. If using `:multithreading` mode, the number of threads available to julia are used. 
If using `:multiprocessing`, `numprocs` processes will be created dynamically if `procs` is unset. If you have already allocated processes, pass them to the `procs` argument and they will be used. You may also pass a string instead of a symbol, like `\"multithreading\"`.\n * `numprocs::Union{Int, Nothing}=nothing`: The number of processes to use, if you want `equation_search` to set this up automatically. By default this will be `4`, but can be any number (you should pick a number <= the number of cores available).\n * `procs::Union{Vector{Int}, Nothing}=nothing`: If you have set up a distributed run manually with `procs = addprocs()` and `@everywhere`, pass the `procs` to this keyword argument.\n * `addprocs_function::Union{Function, Nothing}=nothing`: If using multiprocessing (`parallelism=:multithreading`), and are not passing `procs` manually, then they will be allocated dynamically using `addprocs`. However, you may also pass a custom function to use instead of `addprocs`. This function should take a single positional argument, which is the number of processes to use, as well as the `lazy` keyword argument. For example, if set up on a slurm cluster, you could pass `addprocs_function = addprocs_slurm`, which will set up slurm processes.\n * `heap_size_hint_in_bytes::Union{Int,Nothing}=nothing`: On Julia 1.9+, you may set the `--heap-size-hint` flag on Julia processes, recommending garbage collection once a process is close to the recommended size. This is important for long-running distributed jobs where each process has an independent memory, and can help avoid out-of-memory errors. By default, this is set to `Sys.free_memory() / numprocs`.\n * `worker_imports::Union{Vector{Symbol},Nothing}=nothing`: If you want to import additional modules on each worker, pass them here as a vector of symbols. 
By default some of the extensions will automatically be loaded when needed.\n * `runtests::Bool=true`: Whether to run (quick) tests before starting the search, to see if there will be any problems during the equation search related to the host environment.\n * `run_id::Union{String,Nothing}=nothing`: A unique identifier for the run. This will be used to store outputs from the run in the `outputs` directory. If not specified, a unique ID will be generated.\n * `loss_type::Type=Nothing`: If you would like to use a different type for the loss than for the data you passed, specify the type here. Note that if you pass complex data `::Complex{L}`, then the loss type will automatically be set to `L`.\n * `selection_method::Function`: Function to selection expression from the Pareto frontier for use in `predict`. See `SymbolicRegression.MLJInterfaceModule.choose_best` for an example. This function should return a single integer specifying the index of the expression to use. By default, this maximizes the score (a pound-for-pound rating) of expressions reaching the threshold of 1.5x the minimum loss. To override this at prediction time, you can pass a named tuple with keys `data` and `idx` to `predict`. See the Operations section for details.\n * `dimensions_type::AbstractDimensions`: The type of dimensions to use when storing the units of the data. By default this is `DynamicQuantities.SymbolicDimensions`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. The expression used for prediction is defined by the `selection_method` function, which can be seen by viewing `report(mach).best_idx`.\n * `predict(mach, (data=Xnew, idx=i))`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. 
By passing a named tuple with keys `data` and `idx`, you are able to specify the equation you wish to evaluate in `idx`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `best_idx::Int`: The index of the best expression in the Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Node{T}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). `T` is equal to the element type of the passed data.\n * `equation_strings::Vector{String}`: The expressions discovered by the search, represented as strings for easy inspection.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `best_idx::Int`: The index of the best expression in the Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Node{T}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity).\n * `equation_strings::Vector{String}`: The expressions discovered by the search, represented as strings for easy inspection.\n * `complexities::Vector{Int}`: The complexity of each expression in the Pareto frontier.\n * `losses::Vector{L}`: The loss of each expression in the Pareto frontier, according to the loss function specified in the model. The type `L` is the loss type, which is usually the same as the element type of data passed (i.e., `T`), but can differ if complex data types are passed.\n * `scores::Vector{L}`: A metric which considers both the complexity and loss of an expression, equal to the change in the log-loss divided by the change in complexity, relative to the previous expression along the Pareto frontier. 
A larger score aims to indicate an expression is more likely to be the true expression generating the data, but this is very problem-dependent and generally several other factors should be considered.\n\n# Examples\n\n```julia\nusing MLJ\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\nX, y = @load_boston\nmodel = SRRegressor(binary_operators=[+, -, *], unary_operators=[exp], niterations=100)\nmach = machine(model, X, y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equation used:\nr = report(mach)\nprintln(\"Equation used:\", r.equation_strings[r.best_idx])\n```\n\nWith units and variable names:\n\n```julia\nusing MLJ\nusing DynamicQuantities\nSRegressor = @load SRRegressor pkg=SymbolicRegression\n\nX = (; x1=rand(32) .* us\"km/h\", x2=rand(32) .* us\"km\")\ny = @. X.x2 / X.x1 + 0.5us\"h\"\nmodel = SRRegressor(binary_operators=[+, -, *, /])\nmach = machine(model, X, y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equation used:\nr = report(mach)\nprintln(\"Equation used:\", r.equation_strings[r.best_idx])\n```\n\nSee also [`MultitargetSRRegressor`](@ref).\n""" +":docstring" = """```julia\nCBLOFDetector(n_clusters = 8,\n alpha = 0.9,\n beta = 5,\n use_weights = false,\n random_state = nothing,\n n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cblof](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cblof)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/MilesCranmer/SymbolicRegression.jl" -":package_name" = "SymbolicRegression" -":name" = "SRRegressor" -":target_in_fit" = "`true`" +":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "CBLOFDetector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [] +":implemented_methods" = [":clean!", ":reformat", ":selectrows", 
":fit", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" -":supports_weights" = "`true`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" - -[MLJMultivariateStatsInterface.LDA] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Symbol\", \"StatsBase.CovarianceEstimator\", \"StatsBase.CovarianceEstimator\", \"Int64\", \"Float64\", \"Distances.SemiMetric\")`" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" + +[OutlierDetectionPython.LOCIDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Real\", \"Real\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" 
+":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" -":prediction_type" = ":probabilistic" -":load_path" = "MLJMultivariateStatsInterface.LDA" -":hyperparameters" = "`(:method, :cov_w, :cov_b, :outdim, :regcoef, :dist)`" -":is_pure_julia" = "`true`" -":human_name" = "linear discriminant analysis model" -":is_supervised" = "`true`" +":prediction_type" = ":unknown" +":load_path" = "OutlierDetectionPython.LOCIDetector" +":hyperparameters" = "`(:alpha, :k)`" +":is_pure_julia" = "`false`" +":human_name" = "loci detector" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nLDA\n```\n\nA model type for constructing a linear discriminant analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nLDA = @load LDA pkg=MultivariateStats\n```\n\nDo `model = LDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LDA(method=...)`.\n\n[Multiclass linear discriminant analysis](https://en.wikipedia.org/wiki/Linear_discriminant_analysis) learns a projection in a space of features to a lower dimensional space, in a way that attempts to preserve as much as possible the degree to which the classes of a discrete target variable can be discriminated. 
This can be used either for dimension reduction of the features (see `transform` below) or for probabilistic classification of the target (see `predict` below).\n\nIn the case of prediction, the class probability for a new observation reflects the proximity of that observation to training observations associated with that class, and how far away the observation is from observations associated with other classes. Specifically, the distances, in the transformed (projected) space, of a new observation, from the centroid of each target class, is computed; the resulting vector of distances, multiplied by minus one, is passed to a softmax function to obtain a class probability prediction. Here \"distance\" is computed using a user-specified distance function.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `method::Symbol=:gevd`: The solver, one of `:gevd` or `:whiten` methods.\n * `cov_w::StatsBase.SimpleCovariance()`: An estimator for the within-class covariance (used in computing the within-class scatter matrix, `Sw`). Any robust estimator from `CovarianceEstimation.jl` can be used.\n * `cov_b::StatsBase.SimpleCovariance()`: The same as `cov_w` but for the between-class covariance (used in computing the between-class scatter matrix, `Sb`).\n * `outdim::Int=0`: The output dimension, i.e dimension of the transformed space, automatically set to `min(indim, nclasses-1)` if equal to 0.\n * `regcoef::Float64=1e-6`: The regularization coefficient. 
A positive value `regcoef*eigmax(Sw)` where `Sw` is the within-class scatter matrix, is added to the diagonal of `Sw` to improve numerical stability. This can be useful if using the standard covariance estimator.\n * `dist=Distances.SqEuclidean()`: The distance metric to use when performing classification (to compare the distance between a new point and centroids in the transformed space); must be a subtype of `Distances.SemiMetric` from Distances.jl, e.g., `Distances.CosineDist`.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space i.e the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The mean of the untransformed training data. A vector of length `indim`.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool).\n * `class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n * `class_weights`: The weights (class counts) of each class. 
A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. (See fitted params section above.)\n * `Sb`: The between class scatter matrix.\n * `Sw`: The within class scatter matrix.\n\n# Examples\n\n```julia\nusing MLJ\n\nLDA = @load LDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = LDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n\n```\n\nSee also [`BayesianLDA`](@ref), [`SubspaceLDA`](@ref), [`BayesianSubspaceLDA`](@ref)\n""" +":docstring" = """```julia\nLOCIDetector(alpha = 0.5,\n k = 3)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loci](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loci)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "LDA" -":target_in_fit" = "`true`" +":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "LOCIDetector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] +":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = 
"`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" - -[MLJMultivariateStatsInterface.MultitargetLinearRegressor] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Bool\",)`" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing,)`" + +[OutlierDetectionPython.LMDDDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\", \"String\", \"Union{Nothing, Integer}\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" -":prediction_type" = ":deterministic" -":load_path" = "MLJMultivariateStatsInterface.MultitargetLinearRegressor" 
-":hyperparameters" = "`(:bias,)`" -":is_pure_julia" = "`true`" -":human_name" = "multitarget linear regressor" -":is_supervised" = "`true`" +":prediction_type" = ":unknown" +":load_path" = "OutlierDetectionPython.LMDDDetector" +":hyperparameters" = "`(:n_iter, :dis_measure, :random_state)`" +":is_pure_julia" = "`false`" +":human_name" = "lmdd detector" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nMultitargetLinearRegressor\n```\n\nA model type for constructing a multitarget linear regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nMultitargetLinearRegressor = @load MultitargetLinearRegressor pkg=MultivariateStats\n```\n\nDo `model = MultitargetLinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetLinearRegressor(bias=...)`.\n\n`MultitargetLinearRegressor` assumes the target variable is vector-valued with continuous components. It trains a linear prediction function using the least squares algorithm. 
Options exist to specify a bias term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any table of responses whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```julia\nusing MLJ\nusing DataFrames\n\nLinearRegressor = @load MultitargetLinearRegressor pkg=MultivariateStats\nlinear_regressor = LinearRegressor()\n\nX, y = make_regression(100, 9; n_targets = 2) # a table and a table (synthetic data)\n\nmach = machine(linear_regressor, X, y) |> fit!\n\nXnew, _ = make_regression(3, 9)\nyhat = predict(mach, Xnew) # new predictions\n```\n\nSee also [`LinearRegressor`](@ref), [`RidgeRegressor`](@ref), [`MultitargetRidgeRegressor`](@ref)\n""" +":docstring" = """```julia\nLMDDDetector(n_iter = 50,\n dis_measure = \"aad\",\n random_state = nothing)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lmdd](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lmdd)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "MultitargetLinearRegressor" -":target_in_fit" = "`true`" +":package_url" = 
"https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "LMDDDetector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" - -[MLJMultivariateStatsInterface.BayesianSubspaceLDA] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Bool\", \"Int64\", \"Union{Nothing, Dict{<:Any, <:Real}, CategoricalDistributions.UnivariateFinite{<:Any, <:Any, <:Any, <:Real}}\")`" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" + +[OutlierDetectionPython.RODDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Bool\",)`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing,)`" ":reporting_operations" = "`()`" 
-":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" -":prediction_type" = ":probabilistic" -":load_path" = "MLJMultivariateStatsInterface.BayesianSubspaceLDA" -":hyperparameters" = "`(:normalize, :outdim, :priors)`" -":is_pure_julia" = "`true`" -":human_name" = "Bayesian subspace LDA model" -":is_supervised" = "`true`" +":prediction_type" = ":unknown" +":load_path" = "OutlierDetectionPython.RODDetector" +":hyperparameters" = "`(:parallel_execution,)`" +":is_pure_julia" = "`false`" +":human_name" = "rod detector" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nBayesianSubspaceLDA\n```\n\nA model type for constructing a Bayesian subspace LDA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nBayesianSubspaceLDA = @load BayesianSubspaceLDA pkg=MultivariateStats\n```\n\nDo `model = BayesianSubspaceLDA()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `BayesianSubspaceLDA(normalize=...)`.\n\nThe Bayesian multiclass subspace linear discriminant analysis algorithm learns a projection matrix as described in [`SubspaceLDA`](@ref). The posterior class probability distribution is derived as in [`BayesianLDA`](@ref).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `normalize=true`: Option to normalize the between class variance for the number of observations in each class, one of `true` or `false`.\n\n`outdim`: the ouput dimension, automatically set to `min(indim, nclasses-1)` if equal to `0`. If a non-zero `outdim` is passed, then the actual output dimension used is `min(rank, outdim)` where `rank` is the rank of the within-class covariance matrix.\n\n * `priors::Union{Nothing, UnivariateFinite{<:Any, <:Any, <:Any, <:Real}, Dict{<:Any, <:Real}} = nothing`: For use in prediction with Bayes rule. If `priors = nothing` then `priors` are estimated from the class proportions in the training data. Otherwise it requires a `Dict` or `UnivariateFinite` object specifying the classes with non-zero probabilities in the training target.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. 
Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n * `priors`: The class priors for classification. As inferred from training target `y`, if not user-specified. A `UnivariateFinite` object with levels (classes) consistent with `levels(y)`.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space i.e the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The overall mean of the training data.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool).\n\n`class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n\n * `class_weights`: The weights (class counts) of each class. A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. (See fitted params section above.)\n * `explained_variance_ratio`: The ratio of explained variance to total variance. 
Each dimension corresponds to an eigenvalue.\n\n# Examples\n\n```julia\nusing MLJ\n\nBayesianSubspaceLDA = @load BayesianSubspaceLDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = BayesianSubspaceLDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n```\n\nSee also [`LDA`](@ref), [`BayesianLDA`](@ref), [`SubspaceLDA`](@ref)\n""" +":docstring" = """```julia\nRODDetector(parallel_execution = false)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.rod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.rod)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "BayesianSubspaceLDA" -":target_in_fit" = "`true`" +":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "RODDetector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] +":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" - 
-[MLJMultivariateStatsInterface.FactorAnalysis] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Symbol\", \"Int64\", \"Int64\", \"Real\", \"Real\", \"Union{Nothing, Real, Vector{Float64}}\")`" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" + +[SelfOrganizingMaps.SelfOrganizingMap] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Symbol\", \"Symbol\", \"Symbol\", \"Symbol\", \"Distances.PreMetric\", \"Int64\")`" +":package_uuid" = "ba4b7379-301a-4be0-bee6-171e4e152787" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "MLJMultivariateStatsInterface.FactorAnalysis" -":hyperparameters" = "`(:method, :maxoutdim, :maxiter, :tol, :eta, :mean)`" +":load_path" = "SelfOrganizingMaps.SelfOrganizingMap" +":hyperparameters" = "`(:k, :η, :σ², :grid_type, :η_decay, :σ_decay, :neighbor_function, :matching_distance, :Nepochs)`" ":is_pure_julia" = "`true`" 
-":human_name" = "factor analysis model" +":human_name" = "self organizing map" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nFactorAnalysis\n```\n\nA model type for constructing a factor analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nFactorAnalysis = @load FactorAnalysis pkg=MultivariateStats\n```\n\nDo `model = FactorAnalysis()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FactorAnalysis(method=...)`.\n\nFactor analysis is a linear-Gaussian latent variable model that is closely related to probabilistic PCA. In contrast to the probabilistic PCA model, the covariance of conditional distribution of the observed variable given the latent variable is diagonal rather than isotropic.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `method::Symbol=:cm`: Method to use to solve the problem, one of `:ml`, `:em`, `:bayes`.\n * `maxoutdim=0`: Controls the the dimension (number of columns) of the output, `outdim`. 
Specifically, `outdim = min(n, indim, maxoutdim)`, where `n` is the number of observations and `indim` the input dimension.\n * `maxiter::Int=1000`: Maximum number of iterations.\n * `tol::Real=1e-6`: Convergence tolerance.\n * `eta::Real=tol`: Variance lower bound.\n * `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: If `nothing`, centering will be computed and applied; if set to `0` no centering is applied (data is assumed pre-centered); if a vector, the centering is done with that vector.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having same the number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and ouput respectively. Each column of the projection matrix corresponds to a factor.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data (number of factors).\n * `variance`: The variance of the factors.\n * `covariance_matrix`: The estimated covariance matrix.\n * `mean`: The mean of the untransformed training data, of length `indim`.\n * `loadings`: The factor loadings. 
A matrix of size (`indim`, `outdim`) where `indim` and `outdim` are as defined above.\n\n# Examples\n\n```julia\nusing MLJ\n\nFactorAnalysis = @load FactorAnalysis pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = FactorAnalysis(maxoutdim=2)\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`KernelPCA`](@ref), [`ICA`](@ref), [`PPCA`](@ref), [`PCA`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "FactorAnalysis" +":docstring" = """```julia\nSelfOrganizingMap\n```\n\nA model type for constructing a self organizing map, based on [SelfOrganizingMaps.jl](https://github.com/john-waczak/SelfOrganizingMaps.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nSelfOrganizingMap = @load SelfOrganizingMap pkg=SelfOrganizingMaps\n```\n\nDo `model = SelfOrganizingMap()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SelfOrganizingMap(k=...)`.\n\nSelfOrganizingMaps implements [Kohonen's Self Organizing Map](https://ieeexplore.ieee.org/abstract/document/58325?casa_token=pGue0TD38nAAAAAA:kWFkvMJQKgYOTJjJx-_bRx8n_tnWEpau2QeoJ1gJt0IsywAuvkXYc0o5ezdc2mXfCzoEZUQXSQ), Proceedings of the IEEE; Kohonen, T.; (1990):\"The self-organizing map\"\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with mach = machine(model, X) where\n\n * `X`: an `AbstractMatrix` or `Table` of input features whose columns are of scitype `Continuous.`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `k=10`: Number of nodes along once side of SOM grid. There are `k²` total nodes.\n * `η=0.5`: Learning rate. 
Scales adjust made to winning node and its neighbors during each round of training.\n * `σ²=0.05`: The (squared) neighbor radius. Used to determine scale for neighbor node adjustments.\n * `grid_type=:rectangular` Node grid geometry. One of `(:rectangular, :hexagonal, :spherical)`.\n * `η_decay=:exponential` Learning rate schedule function. One of `(:exponential, :asymptotic)`\n * `σ_decay=:exponential` Neighbor radius schedule function. One of `(:exponential, :asymptotic, :none)`\n * `neighbor_function=:gaussian` Kernel function used to make adjustment to neighbor weights. Scale is set by `σ²`. One of `(:gaussian, :mexican_hat)`.\n * `matching_distance=euclidean` Distance function from `Distances.jl` used to determine winning node.\n * `Nepochs=1` Number of times to repeat training on the shuffled dataset.\n\n# Operations\n\n * `transform(mach, Xnew)`: returns the coordinates of the winning SOM node for each instance of `Xnew`. For SOM of grid*type `:rectangular` and `:hexagonal`, these are cartesian coordinates. 
For `grid_type` `:spherical`, these are the latitude and longitude in radians.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coords`: The coordinates of each of the SOM nodes (points in the domain of the map) with shape (k², 2)\n * `weights`: Array of weight vectors for the SOM nodes (corresponding points in the map's range) of shape (k², input dimension)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `classes`: the index of the winning node for each instance of the training data X interpreted as a class label\n\n# Examples\n\n```julia\nusing MLJ\nsom = @load SelfOrganizingMap pkg=SelfOrganizingMaps\nmodel = som()\nX, y = make_regression(50, 3) # synthetic data\nmach = machine(model, X) |> fit!\nX̃ = transform(mach, X)\n\nrpt = report(mach)\nclasses = rpt.classes\n```\n""" +":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":package_url" = "https://github.com/john-waczak/SelfOrganizingMaps.jl" +":package_name" = "SelfOrganizingMaps" +":name" = "SelfOrganizingMap" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, 
AbstractMatrix{ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" +":is_wrapper" = "`false`" -[MLJMultivariateStatsInterface.LinearRegressor] +[SymbolicRegression.SRTestRegressor] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Bool\",)`" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing,)`" +":hyperparameter_types" = "`(\"Union{Nothing, VersionNumber}\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, SymbolicRegression.CoreModule.ExpressionSpecModule.AbstractExpressionSpec}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Type{<:DynamicExpressions.OperatorEnumModule.AbstractOperatorEnum}}\", \"Union{Nothing, SymbolicRegression.CoreModule.MutationWeightsModule.AbstractMutationWeights, NamedTuple, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"Bool\", \"Symbol\", \"Union{Nothing, Function, SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping}\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Bool\", \"Union{AbstractString, Optim.AbstractOptimizer}\", \"Int64\", \"AbstractFloat\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, 
Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"IO\", \"Bool\", \"Bool\", \"Union{Nothing, ADTypes.AbstractADType, Symbol}\", \"Bool\", \"Any\", \"Union{Nothing, Integer}\", \"Integer\", \"Union{Nothing, Bool}\", \"Union{Nothing, String}\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Bool\", \"AbstractString\", \"Bool\", \"Union{Nothing, Type{<:DynamicExpressions.ExpressionModule.AbstractExpression}}\", \"Union{Nothing, NamedTuple}\", \"Union{Nothing, Type{<:DynamicExpressions.NodeModule.AbstractExpressionNode}}\", \"Union{Nothing, AbstractString}\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Vector{Symbol}}\", \"Union{Nothing, SymbolicRegression.LoggingModule.AbstractSRLogger}\", \"Bool\", \"Union{Nothing, String}\", \"Type\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" +":package_uuid" = "8254be44-1295-4e6a-a16d-46603ac705cb" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = 
"`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Deterministic`" -":package_license" = "MIT" +":package_license" = "Apache-2.0" ":prediction_type" = ":deterministic" -":load_path" = "MLJMultivariateStatsInterface.LinearRegressor" -":hyperparameters" = "`(:bias,)`" +":load_path" = "SymbolicRegression.MLJInterfaceModule.SRTestRegressor" +":hyperparameters" = "`(:defaults, :binary_operators, :unary_operators, :maxsize, :maxdepth, :expression_spec, :populations, :population_size, :ncycles_per_iteration, :elementwise_loss, :loss_function, :loss_function_expression, :dimensional_constraint_penalty, :parsimony, :constraints, :nested_constraints, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :warmup_maxsize_by, :adaptive_parsimony_scaling, :operator_enum_constructor, :mutation_weights, :crossover_probability, :annealing, :alpha, :tournament_selection_n, :tournament_selection_p, :early_stop_condition, :batching, :batch_size, :dimensionless_constants_only, :loss_scale, :complexity_mapping, :use_frequency, :use_frequency_in_tournament, :should_simplify, :perturbation_factor, :probability_negate_constant, :skip_mutation_failures, 
:optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_f_calls_limit, :optimizer_options, :should_optimize_constants, :migration, :hof_migration, :fraction_replaced, :fraction_replaced_hof, :topn, :timeout_in_seconds, :max_evals, :input_stream, :turbo, :bumper, :autodiff_backend, :deterministic, :seed, :verbosity, :print_precision, :progress, :output_directory, :save_to_file, :bin_constraints, :una_constraints, :terminal_width, :use_recorder, :recorder_file, :define_helper_functions, :expression_type, :expression_options, :node_type, :output_file, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :worker_imports, :logger, :runtests, :run_id, :loss_type, :selection_method, :dimensions_type)`" ":is_pure_julia" = "`true`" -":human_name" = "linear regressor" +":human_name" = "Symbolic Regression via Evolutionary Search" ":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nLinearRegressor\n```\n\nA model type for constructing a linear regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nLinearRegressor = @load LinearRegressor pkg=MultivariateStats\n```\n\nDo `model = LinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearRegressor(bias=...)`.\n\n`LinearRegressor` assumes the target is a `Continuous` variable and trains a linear prediction function using the least squares algorithm. 
Options exist to specify a bias term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```julia\nusing MLJ\n\nLinearRegressor = @load LinearRegressor pkg=MultivariateStats\nlinear_regressor = LinearRegressor()\n\nX, y = make_regression(100, 2) # a table and a vector (synthetic data)\nmach = machine(linear_regressor, X, y) |> fit!\n\nXnew, _ = make_regression(3, 2)\nyhat = predict(mach, Xnew) # new predictions\n```\n\nSee also [`MultitargetLinearRegressor`](@ref), [`RidgeRegressor`](@ref), [`MultitargetRidgeRegressor`](@ref)\n""" +":docstring" = """```\nSRTestRegressor\n```\n\nA model type for constructing a Symbolic Regression via Evolutionary Search, based on\n[SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nSRTestRegressor = @load SRTestRegressor pkg=SymbolicRegression\n```\n\nDo `model = SRTestRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`SRTestRegressor(defaults=...)`.\n# Hyper-parameters\n\n- `defaults = nothing`\n\n- `binary_operators = nothing`\n\n- `unary_operators = nothing`\n\n- `maxsize = nothing`\n\n- `maxdepth = nothing`\n\n- `expression_spec = nothing`\n\n- `populations = nothing`\n\n- `population_size = nothing`\n\n- `ncycles_per_iteration = nothing`\n\n- `elementwise_loss = nothing`\n\n- `loss_function = nothing`\n\n- `loss_function_expression = nothing`\n\n- `dimensional_constraint_penalty = nothing`\n\n- `parsimony = nothing`\n\n- `constraints = nothing`\n\n- `nested_constraints = nothing`\n\n- `complexity_of_operators = nothing`\n\n- `complexity_of_constants = nothing`\n\n- `complexity_of_variables = nothing`\n\n- `warmup_maxsize_by = nothing`\n\n- `adaptive_parsimony_scaling = nothing`\n\n- `operator_enum_constructor = nothing`\n\n- `mutation_weights = nothing`\n\n- `crossover_probability = nothing`\n\n- `annealing = nothing`\n\n- `alpha = nothing`\n\n- `tournament_selection_n = nothing`\n\n- `tournament_selection_p = nothing`\n\n- `early_stop_condition = nothing`\n\n- `batching = nothing`\n\n- `batch_size = nothing`\n\n- `dimensionless_constants_only = false`\n\n- `loss_scale = log`\n\n- `complexity_mapping = nothing`\n\n- `use_frequency = true`\n\n- `use_frequency_in_tournament = true`\n\n- `should_simplify = nothing`\n\n- `perturbation_factor = nothing`\n\n- `probability_negate_constant = nothing`\n\n- `skip_mutation_failures = true`\n\n- `optimizer_algorithm = Optim.BFGS{LineSearches.InitialStatic{Float64}, LineSearches.BackTracking{Float64, Int64}, Nothing, Nothing, Optim.Flat}(LineSearches.InitialStatic{Float64}(1.0, false), LineSearches.BackTracking{Float64, Int64}(0.0001, 0.5, 0.1, 1000, 3, Inf, nothing), nothing, nothing, Optim.Flat())`\n\n- `optimizer_nrestarts = 2`\n\n- `optimizer_probability = 0.14`\n\n- `optimizer_iterations = nothing`\n\n- `optimizer_f_calls_limit = nothing`\n\n- `optimizer_options 
= nothing`\n\n- `should_optimize_constants = true`\n\n- `migration = true`\n\n- `hof_migration = true`\n\n- `fraction_replaced = nothing`\n\n- `fraction_replaced_hof = nothing`\n\n- `topn = nothing`\n\n- `timeout_in_seconds = nothing`\n\n- `max_evals = nothing`\n\n- `input_stream = Base.DevNull()`\n\n- `turbo = false`\n\n- `bumper = false`\n\n- `autodiff_backend = nothing`\n\n- `deterministic = false`\n\n- `seed = nothing`\n\n- `verbosity = nothing`\n\n- `print_precision = 5`\n\n- `progress = nothing`\n\n- `output_directory = nothing`\n\n- `save_to_file = true`\n\n- `bin_constraints = nothing`\n\n- `una_constraints = nothing`\n\n- `terminal_width = nothing`\n\n- `use_recorder = false`\n\n- `recorder_file = pysr_recorder.json`\n\n- `define_helper_functions = true`\n\n- `expression_type = nothing`\n\n- `expression_options = nothing`\n\n- `node_type = nothing`\n\n- `output_file = nothing`\n\n- `fast_cycle = false`\n\n- `npopulations = nothing`\n\n- `npop = nothing`\n\n- `niterations = 1`\n\n- `parallelism = multithreading`\n\n- `numprocs = nothing`\n\n- `procs = nothing`\n\n- `addprocs_function = nothing`\n\n- `heap_size_hint_in_bytes = nothing`\n\n- `worker_imports = nothing`\n\n- `logger = nothing`\n\n- `runtests = true`\n\n- `run_id = nothing`\n\n- `loss_type = Nothing`\n\n- `selection_method = choose_best`\n\n- `dimensions_type = DynamicQuantities.SymbolicDimensions{DynamicQuantities.FRInt32}`\n\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "LinearRegressor" +":package_url" = "https://github.com/MilesCranmer/SymbolicRegression.jl" +":package_name" = "SymbolicRegression" +":name" = "SRTestRegressor" ":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":implemented_methods" = [] ":deep_properties" = "`()`" 
-":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" -":supports_weights" = "`false`" +":supports_weights" = "`true`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[MLJMultivariateStatsInterface.ICA] -":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Symbol\", \"Bool\", \"Int64\", \"Real\", \"Union{Nothing, Matrix{<:Real}}\", \"Union{Nothing, Real, Vector{Float64}}\")`" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJMultivariateStatsInterface.ICA" -":hyperparameters" = "`(:outdim, :alg, :fun, :do_whiten, :maxiter, :tol, :winit, :mean)`" -":is_pure_julia" = "`true`" -":human_name" = "independent component analysis model" -":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nICA\n```\n\nA model type for constructing a 
independent component analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nICA = @load ICA pkg=MultivariateStats\n```\n\nDo `model = ICA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ICA(outdim=...)`.\n\nIndependent component analysis is a computational technique for separating a multivariate signal into additive subcomponents, with the assumption that the subcomponents are non-Gaussian and independent from each other.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `outdim::Int=0`: The number of independent components to recover, set automatically if `0`.\n * `alg::Symbol=:fastica`: The algorithm to use (only `:fastica` is supported at the moment).\n * `fun::Symbol=:tanh`: The approximate neg-entropy function, one of `:tanh`, `:gaus`.\n * `do_whiten::Bool=true`: Whether or not to perform pre-whitening.\n * `maxiter::Int=100`: The maximum number of iterations.\n * `tol::Real=1e-6`: The convergence tolerance for change in the unmixing matrix W.\n * `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: mean to use, if nothing (default) centering is computed and applied, if zero, no centering; otherwise a vector of means can be passed.\n * `winit::Union{Nothing,Matrix{<:Real}}=nothing`: Initial guess for the unmixing matrix `W`: either an empty matrix (for random initialization of `W`), a matrix of size `m × k` (if `do_whiten` is true), or a matrix of size `m × k`. 
Here `m` is the number of components (columns) of the input.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return the component-separated version of input `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: The estimated component matrix.\n * `mean`: The estimated mean vector.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data.\n * `mean`: The mean of the untransformed training data, of length `indim`.\n\n# Examples\n\n```julia\nusing MLJ\n\nICA = @load ICA pkg=MultivariateStats\n\ntimes = range(0, 8, length=2000)\n\nsine_wave = sin.(2*times)\nsquare_wave = sign.(sin.(3*times))\nsawtooth_wave = map(t -> mod(2t, 2) - 1, times)\nsignals = hcat(sine_wave, square_wave, sawtooth_wave)\nnoisy_signals = signals + 0.2*randn(size(signals))\n\nmixing_matrix = [ 1 1 1; 0.5 2 1; 1.5 1 2]\nX = MLJ.table(noisy_signals*mixing_matrix)\n\nmodel = ICA(outdim = 3, tol=0.1)\nmach = machine(model, X) |> fit!\n\nX_unmixed = transform(mach, X)\n\nusing Plots\n\nplot(X.x2)\nplot(X.x2)\nplot(X.x3)\n\nplot(X_unmixed.x1)\nplot(X_unmixed.x2)\nplot(X_unmixed.x3)\n\n```\n\nSee also [`PCA`](@ref), [`KernelPCA`](@ref), [`FactorAnalysis`](@ref), [`PPCA`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "ICA" -":target_in_fit" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] -":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":supports_training_losses" = 
"`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" - -[MLJMultivariateStatsInterface.PPCA] +[SymbolicRegression.MultitargetSRTestRegressor] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Int64\", \"Real\", \"Union{Nothing, Real, Vector{Float64}}\")`" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":hyperparameter_types" = "`(\"Union{Nothing, VersionNumber}\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, SymbolicRegression.CoreModule.ExpressionSpecModule.AbstractExpressionSpec}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Type{<:DynamicExpressions.OperatorEnumModule.AbstractOperatorEnum}}\", \"Union{Nothing, SymbolicRegression.CoreModule.MutationWeightsModule.AbstractMutationWeights, NamedTuple, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"Bool\", \"Symbol\", \"Union{Nothing, Function, SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping}\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", 
\"Union{Nothing, Real}\", \"Bool\", \"Union{AbstractString, Optim.AbstractOptimizer}\", \"Int64\", \"AbstractFloat\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"IO\", \"Bool\", \"Bool\", \"Union{Nothing, ADTypes.AbstractADType, Symbol}\", \"Bool\", \"Any\", \"Union{Nothing, Integer}\", \"Integer\", \"Union{Nothing, Bool}\", \"Union{Nothing, String}\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Bool\", \"AbstractString\", \"Bool\", \"Union{Nothing, Type{<:DynamicExpressions.ExpressionModule.AbstractExpression}}\", \"Union{Nothing, NamedTuple}\", \"Union{Nothing, Type{<:DynamicExpressions.NodeModule.AbstractExpressionNode}}\", \"Union{Nothing, AbstractString}\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Vector{Symbol}}\", \"Union{Nothing, SymbolicRegression.LoggingModule.AbstractSRLogger}\", \"Bool\", \"Union{Nothing, String}\", \"Type\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" +":package_uuid" = "8254be44-1295-4e6a-a16d-46603ac705cb" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJMultivariateStatsInterface.PPCA" -":hyperparameters" = "`(:maxoutdim, :method, :maxiter, :tol, :mean)`" +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "Apache-2.0" +":prediction_type" = ":deterministic" +":load_path" = "SymbolicRegression.MLJInterfaceModule.MultitargetSRTestRegressor" +":hyperparameters" = "`(:defaults, :binary_operators, :unary_operators, :maxsize, :maxdepth, :expression_spec, :populations, :population_size, :ncycles_per_iteration, 
:elementwise_loss, :loss_function, :loss_function_expression, :dimensional_constraint_penalty, :parsimony, :constraints, :nested_constraints, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :warmup_maxsize_by, :adaptive_parsimony_scaling, :operator_enum_constructor, :mutation_weights, :crossover_probability, :annealing, :alpha, :tournament_selection_n, :tournament_selection_p, :early_stop_condition, :batching, :batch_size, :dimensionless_constants_only, :loss_scale, :complexity_mapping, :use_frequency, :use_frequency_in_tournament, :should_simplify, :perturbation_factor, :probability_negate_constant, :skip_mutation_failures, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_f_calls_limit, :optimizer_options, :should_optimize_constants, :migration, :hof_migration, :fraction_replaced, :fraction_replaced_hof, :topn, :timeout_in_seconds, :max_evals, :input_stream, :turbo, :bumper, :autodiff_backend, :deterministic, :seed, :verbosity, :print_precision, :progress, :output_directory, :save_to_file, :bin_constraints, :una_constraints, :terminal_width, :use_recorder, :recorder_file, :define_helper_functions, :expression_type, :expression_options, :node_type, :output_file, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :worker_imports, :logger, :runtests, :run_id, :loss_type, :selection_method, :dimensions_type)`" ":is_pure_julia" = "`true`" -":human_name" = "probabilistic PCA model" -":is_supervised" = "`false`" +":human_name" = "Multi-Target Symbolic Regression via Evolutionary Search" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nPPCA\n```\n\nA model type for constructing a probabilistic PCA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported 
using\n\n```julia\nPPCA = @load PPCA pkg=MultivariateStats\n```\n\nDo `model = PPCA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `PPCA(maxoutdim=...)`.\n\nProbabilistic principal component analysis is a dimension-reduction algorithm which represents a constrained form of the Gaussian distribution in which the number of free parameters can be restricted while still allowing the model to capture the dominant correlations in a data set. It is expressed as the maximum likelihood solution of a probabilistic latent variable model. For details, see Bishop (2006): C. M. Pattern Recognition and Machine Learning.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `maxoutdim=0`: Controls the the dimension (number of columns) of the output, `outdim`. 
Specifically, `outdim = min(n, indim, maxoutdim)`, where `n` is the number of observations and `indim` the input dimension.\n * `method::Symbol=:ml`: The method to use to solve the problem, one of `:ml`, `:em`, `:bayes`.\n * `maxiter::Int=1000`: The maximum number of iterations.\n * `tol::Real=1e-6`: The convergence tolerance.\n * `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: If `nothing`, centering will be computed and applied; if set to `0` no centering is applied (data is assumed pre-centered); if a vector, the centering is done with that vector.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having same the number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and ouput respectively. Each column of the projection matrix corresponds to a principal component.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data.\n * `tvat`: The variance of the components.\n * `loadings`: The model's loadings matrix. 
A matrix of size (`indim`, `outdim`) where `indim` and `outdim` as as defined above.\n\n# Examples\n\n```julia\nusing MLJ\n\nPPCA = @load PPCA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = PPCA(maxoutdim=2)\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`KernelPCA`](@ref), [`ICA`](@ref), [`FactorAnalysis`](@ref), [`PCA`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "PPCA" -":target_in_fit" = "`false`" +":docstring" = """```\nMultitargetSRTestRegressor\n```\n\nA model type for constructing a Multi-Target Symbolic Regression via Evolutionary Search, based on\n[SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nMultitargetSRTestRegressor = @load MultitargetSRTestRegressor pkg=SymbolicRegression\n```\n\nDo `model = MultitargetSRTestRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`MultitargetSRTestRegressor(defaults=...)`.\n# Hyper-parameters\n\n- `defaults = nothing`\n\n- `binary_operators = nothing`\n\n- `unary_operators = nothing`\n\n- `maxsize = nothing`\n\n- `maxdepth = nothing`\n\n- `expression_spec = nothing`\n\n- `populations = nothing`\n\n- `population_size = nothing`\n\n- `ncycles_per_iteration = nothing`\n\n- `elementwise_loss = nothing`\n\n- `loss_function = nothing`\n\n- `loss_function_expression = nothing`\n\n- `dimensional_constraint_penalty = nothing`\n\n- `parsimony = nothing`\n\n- `constraints = nothing`\n\n- `nested_constraints = nothing`\n\n- `complexity_of_operators = nothing`\n\n- `complexity_of_constants = nothing`\n\n- `complexity_of_variables = nothing`\n\n- `warmup_maxsize_by = nothing`\n\n- `adaptive_parsimony_scaling = nothing`\n\n- `operator_enum_constructor = nothing`\n\n- `mutation_weights = nothing`\n\n- `crossover_probability = nothing`\n\n- `annealing = nothing`\n\n- `alpha = nothing`\n\n- `tournament_selection_n = nothing`\n\n- `tournament_selection_p = nothing`\n\n- `early_stop_condition = nothing`\n\n- `batching = nothing`\n\n- `batch_size = nothing`\n\n- `dimensionless_constants_only = false`\n\n- `loss_scale = log`\n\n- `complexity_mapping = nothing`\n\n- `use_frequency = true`\n\n- `use_frequency_in_tournament = true`\n\n- `should_simplify = nothing`\n\n- `perturbation_factor = nothing`\n\n- `probability_negate_constant = nothing`\n\n- `skip_mutation_failures = true`\n\n- `optimizer_algorithm = Optim.BFGS{LineSearches.InitialStatic{Float64}, LineSearches.BackTracking{Float64, Int64}, Nothing, Nothing, Optim.Flat}(LineSearches.InitialStatic{Float64}(1.0, false), LineSearches.BackTracking{Float64, Int64}(0.0001, 0.5, 0.1, 1000, 3, Inf, nothing), nothing, nothing, Optim.Flat())`\n\n- `optimizer_nrestarts = 2`\n\n- `optimizer_probability = 0.14`\n\n- `optimizer_iterations = nothing`\n\n- `optimizer_f_calls_limit = nothing`\n\n- 
`optimizer_options = nothing`\n\n- `should_optimize_constants = true`\n\n- `migration = true`\n\n- `hof_migration = true`\n\n- `fraction_replaced = nothing`\n\n- `fraction_replaced_hof = nothing`\n\n- `topn = nothing`\n\n- `timeout_in_seconds = nothing`\n\n- `max_evals = nothing`\n\n- `input_stream = Base.DevNull()`\n\n- `turbo = false`\n\n- `bumper = false`\n\n- `autodiff_backend = nothing`\n\n- `deterministic = false`\n\n- `seed = nothing`\n\n- `verbosity = nothing`\n\n- `print_precision = 5`\n\n- `progress = nothing`\n\n- `output_directory = nothing`\n\n- `save_to_file = true`\n\n- `bin_constraints = nothing`\n\n- `una_constraints = nothing`\n\n- `terminal_width = nothing`\n\n- `use_recorder = false`\n\n- `recorder_file = pysr_recorder.json`\n\n- `define_helper_functions = true`\n\n- `expression_type = nothing`\n\n- `expression_options = nothing`\n\n- `node_type = nothing`\n\n- `output_file = nothing`\n\n- `fast_cycle = false`\n\n- `npopulations = nothing`\n\n- `npop = nothing`\n\n- `niterations = 1`\n\n- `parallelism = multithreading`\n\n- `numprocs = nothing`\n\n- `procs = nothing`\n\n- `addprocs_function = nothing`\n\n- `heap_size_hint_in_bytes = nothing`\n\n- `worker_imports = nothing`\n\n- `logger = nothing`\n\n- `runtests = true`\n\n- `run_id = nothing`\n\n- `loss_type = Nothing`\n\n- `selection_method = choose_best`\n\n- `dimensions_type = DynamicQuantities.SymbolicDimensions{DynamicQuantities.FRInt32}`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/MilesCranmer/SymbolicRegression.jl" +":package_name" = "SymbolicRegression" +":name" = "MultitargetSRTestRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] +":implemented_methods" = [] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = 
"`ScientificTypesBase.Unknown`" +":predict_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":target_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":supports_training_losses" = "`false`" -":supports_weights" = "`false`" +":supports_weights" = "`true`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[MLJMultivariateStatsInterface.RidgeRegressor] +[SymbolicRegression.MultitargetSRRegressor] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Union{Real, AbstractVecOrMat}\", \"Bool\")`" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing, nothing)`" +":hyperparameter_types" = "`(\"Union{Nothing, VersionNumber}\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, SymbolicRegression.CoreModule.ExpressionSpecModule.AbstractExpressionSpec}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, 
Type{<:DynamicExpressions.OperatorEnumModule.AbstractOperatorEnum}}\", \"Union{Nothing, SymbolicRegression.CoreModule.MutationWeightsModule.AbstractMutationWeights, NamedTuple, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"Bool\", \"Symbol\", \"Union{Nothing, Function, SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping}\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Bool\", \"Union{AbstractString, Optim.AbstractOptimizer}\", \"Int64\", \"AbstractFloat\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"IO\", \"Bool\", \"Bool\", \"Union{Nothing, ADTypes.AbstractADType, Symbol}\", \"Bool\", \"Any\", \"Union{Nothing, Integer}\", \"Integer\", \"Union{Nothing, Bool}\", \"Union{Nothing, String}\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Bool\", \"AbstractString\", \"Bool\", \"Union{Nothing, Type{<:DynamicExpressions.ExpressionModule.AbstractExpression}}\", \"Union{Nothing, NamedTuple}\", \"Union{Nothing, Type{<:DynamicExpressions.NodeModule.AbstractExpressionNode}}\", \"Union{Nothing, AbstractString}\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Vector{Symbol}}\", \"Union{Nothing, SymbolicRegression.LoggingModule.AbstractSRLogger}\", \"Bool\", \"Union{Nothing, String}\", \"Type\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" +":package_uuid" = 
"8254be44-1295-4e6a-a16d-46603ac705cb" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Deterministic`" 
-":package_license" = "MIT" +":package_license" = "Apache-2.0" ":prediction_type" = ":deterministic" -":load_path" = "MLJMultivariateStatsInterface.RidgeRegressor" -":hyperparameters" = "`(:lambda, :bias)`" +":load_path" = "SymbolicRegression.MLJInterfaceModule.MultitargetSRRegressor" +":hyperparameters" = "`(:defaults, :binary_operators, :unary_operators, :maxsize, :maxdepth, :expression_spec, :populations, :population_size, :ncycles_per_iteration, :elementwise_loss, :loss_function, :loss_function_expression, :dimensional_constraint_penalty, :parsimony, :constraints, :nested_constraints, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :warmup_maxsize_by, :adaptive_parsimony_scaling, :operator_enum_constructor, :mutation_weights, :crossover_probability, :annealing, :alpha, :tournament_selection_n, :tournament_selection_p, :early_stop_condition, :batching, :batch_size, :dimensionless_constants_only, :loss_scale, :complexity_mapping, :use_frequency, :use_frequency_in_tournament, :should_simplify, :perturbation_factor, :probability_negate_constant, :skip_mutation_failures, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_f_calls_limit, :optimizer_options, :should_optimize_constants, :migration, :hof_migration, :fraction_replaced, :fraction_replaced_hof, :topn, :timeout_in_seconds, :max_evals, :input_stream, :turbo, :bumper, :autodiff_backend, :deterministic, :seed, :verbosity, :print_precision, :progress, :output_directory, :save_to_file, :bin_constraints, :una_constraints, :terminal_width, :use_recorder, :recorder_file, :define_helper_functions, :expression_type, :expression_options, :node_type, :output_file, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :worker_imports, :logger, :runtests, :run_id, :loss_type, :selection_method, :dimensions_type)`" ":is_pure_julia" = "`true`" -":human_name" = "ridge 
regressor" +":human_name" = "Multi-Target Symbolic Regression via Evolutionary Search" ":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nRidgeRegressor\n```\n\nA model type for constructing a ridge regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nRidgeRegressor = @load RidgeRegressor pkg=MultivariateStats\n```\n\nDo `model = RidgeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RidgeRegressor(lambda=...)`.\n\n`RidgeRegressor` adds a quadratic penalty term to least squares regression, for regularization. Ridge regression is particularly useful in the case of multicollinearity. Options exist to specify a bias term, and to adjust the strength of the penalty term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `lambda=1.0`: Is the non-negative parameter for the regularization strength. 
If lambda is 0, ridge regression is equivalent to linear least squares regression, and as lambda approaches infinity, all the linear coefficients approach 0.\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```julia\nusing MLJ\n\nRidgeRegressor = @load RidgeRegressor pkg=MultivariateStats\npipe = Standardizer() |> RidgeRegressor(lambda=10)\n\nX, y = @load_boston\n\nmach = machine(pipe, X, y) |> fit!\nyhat = predict(mach, X)\ntraining_error = l1(yhat, y) |> mean\n```\n\nSee also [`LinearRegressor`](@ref), [`MultitargetLinearRegressor`](@ref), [`MultitargetRidgeRegressor`](@ref)\n""" +":docstring" = """```julia\nMultitargetSRRegressor\n```\n\nA model type for constructing a Multi-Target Symbolic Regression via Evolutionary Search, based on [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nMultitargetSRRegressor = @load MultitargetSRRegressor pkg=SymbolicRegression\n```\n\nDo `model = MultitargetSRRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetSRRegressor(defaults=...)`.\n\nMulti-target Symbolic Regression regressor (`MultitargetSRRegressor`) conducts several searches for expressions that predict each target variable from a set of input variables. All data is assumed to be `Continuous`. The search is performed using an evolutionary algorithm. 
This algorithm is described in the paper https://arxiv.org/abs/2305.01582.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nOR\n\n```julia\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`. Variable names in discovered expressions will be taken from the column names of `X`, if available. Units in columns of `X` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n\n * `y` is the target, which can be any table of target variables whose element scitype is `Continuous`; check the scitype with `schema(y)`. Units in columns of `y` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `w` is the observation weights which can either be `nothing` (default) or an `AbstractVector` whose element scitype is `Count` or `Continuous`. The same weights are used for all targets.\n\nTrain the machine using `fit!(mach)`, inspect the discovered expressions with `report(mach)`, and predict on new data with `predict(mach, Xnew)`. Note that unlike other regressors, symbolic regression stores a list of lists of trained models. The model chosen from each of these lists is determined by the `selection_method` keyword argument, which by default balances accuracy and complexity. You can override this at prediction time by passing a named tuple with keys `data` and `idx`.\n\n# Hyper-parameters\n\n * `defaults`: What set of defaults to use for `Options`. The default, `nothing`, will simply take the default options from the current version of SymbolicRegression. However, you may also select the defaults from an earlier version, such as `v\"0.24.5\"`.\n * `binary_operators`: Vector of binary operators (functions) to use. Each operator should be defined for two input scalars, and one output scalar. 
All operators need to be defined over the entire real line (excluding infinity - these are stopped before they are input), or return `NaN` where not defined. For speed, define it so it takes two reals of the same type as input, and outputs the same type. For the SymbolicUtils simplification backend, you will need to define a generic method of the operator so it takes arbitrary types.\n * `operator_enum_constructor`: Constructor function to use for creating the operators enum. By default, OperatorEnum is used, but you can provide a different constructor like GenericOperatorEnum. The constructor must accept the keyword arguments `binary_operators` and `unary_operators`.\n * `unary_operators`: Same, but for unary operators (one input scalar, gives an output scalar).\n * `constraints`: Array of pairs specifying size constraints for each operator. The constraints for a binary operator should be a 2-tuple (e.g., `(-1, -1)`) and the constraints for a unary operator should be an `Int`. A size constraint is a limit to the size of the subtree in each argument of an operator. e.g., `[(^)=>(-1, 3)]` means that the `^` operator can have arbitrary size (`-1`) in its left argument, but a maximum size of `3` in its right argument. Default is no constraints.\n * `batching`: Whether to evolve based on small mini-batches of data, rather than the entire dataset.\n * `batch_size`: What batch size to use if using batching.\n * `elementwise_loss`: What elementwise loss function to use. Can be one of the following losses, or any other loss of type `SupervisedLoss`. You can also pass a function that takes a scalar target (left argument), and scalar predicted (right argument), and returns a scalar. This will be averaged over the predicted data. If weights are supplied, your function should take a third argument for the weight scalar. 
Included losses: Regression: - `LPDistLoss{P}()`, - `L1DistLoss()`, - `L2DistLoss()` (mean square), - `LogitDistLoss()`, - `HuberLoss(d)`, - `L1EpsilonInsLoss(ϵ)`, - `L2EpsilonInsLoss(ϵ)`, - `PeriodicLoss(c)`, - `QuantileLoss(τ)`, Classification: - `ZeroOneLoss()`, - `PerceptronLoss()`, - `L1HingeLoss()`, - `SmoothedL1HingeLoss(γ)`, - `ModifiedHuberLoss()`, - `L2MarginLoss()`, - `ExpLoss()`, - `SigmoidLoss()`, - `DWDMarginLoss(q)`.\n * `loss_function`: Alternatively, you may redefine the loss used as any function of `tree::AbstractExpressionNode{T}`, `dataset::Dataset{T}`, and `options::AbstractOptions`, so long as you output a non-negative scalar of type `T`. This is useful if you want to use a loss that takes into account derivatives, or correlations across the dataset. This also means you could use a custom evaluation for a particular expression. If you are using `batching=true`, then your function should accept a fourth argument `idx`, which is either `nothing` (indicating that the full dataset should be used), or a vector of indices to use for the batch. For example,\n\n ```\n function my_loss(tree, dataset::Dataset{T,L}, options)::L where {T,L}\n prediction, flag = eval_tree_array(tree, dataset.X, options)\n if !flag\n return L(Inf)\n end\n return sum((prediction .- dataset.y) .^ 2) / dataset.n\n end\n ```\n * `loss_function_expression`: Similar to `loss_function`, but takes `AbstractExpression` instead of `AbstractExpressionNode` as its first argument. Useful for `TemplateExpressionSpec`.\n * `loss_scale`: Determines how loss values are scaled when computing scores. Options are:\n\n * `:log` (default): Uses logarithmic scaling of loss ratios. This mode requires non-negative loss values and is ideal for traditional loss functions that are always positive.\n * `:linear`: Uses direct differences between losses. 
This mode handles any loss values (including negative) and is useful for custom loss functions, especially those based on likelihoods.\n * `expression_spec::AbstractExpressionSpec`: A specification of what types of expressions to use in the search. For example, `ExpressionSpec()` (default). You can also see `TemplateExpressionSpec` and `ParametricExpressionSpec` for specialized cases.\n * `populations`: How many populations of equations to use.\n * `population_size`: How many equations in each population.\n * `ncycles_per_iteration`: How many generations to consider per iteration.\n * `tournament_selection_n`: Number of expressions considered in each tournament.\n * `tournament_selection_p`: The fittest expression in a tournament is to be selected with probability `p`, the next fittest with probability `p*(1-p)`, and so forth.\n * `topn`: Number of equations to return to the host process, and to consider for the hall of fame.\n * `complexity_of_operators`: What complexity should be assigned to each operator, and the occurrence of a constant or variable. By default, this is 1 for all operators. Can be a real number as well, in which case the complexity of an expression will be rounded to the nearest integer. Input this in the form of, e.g., [(^) => 3, sin => 2].\n * `complexity_of_constants`: What complexity should be assigned to use of a constant. By default, this is 1.\n * `complexity_of_variables`: What complexity should be assigned to use of a variable, which can also be a vector indicating different per-variable complexity. By default, this is 1.\n * `complexity_mapping`: Alternatively, you can pass a function that takes the expression as input and returns the complexity. Make sure that this operates on `AbstractExpression` (and unpacks to `AbstractExpressionNode`), and returns an integer.\n * `alpha`: The probability of accepting an equation mutation during regularized evolution is given by exp(-delta_loss/(alpha * T)), where T goes from 1 to 0. 
Thus, alpha=infinite is the same as no annealing.\n * `maxsize`: Maximum size of equations during the search.\n * `maxdepth`: Maximum depth of equations during the search, by default this is set equal to the maxsize.\n * `parsimony`: A multiplicative factor for how much complexity is punished.\n * `dimensional_constraint_penalty`: An additive factor if the dimensional constraint is violated.\n * `dimensionless_constants_only`: Whether to only allow dimensionless constants.\n * `use_frequency`: Whether to use a parsimony that adapts to the relative proportion of equations at each complexity; this will ensure that there are a balanced number of equations considered for every complexity.\n * `use_frequency_in_tournament`: Whether to use the adaptive parsimony described above inside the score, rather than just at the mutation accept/reject stage.\n * `adaptive_parsimony_scaling`: How much to scale the adaptive parsimony term in the loss. Increase this if the search is spending too much time optimizing the most complex equations.\n * `turbo`: Whether to use `LoopVectorization.@turbo` to evaluate expressions. This can be significantly faster, but is only compatible with certain operators. *Experimental!*\n * `bumper`: Whether to use Bumper.jl for faster evaluation. *Experimental!*\n * `migration`: Whether to migrate equations between processes.\n * `hof_migration`: Whether to migrate equations from the hall of fame to processes.\n * `fraction_replaced`: What fraction of each population to replace with migrated equations at the end of each cycle.\n * `fraction_replaced_hof`: What fraction to replace with hall of fame equations at the end of each cycle.\n * `should_simplify`: Whether to simplify equations. If you pass a custom objective, this will be set to `false`.\n * `should_optimize_constants`: Whether to use an optimization algorithm to periodically optimize constants in equations.\n * `optimizer_algorithm`: Select algorithm to use for optimizing constants. 
Default is `Optim.BFGS(linesearch=LineSearches.BackTracking())`.\n * `optimizer_nrestarts`: How many different random starting positions to consider for optimization of constants.\n * `optimizer_probability`: Probability of performing optimization of constants at the end of a given iteration.\n * `optimizer_iterations`: How many optimization iterations to perform. This gets passed to `Optim.Options` as `iterations`. The default is 8.\n * `optimizer_f_calls_limit`: How many function calls to allow during optimization. This gets passed to `Optim.Options` as `f_calls_limit`. The default is `10_000`.\n * `optimizer_options`: General options for the constant optimization. For details we refer to the documentation on `Optim.Options` from the `Optim.jl` package. Options can be provided here as `NamedTuple`, e.g. `(iterations=16,)`, as a `Dict`, e.g. Dict(:x_tol => 1.0e-32,), or as an `Optim.Options` instance.\n * `autodiff_backend`: The backend to use for differentiation, which should be an instance of `AbstractADType` (see `ADTypes.jl`). Default is `nothing`, which means `Optim.jl` will estimate gradients (likely with finite differences). You can also pass a symbolic version of the backend type, such as `:Zygote` for Zygote, `:Enzyme`, etc. Most backends will not work, and many will never work due to incompatibilities, though support for some is gradually being added.\n * `perturbation_factor`: When mutating a constant, either multiply or divide by (1+perturbation_factor)^(rand()+1).\n * `probability_negate_constant`: Probability of negating a constant in the equation when mutating it.\n * `mutation_weights`: Relative probabilities of the mutations. The struct `MutationWeights` (or any `AbstractMutationWeights`) should be passed to these options. 
See its documentation on `MutationWeights` for the different weights.\n * `crossover_probability`: Probability of performing crossover.\n * `annealing`: Whether to use simulated annealing.\n * `warmup_maxsize_by`: Whether to slowly increase the max size from 5 up to `maxsize`. If nonzero, specifies the fraction through the search at which the maxsize should be reached.\n * `verbosity`: Whether to print debugging statements or not.\n * `print_precision`: How many digits to print when printing equations. By default, this is 5.\n * `output_directory`: The base directory to save output files to. Files will be saved in a subdirectory according to the run ID. By default, this is `./outputs`.\n * `save_to_file`: Whether to save equations to a file during the search.\n * `bin_constraints`: See `constraints`. This is the same, but specified for binary operators only (for example, if you have an operator that is both a binary and unary operator).\n * `una_constraints`: Likewise, for unary operators.\n * `seed`: What random seed to use. `nothing` uses no seed.\n * `progress`: Whether to use a progress bar output (`verbosity` will have no effect).\n * `early_stop_condition`: Float - whether to stop early if the mean loss gets below this value. Function - a function taking (loss, complexity) as arguments and returning true or false.\n * `timeout_in_seconds`: Float64 - the time in seconds after which to exit (as an alternative to the number of iterations).\n * `max_evals`: Int (or Nothing) - the maximum number of evaluations of expressions to perform.\n * `input_stream`: the stream to read user input from. By default, this is `stdin`. 
If you encounter issues with reading from `stdin`, like a hang, you can simply pass `devnull` to this argument.\n * `skip_mutation_failures`: Whether to simply skip over mutations that fail or are rejected, rather than to replace the mutated expression with the original expression and proceed normally.\n * `nested_constraints`: Specifies how many times a combination of operators can be nested. For example, `[sin => [cos => 0], cos => [cos => 2]]` specifies that `cos` may never appear within a `sin`, but `sin` can be nested with itself an unlimited number of times. The second term specifies that `cos` can be nested up to 2 times within a `cos`, so that `cos(cos(cos(x)))` is allowed (as well as any combination of `+` or `-` within it), but `cos(cos(cos(cos(x))))` is not allowed. When an operator is not specified, it is assumed that it can be nested an unlimited number of times. This requires that there is no operator which is used both in the unary operators and the binary operators (e.g., `-` could be both subtract, and negation). For binary operators, both arguments are treated the same way, and the max of each argument is constrained.\n * `deterministic`: Use a global counter for the birth time, rather than calls to `time()`. This gives perfect resolution, and is therefore deterministic. However, it is not thread safe, and must be used in serial mode.\n * `define_helper_functions`: Whether to define helper functions for constructing and evaluating trees.\n * `niterations::Int=10`: The number of iterations to perform the search. More iterations will improve the results.\n * `parallelism=:multithreading`: What parallelism mode to use. The options are `:multithreading`, `:multiprocessing`, and `:serial`. By default, multithreading will be used. Multithreading uses less memory, but multiprocessing can handle multi-node compute. If using `:multithreading` mode, the number of threads available to julia are used. 
If using `:multiprocessing`, `numprocs` processes will be created dynamically if `procs` is unset. If you have already allocated processes, pass them to the `procs` argument and they will be used. You may also pass a string instead of a symbol, like `\"multithreading\"`.\n * `numprocs::Union{Int, Nothing}=nothing`: The number of processes to use, if you want `equation_search` to set this up automatically. By default this will be `4`, but can be any number (you should pick a number <= the number of cores available).\n * `procs::Union{Vector{Int}, Nothing}=nothing`: If you have set up a distributed run manually with `procs = addprocs()` and `@everywhere`, pass the `procs` to this keyword argument.\n * `addprocs_function::Union{Function, Nothing}=nothing`: If using multiprocessing (`parallelism=:multiprocessing`), and are not passing `procs` manually, then they will be allocated dynamically using `addprocs`. However, you may also pass a custom function to use instead of `addprocs`. This function should take a single positional argument, which is the number of processes to use, as well as the `lazy` keyword argument. For example, if set up on a slurm cluster, you could pass `addprocs_function = addprocs_slurm`, which will set up slurm processes.\n * `heap_size_hint_in_bytes::Union{Int,Nothing}=nothing`: On Julia 1.9+, you may set the `--heap-size-hint` flag on Julia processes, recommending garbage collection once a process is close to the recommended size. This is important for long-running distributed jobs where each process has an independent memory, and can help avoid out-of-memory errors. By default, this is set to `Sys.free_memory() / numprocs`.\n * `worker_imports::Union{Vector{Symbol},Nothing}=nothing`: If you want to import additional modules on each worker, pass them here as a vector of symbols. 
By default some of the extensions will automatically be loaded when needed.\n * `runtests::Bool=true`: Whether to run (quick) tests before starting the search, to see if there will be any problems during the equation search related to the host environment.\n * `run_id::Union{String,Nothing}=nothing`: A unique identifier for the run. This will be used to store outputs from the run in the `outputs` directory. If not specified, a unique ID will be generated.\n * `loss_type::Type=Nothing`: If you would like to use a different type for the loss than for the data you passed, specify the type here. Note that if you pass complex data `::Complex{L}`, then the loss type will automatically be set to `L`.\n * `selection_method::Function`: Function to select an expression from the Pareto frontier for use in `predict`. See `SymbolicRegression.MLJInterfaceModule.choose_best` for an example. This function should return a single integer specifying the index of the expression to use. By default, this maximizes the score (a pound-for-pound rating) of expressions reaching the threshold of 1.5x the minimum loss. To override this at prediction time, you can pass a named tuple with keys `data` and `idx` to `predict`. See the Operations section for details.\n * `dimensions_type::AbstractDimensions`: The type of dimensions to use when storing the units of the data. By default this is `DynamicQuantities.SymbolicDimensions`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. The expression used for prediction is defined by the `selection_method` function, which can be seen by viewing `report(mach).best_idx`.\n * `predict(mach, (data=Xnew, idx=i))`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. 
By passing a named tuple with keys `data` and `idx`, you are able to specify the equation you wish to evaluate in `idx`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `best_idx::Vector{Int}`: The index of the best expression in each Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Vector{Node{T}}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). The outer vector is indexed by target variable, and the inner vector is ordered by increasing complexity. `T` is equal to the element type of the passed data.\n * `equation_strings::Vector{Vector{String}}`: The expressions discovered by the search, represented as strings for easy inspection.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `best_idx::Vector{Int}`: The index of the best expression in each Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Vector{Node{T}}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). The outer vector is indexed by target variable, and the inner vector is ordered by increasing complexity.\n * `equation_strings::Vector{Vector{String}}`: The expressions discovered by the search, represented as strings for easy inspection.\n * `complexities::Vector{Vector{Int}}`: The complexity of each expression in each Pareto frontier.\n * `losses::Vector{Vector{L}}`: The loss of each expression in each Pareto frontier, according to the loss function specified in the model. 
The type `L` is the loss type, which is usually the same as the element type of data passed (i.e., `T`), but can differ if complex data types are passed.\n * `scores::Vector{Vector{L}}`: A metric which considers both the complexity and loss of an expression, equal to the change in the log-loss divided by the change in complexity, relative to the previous expression along the Pareto frontier. A larger score aims to indicate an expression is more likely to be the true expression generating the data, but this is very problem-dependent and generally several other factors should be considered.\n\n# Examples\n\n```julia\nusing MLJ\nMultitargetSRRegressor = @load MultitargetSRRegressor pkg=SymbolicRegression\nX = (a=rand(100), b=rand(100), c=rand(100))\nY = (y1=(@. cos(X.c) * 2.1 - 0.9), y2=(@. X.a * X.b + X.c))\nmodel = MultitargetSRRegressor(binary_operators=[+, -, *], unary_operators=[exp], niterations=100)\nmach = machine(model, X, Y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equations used:\nr = report(mach)\nfor (output_index, (eq, i)) in enumerate(zip(r.equation_strings, r.best_idx))\n println(\"Equation used for \", output_index, \": \", eq[i])\nend\n```\n\nSee also [`SRRegressor`](@ref).\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "RidgeRegressor" +":package_url" = "https://github.com/MilesCranmer/SymbolicRegression.jl" +":package_name" = "SymbolicRegression" +":name" = "MultitargetSRRegressor" ":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":implemented_methods" = [] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":predict_scitype" = 
"`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":target_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":supports_training_losses" = "`false`" -":supports_weights" = "`false`" +":supports_weights" = "`true`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[MLJMultivariateStatsInterface.KernelPCA] +[SymbolicRegression.SRRegressor] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, Function}\", \"Symbol\", \"Bool\", \"Real\", \"Real\", \"Int64\")`" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameter_types" = "`(\"Union{Nothing, VersionNumber}\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, SymbolicRegression.CoreModule.ExpressionSpecModule.AbstractExpressionSpec}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Type{<:DynamicExpressions.OperatorEnumModule.AbstractOperatorEnum}}\", \"Union{Nothing, 
SymbolicRegression.CoreModule.MutationWeightsModule.AbstractMutationWeights, NamedTuple, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"Bool\", \"Symbol\", \"Union{Nothing, Function, SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping}\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Bool\", \"Union{AbstractString, Optim.AbstractOptimizer}\", \"Int64\", \"AbstractFloat\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"IO\", \"Bool\", \"Bool\", \"Union{Nothing, ADTypes.AbstractADType, Symbol}\", \"Bool\", \"Any\", \"Union{Nothing, Integer}\", \"Integer\", \"Union{Nothing, Bool}\", \"Union{Nothing, String}\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Bool\", \"AbstractString\", \"Bool\", \"Union{Nothing, Type{<:DynamicExpressions.ExpressionModule.AbstractExpression}}\", \"Union{Nothing, NamedTuple}\", \"Union{Nothing, Type{<:DynamicExpressions.NodeModule.AbstractExpressionNode}}\", \"Union{Nothing, AbstractString}\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Vector{Symbol}}\", \"Union{Nothing, SymbolicRegression.LoggingModule.AbstractSRLogger}\", \"Bool\", \"Union{Nothing, String}\", \"Type\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" +":package_uuid" = "8254be44-1295-4e6a-a16d-46603ac705cb" +":hyperparameter_ranges" = "`(nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJMultivariateStatsInterface.KernelPCA" -":hyperparameters" = "`(:maxoutdim, :kernel, :solver, :inverse, :beta, :tol, :maxiter)`" 
+":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "Apache-2.0" +":prediction_type" = ":deterministic" +":load_path" = "SymbolicRegression.MLJInterfaceModule.SRRegressor" +":hyperparameters" = "`(:defaults, :binary_operators, :unary_operators, :maxsize, :maxdepth, :expression_spec, :populations, :population_size, :ncycles_per_iteration, :elementwise_loss, :loss_function, :loss_function_expression, :dimensional_constraint_penalty, :parsimony, :constraints, :nested_constraints, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :warmup_maxsize_by, :adaptive_parsimony_scaling, :operator_enum_constructor, :mutation_weights, :crossover_probability, :annealing, :alpha, :tournament_selection_n, :tournament_selection_p, :early_stop_condition, :batching, :batch_size, :dimensionless_constants_only, :loss_scale, :complexity_mapping, :use_frequency, :use_frequency_in_tournament, :should_simplify, :perturbation_factor, :probability_negate_constant, :skip_mutation_failures, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_f_calls_limit, :optimizer_options, :should_optimize_constants, :migration, :hof_migration, :fraction_replaced, :fraction_replaced_hof, :topn, :timeout_in_seconds, :max_evals, :input_stream, :turbo, :bumper, :autodiff_backend, :deterministic, :seed, :verbosity, :print_precision, :progress, :output_directory, :save_to_file, :bin_constraints, :una_constraints, :terminal_width, :use_recorder, :recorder_file, :define_helper_functions, :expression_type, :expression_options, :node_type, :output_file, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :worker_imports, :logger, :runtests, :run_id, :loss_type, :selection_method, :dimensions_type)`" ":is_pure_julia" = "`true`" -":human_name" = "kernel prinicipal component analysis model" -":is_supervised" = "`false`" +":human_name" = 
"Symbolic Regression via Evolutionary Search" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nKernelPCA\n```\n\nA model type for constructing a kernel prinicipal component analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nKernelPCA = @load KernelPCA pkg=MultivariateStats\n```\n\nDo `model = KernelPCA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `KernelPCA(maxoutdim=...)`.\n\nIn kernel PCA the linear operations of ordinary principal component analysis are performed in a [reproducing Hilbert space](https://en.wikipedia.org/wiki/Reproducing_kernel_Hilbert_space).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `maxoutdim=0`: Controls the the dimension (number of columns) of the output, `outdim`. Specifically, `outdim = min(n, indim, maxoutdim)`, where `n` is the number of observations and `indim` the input dimension.\n * `kernel::Function=(x,y)->x'y`: The kernel function, takes in 2 vector arguments x and y, returns a scalar value. 
Defaults to the dot product of `x` and `y`.\n * `solver::Symbol=:eig`: solver to use for the eigenvalues, one of `:eig`(default, uses `LinearAlgebra.eigen`), `:eigs`(uses `Arpack.eigs`).\n * `inverse::Bool=true`: perform calculations needed for inverse transform\n * `beta::Real=1.0`: strength of the ridge regression that learns the inverse transform when inverse is true.\n * `tol::Real=0.0`: Convergence tolerance for eigenvalue solver.\n * `maxiter::Int=300`: maximum number of iterations for eigenvalue solver.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having same the number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. 
In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and ouput respectively.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data.\n * `principalvars`: The variance of the principal components.\n\n# Examples\n\n```julia\nusing MLJ\nusing LinearAlgebra\n\nKernelPCA = @load KernelPCA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nfunction rbf_kernel(length_scale)\n return (x,y) -> norm(x-y)^2 / ((2 * length_scale)^2)\nend\n\nmodel = KernelPCA(maxoutdim=2, kernel=rbf_kernel(1))\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`PCA`](@ref), [`ICA`](@ref), [`FactorAnalysis`](@ref), [`PPCA`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "KernelPCA" -":target_in_fit" = "`false`" +":docstring" = """```julia\nSRRegressor\n```\n\nA model type for constructing a Symbolic Regression via Evolutionary Search, based on [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\n```\n\nDo `model = SRRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `SRRegressor(defaults=...)`.\n\nSingle-target Symbolic Regression regressor (`SRRegressor`) searches for symbolic expressions that predict a single target variable from a set of input variables. All data is assumed to be `Continuous`. The search is performed using an evolutionary algorithm. This algorithm is described in the paper https://arxiv.org/abs/2305.01582.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nOR\n\n```julia\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`. Variable names in discovered expressions will be taken from the column names of `X`, if available. Units in columns of `X` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`. Units in `y` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `w` is the observation weights which can either be `nothing` (default) or an `AbstractVector` whose element scitype is `Count` or `Continuous`.\n\nTrain the machine using `fit!(mach)`, inspect the discovered expressions with `report(mach)`, and predict on new data with `predict(mach, Xnew)`. Note that unlike other regressors, symbolic regression stores a list of trained models. The model chosen from this list is defined by the function `selection_method` keyword argument, which by default balances accuracy and complexity. You can override this at prediction time by passing a named tuple with keys `data` and `idx`.\n\n# Hyper-parameters\n\n * `defaults`: What set of defaults to use for `Options`. 
The default, `nothing`, will simply take the default options from the current version of SymbolicRegression. However, you may also select the defaults from an earlier version, such as `v\"0.24.5\"`.\n * `binary_operators`: Vector of binary operators (functions) to use. Each operator should be defined for two input scalars, and one output scalar. All operators need to be defined over the entire real line (excluding infinity - these are stopped before they are input), or return `NaN` where not defined. For speed, define it so it takes two reals of the same type as input, and outputs the same type. For the SymbolicUtils simplification backend, you will need to define a generic method of the operator so it takes arbitrary types.\n * `operator_enum_constructor`: Constructor function to use for creating the operators enum. By default, OperatorEnum is used, but you can provide a different constructor like GenericOperatorEnum. The constructor must accept the keyword arguments `binary_operators` and `unary_operators`.\n * `unary_operators`: Same, but for unary operators (one input scalar, gives an output scalar).\n * `constraints`: Array of pairs specifying size constraints for each operator. The constraints for a binary operator should be a 2-tuple (e.g., `(-1, -1)`) and the constraints for a unary operator should be an `Int`. A size constraint is a limit to the size of the subtree in each argument of an operator. e.g., `[(^)=>(-1, 3)]` means that the `^` operator can have arbitrary size (`-1`) in its left argument, but a maximum size of `3` in its right argument. Default is no constraints.\n * `batching`: Whether to evolve based on small mini-batches of data, rather than the entire dataset.\n * `batch_size`: What batch size to use if using batching.\n * `elementwise_loss`: What elementwise loss function to use. Can be one of the following losses, or any other loss of type `SupervisedLoss`. 
You can also pass a function that takes a scalar target (left argument), and scalar predicted (right argument), and returns a scalar. This will be averaged over the predicted data. If weights are supplied, your function should take a third argument for the weight scalar. Included losses: Regression: - `LPDistLoss{P}()`, - `L1DistLoss()`, - `L2DistLoss()` (mean square), - `LogitDistLoss()`, - `HuberLoss(d)`, - `L1EpsilonInsLoss(ϵ)`, - `L2EpsilonInsLoss(ϵ)`, - `PeriodicLoss(c)`, - `QuantileLoss(τ)`, Classification: - `ZeroOneLoss()`, - `PerceptronLoss()`, - `L1HingeLoss()`, - `SmoothedL1HingeLoss(γ)`, - `ModifiedHuberLoss()`, - `L2MarginLoss()`, - `ExpLoss()`, - `SigmoidLoss()`, - `DWDMarginLoss(q)`.\n * `loss_function`: Alternatively, you may redefine the loss used as any function of `tree::AbstractExpressionNode{T}`, `dataset::Dataset{T}`, and `options::AbstractOptions`, so long as you output a non-negative scalar of type `T`. This is useful if you want to use a loss that takes into account derivatives, or correlations across the dataset. This also means you could use a custom evaluation for a particular expression. If you are using `batching=true`, then your function should accept a fourth argument `idx`, which is either `nothing` (indicating that the full dataset should be used), or a vector of indices to use for the batch. For example,\n\n ```\n function my_loss(tree, dataset::Dataset{T,L}, options)::L where {T,L}\n prediction, flag = eval_tree_array(tree, dataset.X, options)\n if !flag\n return L(Inf)\n end\n return sum((prediction .- dataset.y) .^ 2) / dataset.n\n end\n ```\n * `loss_function_expression`: Similar to `loss_function`, but takes `AbstractExpression` instead of `AbstractExpressionNode` as its first argument. Useful for `TemplateExpressionSpec`.\n * `loss_scale`: Determines how loss values are scaled when computing scores. Options are:\n\n * `:log` (default): Uses logarithmic scaling of loss ratios. 
This mode requires non-negative loss values and is ideal for traditional loss functions that are always positive.\n * `:linear`: Uses direct differences between losses. This mode handles any loss values (including negative) and is useful for custom loss functions, especially those based on likelihoods.\n * `expression_spec::AbstractExpressionSpec`: A specification of what types of expressions to use in the search. For example, `ExpressionSpec()` (default). You can also see `TemplateExpressionSpec` and `ParametricExpressionSpec` for specialized cases.\n * `populations`: How many populations of equations to use.\n * `population_size`: How many equations in each population.\n * `ncycles_per_iteration`: How many generations to consider per iteration.\n * `tournament_selection_n`: Number of expressions considered in each tournament.\n * `tournament_selection_p`: The fittest expression in a tournament is to be selected with probability `p`, the next fittest with probability `p*(1-p)`, and so forth.\n * `topn`: Number of equations to return to the host process, and to consider for the hall of fame.\n * `complexity_of_operators`: What complexity should be assigned to each operator, and the occurrence of a constant or variable. By default, this is 1 for all operators. Can be a real number as well, in which case the complexity of an expression will be rounded to the nearest integer. Input this in the form of, e.g., [(^) => 3, sin => 2].\n * `complexity_of_constants`: What complexity should be assigned to use of a constant. By default, this is 1.\n * `complexity_of_variables`: What complexity should be assigned to use of a variable, which can also be a vector indicating different per-variable complexity. By default, this is 1.\n * `complexity_mapping`: Alternatively, you can pass a function that takes the expression as input and returns the complexity. 
Make sure that this operates on `AbstractExpression` (and unpacks to `AbstractExpressionNode`), and returns an integer.\n * `alpha`: The probability of accepting an equation mutation during regularized evolution is given by exp(-delta_loss/(alpha * T)), where T goes from 1 to 0. Thus, alpha=infinite is the same as no annealing.\n * `maxsize`: Maximum size of equations during the search.\n * `maxdepth`: Maximum depth of equations during the search, by default this is set equal to the maxsize.\n * `parsimony`: A multiplicative factor for how much complexity is punished.\n * `dimensional_constraint_penalty`: An additive factor if the dimensional constraint is violated.\n * `dimensionless_constants_only`: Whether to only allow dimensionless constants.\n * `use_frequency`: Whether to use a parsimony that adapts to the relative proportion of equations at each complexity; this will ensure that there are a balanced number of equations considered for every complexity.\n * `use_frequency_in_tournament`: Whether to use the adaptive parsimony described above inside the score, rather than just at the mutation accept/reject stage.\n * `adaptive_parsimony_scaling`: How much to scale the adaptive parsimony term in the loss. Increase this if the search is spending too much time optimizing the most complex equations.\n * `turbo`: Whether to use `LoopVectorization.@turbo` to evaluate expressions. This can be significantly faster, but is only compatible with certain operators. *Experimental!*\n * `bumper`: Whether to use Bumper.jl for faster evaluation. 
*Experimental!*\n * `migration`: Whether to migrate equations between processes.\n * `hof_migration`: Whether to migrate equations from the hall of fame to processes.\n * `fraction_replaced`: What fraction of each population to replace with migrated equations at the end of each cycle.\n * `fraction_replaced_hof`: What fraction to replace with hall of fame equations at the end of each cycle.\n * `should_simplify`: Whether to simplify equations. If you pass a custom objective, this will be set to `false`.\n * `should_optimize_constants`: Whether to use an optimization algorithm to periodically optimize constants in equations.\n * `optimizer_algorithm`: Select algorithm to use for optimizing constants. Default is `Optim.BFGS(linesearch=LineSearches.BackTracking())`.\n * `optimizer_nrestarts`: How many different random starting positions to consider for optimization of constants.\n * `optimizer_probability`: Probability of performing optimization of constants at the end of a given iteration.\n * `optimizer_iterations`: How many optimization iterations to perform. This gets passed to `Optim.Options` as `iterations`. The default is 8.\n * `optimizer_f_calls_limit`: How many function calls to allow during optimization. This gets passed to `Optim.Options` as `f_calls_limit`. The default is `10_000`.\n * `optimizer_options`: General options for the constant optimization. For details we refer to the documentation on `Optim.Options` from the `Optim.jl` package. Options can be provided here as `NamedTuple`, e.g. `(iterations=16,)`, as a `Dict`, e.g. Dict(:x_tol => 1.0e-32,), or as an `Optim.Options` instance.\n * `autodiff_backend`: The backend to use for differentiation, which should be an instance of `AbstractADType` (see `ADTypes.jl`). Default is `nothing`, which means `Optim.jl` will estimate gradients (likely with finite differences). You can also pass a symbolic version of the backend type, such as `:Zygote` for Zygote, `:Enzyme`, etc. 
Most backends will not work, and many will never work due to incompatibilities, though support for some is gradually being added.\n * `perturbation_factor`: When mutating a constant, either multiply or divide by (1+perturbation_factor)^(rand()+1).\n * `probability_negate_constant`: Probability of negating a constant in the equation when mutating it.\n * `mutation_weights`: Relative probabilities of the mutations. The struct `MutationWeights` (or any `AbstractMutationWeights`) should be passed to these options. See its documentation on `MutationWeights` for the different weights.\n * `crossover_probability`: Probability of performing crossover.\n * `annealing`: Whether to use simulated annealing.\n * `warmup_maxsize_by`: Whether to slowly increase the max size from 5 up to `maxsize`. If nonzero, specifies the fraction through the search at which the maxsize should be reached.\n * `verbosity`: Whether to print debugging statements or not.\n * `print_precision`: How many digits to print when printing equations. By default, this is 5.\n * `output_directory`: The base directory to save output files to. Files will be saved in a subdirectory according to the run ID. By default, this is `./outputs`.\n * `save_to_file`: Whether to save equations to a file during the search.\n * `bin_constraints`: See `constraints`. This is the same, but specified for binary operators only (for example, if you have an operator that is both a binary and unary operator).\n * `una_constraints`: Likewise, for unary operators.\n * `seed`: What random seed to use. `nothing` uses no seed.\n * `progress`: Whether to use a progress bar output (`verbosity` will have no effect).\n * `early_stop_condition`: Float - whether to stop early if the mean loss gets below this value. 
Function - a function taking (loss, complexity) as arguments and returning true or false.\n * `timeout_in_seconds`: Float64 - the time in seconds after which to exit (as an alternative to the number of iterations).\n * `max_evals`: Int (or Nothing) - the maximum number of evaluations of expressions to perform.\n * `input_stream`: the stream to read user input from. By default, this is `stdin`. If you encounter issues with reading from `stdin`, like a hang, you can simply pass `devnull` to this argument.\n * `skip_mutation_failures`: Whether to simply skip over mutations that fail or are rejected, rather than to replace the mutated expression with the original expression and proceed normally.\n * `nested_constraints`: Specifies how many times a combination of operators can be nested. For example, `[sin => [cos => 0], cos => [cos => 2]]` specifies that `cos` may never appear within a `sin`, but `sin` can be nested with itself an unlimited number of times. The second term specifies that `cos` can be nested up to 2 times within a `cos`, so that `cos(cos(cos(x)))` is allowed (as well as any combination of `+` or `-` within it), but `cos(cos(cos(cos(x))))` is not allowed. When an operator is not specified, it is assumed that it can be nested an unlimited number of times. This requires that there is no operator which is used both in the unary operators and the binary operators (e.g., `-` could be both subtract, and negation). For binary operators, both arguments are treated the same way, and the max of each argument is constrained.\n * `deterministic`: Use a global counter for the birth time, rather than calls to `time()`. This gives perfect resolution, and is therefore deterministic. However, it is not thread safe, and must be used in serial mode.\n * `define_helper_functions`: Whether to define helper functions for constructing and evaluating trees.\n * `niterations::Int=10`: The number of iterations to perform the search. 
More iterations will improve the results.\n * `parallelism=:multithreading`: What parallelism mode to use. The options are `:multithreading`, `:multiprocessing`, and `:serial`. By default, multithreading will be used. Multithreading uses less memory, but multiprocessing can handle multi-node compute. If using `:multithreading` mode, all threads available to Julia are used. If using `:multiprocessing`, `numprocs` processes will be created dynamically if `procs` is unset. If you have already allocated processes, pass them to the `procs` argument and they will be used. You may also pass a string instead of a symbol, like `\"multithreading\"`.\n * `numprocs::Union{Int, Nothing}=nothing`: The number of processes to use, if you want `equation_search` to set this up automatically. By default this will be `4`, but can be any number (you should pick a number <= the number of cores available).\n * `procs::Union{Vector{Int}, Nothing}=nothing`: If you have set up a distributed run manually with `procs = addprocs()` and `@everywhere`, pass the `procs` to this keyword argument.\n * `addprocs_function::Union{Function, Nothing}=nothing`: If using multiprocessing (`parallelism=:multiprocessing`), and are not passing `procs` manually, then they will be allocated dynamically using `addprocs`. However, you may also pass a custom function to use instead of `addprocs`. This function should take a single positional argument, which is the number of processes to use, as well as the `lazy` keyword argument. For example, if set up on a slurm cluster, you could pass `addprocs_function = addprocs_slurm`, which will set up slurm processes.\n * `heap_size_hint_in_bytes::Union{Int,Nothing}=nothing`: On Julia 1.9+, you may set the `--heap-size-hint` flag on Julia processes, recommending garbage collection once a process is close to the recommended size. 
This is important for long-running distributed jobs where each process has an independent memory, and can help avoid out-of-memory errors. By default, this is set to `Sys.free_memory() / numprocs`.\n * `worker_imports::Union{Vector{Symbol},Nothing}=nothing`: If you want to import additional modules on each worker, pass them here as a vector of symbols. By default some of the extensions will automatically be loaded when needed.\n * `runtests::Bool=true`: Whether to run (quick) tests before starting the search, to see if there will be any problems during the equation search related to the host environment.\n * `run_id::Union{String,Nothing}=nothing`: A unique identifier for the run. This will be used to store outputs from the run in the `outputs` directory. If not specified, a unique ID will be generated.\n * `loss_type::Type=Nothing`: If you would like to use a different type for the loss than for the data you passed, specify the type here. Note that if you pass complex data `::Complex{L}`, then the loss type will automatically be set to `L`.\n * `selection_method::Function`: Function to select an expression from the Pareto frontier for use in `predict`. See `SymbolicRegression.MLJInterfaceModule.choose_best` for an example. This function should return a single integer specifying the index of the expression to use. By default, this maximizes the score (a pound-for-pound rating) of expressions reaching the threshold of 1.5x the minimum loss. To override this at prediction time, you can pass a named tuple with keys `data` and `idx` to `predict`. See the Operations section for details.\n * `dimensions_type::AbstractDimensions`: The type of dimensions to use when storing the units of the data. By default this is `DynamicQuantities.SymbolicDimensions`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above. 
The expression used for prediction is defined by the `selection_method` function, which can be seen by viewing `report(mach).best_idx`.\n * `predict(mach, (data=Xnew, idx=i))`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. By passing a named tuple with keys `data` and `idx`, you are able to specify the equation you wish to evaluate in `idx`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `best_idx::Int`: The index of the best expression in the Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Node{T}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). `T` is equal to the element type of the passed data.\n * `equation_strings::Vector{String}`: The expressions discovered by the search, represented as strings for easy inspection.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `best_idx::Int`: The index of the best expression in the Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Node{T}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity).\n * `equation_strings::Vector{String}`: The expressions discovered by the search, represented as strings for easy inspection.\n * `complexities::Vector{Int}`: The complexity of each expression in the Pareto frontier.\n * `losses::Vector{L}`: The loss of each expression in the Pareto frontier, according to the loss function specified in the model. 
The type `L` is the loss type, which is usually the same as the element type of data passed (i.e., `T`), but can differ if complex data types are passed.\n * `scores::Vector{L}`: A metric which considers both the complexity and loss of an expression, equal to the change in the log-loss divided by the change in complexity, relative to the previous expression along the Pareto frontier. A larger score aims to indicate an expression is more likely to be the true expression generating the data, but this is very problem-dependent and generally several other factors should be considered.\n\n# Examples\n\n```julia\nusing MLJ\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\nX, y = @load_boston\nmodel = SRRegressor(binary_operators=[+, -, *], unary_operators=[exp], niterations=100)\nmach = machine(model, X, y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equation used:\nr = report(mach)\nprintln(\"Equation used:\", r.equation_strings[r.best_idx])\n```\n\nWith units and variable names:\n\n```julia\nusing MLJ\nusing DynamicQuantities\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\n\nX = (; x1=rand(32) .* us\"km/h\", x2=rand(32) .* us\"km\")\ny = @. 
X.x2 / X.x1 + 0.5us\"h\"\nmodel = SRRegressor(binary_operators=[+, -, *, /])\nmach = machine(model, X, y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equation used:\nr = report(mach)\nprintln(\"Equation used:\", r.equation_strings[r.best_idx])\n```\n\nSee also [`MultitargetSRRegressor`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/MilesCranmer/SymbolicRegression.jl" +":package_name" = "SymbolicRegression" +":name" = "SRRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] +":implemented_methods" = [] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" -":supports_weights" = "`false`" +":supports_weights" = "`true`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[MLJMultivariateStatsInterface.MultitargetRidgeRegressor] +[MLJMultivariateStatsInterface.LDA] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Union{Real, AbstractVecOrMat}\", \"Bool\")`" +":hyperparameter_types" = "`(\"Symbol\", \"StatsBase.CovarianceEstimator\", \"StatsBase.CovarianceEstimator\", \"Int64\", 
\"Float64\", \"Distances.SemiMetric\")`" ":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" -":prediction_type" = ":deterministic" -":load_path" = "MLJMultivariateStatsInterface.MultitargetRidgeRegressor" -":hyperparameters" = "`(:lambda, :bias)`" +":prediction_type" = ":probabilistic" +":load_path" = "MLJMultivariateStatsInterface.LDA" +":hyperparameters" = "`(:method, :cov_w, :cov_b, :outdim, :regcoef, :dist)`" ":is_pure_julia" = "`true`" -":human_name" = "multitarget ridge regressor" +":human_name" = "linear discriminant analysis model" ":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nMultitargetRidgeRegressor\n```\n\nA model type for constructing a multitarget ridge regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nMultitargetRidgeRegressor = @load MultitargetRidgeRegressor pkg=MultivariateStats\n```\n\nDo `model = MultitargetRidgeRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetRidgeRegressor(lambda=...)`.\n\nMulti-target ridge regression adds a quadratic penalty term to multi-target least squares regression, for regularization. Ridge regression is particularly useful in the case of multicollinearity. In this case, the output represents a response vector. Options exist to specify a bias term, and to adjust the strength of the penalty term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any table of responses whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `lambda=1.0`: Is the non-negative parameter for the regularization strength. 
If lambda is 0, ridge regression is equivalent to linear least squares regression, and as lambda approaches infinity, all the linear coefficients approach 0.\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```julia\nusing MLJ\nusing DataFrames\n\nRidgeRegressor = @load MultitargetRidgeRegressor pkg=MultivariateStats\n\nX, y = make_regression(100, 6; n_targets = 2) # a table and a table (synthetic data)\n\nridge_regressor = RidgeRegressor(lambda=1.5)\nmach = machine(ridge_regressor, X, y) |> fit!\n\nXnew, _ = make_regression(3, 6)\nyhat = predict(mach, Xnew) # new predictions\n```\n\nSee also [`LinearRegressor`](@ref), [`MultitargetLinearRegressor`](@ref), [`RidgeRegressor`](@ref)\n""" +":docstring" = """```julia\nLDA\n```\n\nA model type for constructing a linear discriminant analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nLDA = @load LDA pkg=MultivariateStats\n```\n\nDo `model = LDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LDA(method=...)`.\n\n[Multiclass linear discriminant analysis](https://en.wikipedia.org/wiki/Linear_discriminant_analysis) learns a projection in a space of features to a lower dimensional space, in a way that attempts to preserve as much as possible the degree to which the classes of a discrete target variable can be discriminated. 
This can be used either for dimension reduction of the features (see `transform` below) or for probabilistic classification of the target (see `predict` below).\n\nIn the case of prediction, the class probability for a new observation reflects the proximity of that observation to training observations associated with that class, and how far away the observation is from observations associated with other classes. Specifically, the distances, in the transformed (projected) space, of a new observation, from the centroid of each target class, is computed; the resulting vector of distances, multiplied by minus one, is passed to a softmax function to obtain a class probability prediction. Here \"distance\" is computed using a user-specified distance function.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `method::Symbol=:gevd`: The solver, one of `:gevd` or `:whiten` methods.\n * `cov_w::StatsBase.SimpleCovariance()`: An estimator for the within-class covariance (used in computing the within-class scatter matrix, `Sw`). Any robust estimator from `CovarianceEstimation.jl` can be used.\n * `cov_b::StatsBase.SimpleCovariance()`: The same as `cov_w` but for the between-class covariance (used in computing the between-class scatter matrix, `Sb`).\n * `outdim::Int=0`: The output dimension, i.e dimension of the transformed space, automatically set to `min(indim, nclasses-1)` if equal to 0.\n * `regcoef::Float64=1e-6`: The regularization coefficient. 
A positive value `regcoef*eigmax(Sw)` where `Sw` is the within-class scatter matrix, is added to the diagonal of `Sw` to improve numerical stability. This can be useful if using the standard covariance estimator.\n * `dist=Distances.SqEuclidean()`: The distance metric to use when performing classification (to compare the distance between a new point and centroids in the transformed space); must be a subtype of `Distances.SemiMetric` from Distances.jl, e.g., `Distances.CosineDist`.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space i.e the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The mean of the untransformed training data. A vector of length `indim`.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool).\n * `class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n * `class_weights`: The weights (class counts) of each class. 
A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. (See fitted params section above.)\n * `Sb`: The between class scatter matrix.\n * `Sw`: The within class scatter matrix.\n\n# Examples\n\n```julia\nusing MLJ\n\nLDA = @load LDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = LDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n\n```\n\nSee also [`BayesianLDA`](@ref), [`SubspaceLDA`](@ref), [`BayesianSubspaceLDA`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" ":package_name" = "MultivariateStats" -":name" = "MultitargetRidgeRegressor" +":name" = "LDA" ":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" @@ -8364,36 +8401,36 @@ ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[MLJMultivariateStatsInterface.SubspaceLDA] +[MLJMultivariateStatsInterface.MultitargetLinearRegressor] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Bool\", \"Int64\", \"Distances.SemiMetric\")`" +":hyperparameter_types" = "`(\"Bool\",)`" ":package_uuid" = 
"6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing,)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "MIT" -":prediction_type" = ":probabilistic" -":load_path" = "MLJMultivariateStatsInterface.SubspaceLDA" -":hyperparameters" = "`(:normalize, :outdim, :dist)`" +":prediction_type" = ":deterministic" +":load_path" = "MLJMultivariateStatsInterface.MultitargetLinearRegressor" +":hyperparameters" = "`(:bias,)`" ":is_pure_julia" = "`true`" -":human_name" = "subpace LDA model" +":human_name" = "multitarget linear regressor" ":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nSubspaceLDA\n```\n\nA model type for constructing a subpace LDA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nSubspaceLDA = @load SubspaceLDA pkg=MultivariateStats\n```\n\nDo `model = SubspaceLDA()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `SubspaceLDA(normalize=...)`.\n\nMulticlass subspace linear discriminant analysis (LDA) is a variation on ordinary [`LDA`](@ref) suitable for high dimensional data, as it avoids storing scatter matrices. For details, refer the [MultivariateStats.jl documentation](https://juliastats.org/MultivariateStats.jl/stable/).\n\nIn addition to dimension reduction (using `transform`) probabilistic classification is provided (using `predict`). In the case of classification, the class probability for a new observation reflects the proximity of that observation to training observations associated with that class, and how far away the observation is from observations associated with other classes. Specifically, the distances, in the transformed (projected) space, of a new observation, from the centroid of each target class, is computed; the resulting vector of distances, multiplied by minus one, is passed to a softmax function to obtain a class probability prediction. Here \"distance\" is computed using a user-specified distance function.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `normalize=true`: Option to normalize the between class variance for the number of observations in each class, one of `true` or `false`.\n * `outdim`: the ouput dimension, automatically set to `min(indim, nclasses-1)` if equal to `0`. 
If a non-zero `outdim` is passed, then the actual output dimension used is `min(rank, outdim)` where `rank` is the rank of the within-class covariance matrix.\n * `dist=Distances.SqEuclidean()`: The distance metric to use when performing classification (to compare the distance between a new point and centroids in the transformed space); must be a subtype of `Distances.SemiMetric` from Distances.jl, e.g., `Distances.CosineDist`.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space i.e the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The mean of the untransformed training data. A vector of length `indim`.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool)\n\n`class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n\n * `class_weights`: The weights (class counts) of each class. A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. 
(See fitted params section above.)\n * `explained_variance_ratio`: The ratio of explained variance to total variance. Each dimension corresponds to an eigenvalue.\n\n# Examples\n\n```julia\nusing MLJ\n\nSubspaceLDA = @load SubspaceLDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = SubspaceLDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n```\n\nSee also [`LDA`](@ref), [`BayesianLDA`](@ref), [`BayesianSubspaceLDA`](@ref)\n""" +":docstring" = """```julia\nMultitargetLinearRegressor\n```\n\nA model type for constructing a multitarget linear regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nMultitargetLinearRegressor = @load MultitargetLinearRegressor pkg=MultivariateStats\n```\n\nDo `model = MultitargetLinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetLinearRegressor(bias=...)`.\n\n`MultitargetLinearRegressor` assumes the target variable is vector-valued with continuous components. It trains a linear prediction function using the least squares algorithm. 
Options exist to specify a bias term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any table of responses whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```julia\nusing MLJ\nusing DataFrames\n\nLinearRegressor = @load MultitargetLinearRegressor pkg=MultivariateStats\nlinear_regressor = LinearRegressor()\n\nX, y = make_regression(100, 9; n_targets = 2) # a table and a table (synthetic data)\n\nmach = machine(linear_regressor, X, y) |> fit!\n\nXnew, _ = make_regression(3, 9)\nyhat = predict(mach, Xnew) # new predictions\n```\n\nSee also [`LinearRegressor`](@ref), [`RidgeRegressor`](@ref), [`MultitargetRidgeRegressor`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" ":package_name" = "MultivariateStats" -":name" = "SubspaceLDA" +":name" = "MultitargetLinearRegressor" ":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" 
-":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":predict_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":target_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" @@ -8401,11 +8438,11 @@ ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[MLJMultivariateStatsInterface.BayesianLDA] +[MLJMultivariateStatsInterface.BayesianSubspaceLDA] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Symbol\", \"StatsBase.CovarianceEstimator\", \"StatsBase.CovarianceEstimator\", \"Int64\", \"Float64\", \"Union{Nothing, Dict{<:Any, <:Real}, CategoricalDistributions.UnivariateFinite{<:Any, <:Any, <:Any, <:Real}}\")`" +":hyperparameter_types" = "`(\"Bool\", \"Int64\", \"Union{Nothing, Dict{<:Any, <:Real}, CategoricalDistributions.UnivariateFinite{<:Any, <:Any, <:Any, <:Real}}\")`" ":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" ":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -8413,17 +8450,17 @@ ":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" ":prediction_type" = ":probabilistic" -":load_path" = "MLJMultivariateStatsInterface.BayesianLDA" -":hyperparameters" = "`(:method, :cov_w, :cov_b, :outdim, :regcoef, :priors)`" +":load_path" = "MLJMultivariateStatsInterface.BayesianSubspaceLDA" 
+":hyperparameters" = "`(:normalize, :outdim, :priors)`" ":is_pure_julia" = "`true`" -":human_name" = "Bayesian LDA model" +":human_name" = "Bayesian subspace LDA model" ":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nBayesianLDA\n```\n\nA model type for constructing a Bayesian LDA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nBayesianLDA = @load BayesianLDA pkg=MultivariateStats\n```\n\nDo `model = BayesianLDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `BayesianLDA(method=...)`.\n\nThe Bayesian multiclass LDA algorithm learns a projection matrix as described in ordinary [`LDA`](@ref). Predicted class posterior probability distributions are derived by applying Bayes' rule with a multivariate Gaussian class-conditional distribution. A prior class distribution can be specified by the user or inferred from training data class frequency.\n\nSee also the [package documentation](https://multivariatestatsjl.readthedocs.io/en/latest/lda.html). 
For more information about the algorithm, see [Li, Zhu and Ogihara (2006): Using Discriminant Analysis for Multi-class Classification: An Experimental Investigation](https://doi.org/10.1007/s10115-006-0013-y).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `method::Symbol=:gevd`: choice of solver, one of `:gevd` or `:whiten` methods.\n * `cov_w::StatsBase.SimpleCovariance()`: An estimator for the within-class covariance (used in computing the within-class scatter matrix, `Sw`). Any robust estimator from `CovarianceEstimation.jl` can be used.\n * `cov_b::StatsBase.SimpleCovariance()`: The same as `cov_w` but for the between-class covariance (used in computing the between-class scatter matrix, `Sb`).\n * `outdim::Int=0`: The output dimension, i.e., dimension of the transformed space, automatically set to `min(indim, nclasses-1)` if equal to 0.\n * `regcoef::Float64=1e-6`: The regularization coefficient. A positive value `regcoef*eigmax(Sw)` where `Sw` is the within-class scatter matrix, is added to the diagonal of `Sw` to improve numerical stability. This can be useful if using the standard covariance estimator.\n * `priors::Union{Nothing, UnivariateFinite{<:Any, <:Any, <:Any, <:Real}, Dict{<:Any, <:Real}} = nothing`: For use in prediction with Bayes rule. If `priors = nothing` then `priors` are estimated from the class proportions in the training data. 
Otherwise it requires a `Dict` or `UnivariateFinite` object specifying the classes with non-zero probabilities in the training target.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n * `priors`: The class priors for classification. As inferred from training target `y`, if not user-specified. A `UnivariateFinite` object with levels (classes) consistent with `levels(y)`.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space i.e the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The mean of the untransformed training data. A vector of length `indim`.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool).\n * `class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n * `class_weights`: The weights (class counts) of each class. A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. 
(See fitted params section above.)\n * `Sb`: The between class scatter matrix.\n * `Sw`: The within class scatter matrix.\n\n# Examples\n\n```julia\nusing MLJ\n\nBayesianLDA = @load BayesianLDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = BayesianLDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n```\n\nSee also [`LDA`](@ref), [`SubspaceLDA`](@ref), [`BayesianSubspaceLDA`](@ref)\n""" +":docstring" = """```julia\nBayesianSubspaceLDA\n```\n\nA model type for constructing a Bayesian subspace LDA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nBayesianSubspaceLDA = @load BayesianSubspaceLDA pkg=MultivariateStats\n```\n\nDo `model = BayesianSubspaceLDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `BayesianSubspaceLDA(normalize=...)`.\n\nThe Bayesian multiclass subspace linear discriminant analysis algorithm learns a projection matrix as described in [`SubspaceLDA`](@ref). 
The posterior class probability distribution is derived as in [`BayesianLDA`](@ref).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `normalize=true`: Option to normalize the between class variance for the number of observations in each class, one of `true` or `false`.\n\n`outdim`: the ouput dimension, automatically set to `min(indim, nclasses-1)` if equal to `0`. If a non-zero `outdim` is passed, then the actual output dimension used is `min(rank, outdim)` where `rank` is the rank of the within-class covariance matrix.\n\n * `priors::Union{Nothing, UnivariateFinite{<:Any, <:Any, <:Any, <:Real}, Dict{<:Any, <:Real}} = nothing`: For use in prediction with Bayes rule. If `priors = nothing` then `priors` are estimated from the class proportions in the training data. Otherwise it requires a `Dict` or `UnivariateFinite` object specifying the classes with non-zero probabilities in the training target.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. 
Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n * `priors`: The class priors for classification. As inferred from training target `y`, if not user-specified. A `UnivariateFinite` object with levels (classes) consistent with `levels(y)`.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space i.e the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The overall mean of the training data.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool).\n\n`class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n\n * `class_weights`: The weights (class counts) of each class. A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. (See fitted params section above.)\n * `explained_variance_ratio`: The ratio of explained variance to total variance. 
Each dimension corresponds to an eigenvalue.\n\n# Examples\n\n```julia\nusing MLJ\n\nBayesianSubspaceLDA = @load BayesianSubspaceLDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = BayesianSubspaceLDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n```\n\nSee also [`LDA`](@ref), [`BayesianLDA`](@ref), [`SubspaceLDA`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" ":package_name" = "MultivariateStats" -":name" = "BayesianLDA" +":name" = "BayesianSubspaceLDA" ":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -8438,11 +8475,11 @@ ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[MLJMultivariateStatsInterface.PCA] +[MLJMultivariateStatsInterface.FactorAnalysis] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Float64\", \"Union{Nothing, Real, Vector{Float64}}\")`" +":hyperparameter_types" = "`(\"Symbol\", \"Int64\", \"Int64\", \"Real\", \"Real\", \"Union{Nothing, Real, Vector{Float64}}\")`" ":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" ":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" @@ -8450,17 +8487,17 @@ ":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "MLJMultivariateStatsInterface.PCA" -":hyperparameters" = "`(:maxoutdim, :method, :variance_ratio, :mean)`" +":load_path" = 
"MLJMultivariateStatsInterface.FactorAnalysis" +":hyperparameters" = "`(:method, :maxoutdim, :maxiter, :tol, :eta, :mean)`" ":is_pure_julia" = "`true`" -":human_name" = "pca" +":human_name" = "factor analysis model" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nPCA\n```\n\nA model type for constructing a pca, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nPCA = @load PCA pkg=MultivariateStats\n```\n\nDo `model = PCA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `PCA(maxoutdim=...)`.\n\nPrincipal component analysis learns a linear projection onto a lower dimensional space while preserving most of the initial variance seen in the training data.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `maxoutdim=0`: Together with `variance_ratio`, controls the output dimension `outdim` chosen by the model. Specifically, suppose that `k` is the smallest integer such that retaining the `k` most significant principal components accounts for `variance_ratio` of the total variance in the training data. Then `outdim = min(outdim, maxoutdim)`. If `maxoutdim=0` (default) then the effective `maxoutdim` is `min(n, indim - 1)` where `n` is the number of observations and `indim` the number of features in the training data.\n * `variance_ratio::Float64=0.99`: The ratio of variance preserved after the transformation\n * `method=:auto`: The method to use to solve the problem. 
Choices are\n\n * `:svd`: Support Vector Decomposition of the matrix.\n * `:cov`: Covariance matrix decomposition.\n * `:auto`: Use `:cov` if the matrices first dimension is smaller than its second dimension and otherwise use `:svd`\n * `mean=nothing`: if `nothing`, centering will be computed and applied, if set to `0` no centering (data is assumed pre-centered); if a vector is passed, the centering is done with that vector.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having same the number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and output respectively.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim = min(n, indim, maxoutdim)` is the output dimension; here `n` is the number of observations.\n * `tprincipalvar`: Total variance of the principal components.\n * `tresidualvar`: Total residual variance.\n * `tvar`: Total observation variance (principal + residual variance).\n * `mean`: The mean of the untransformed training data, of length `indim`.\n * `principalvars`: The variance of the principal components. 
An AbstractVector of length `outdim`\n * `loadings`: The models loadings, weights for each variable used when calculating principal components. A matrix of size (`indim`, `outdim`) where `indim` and `outdim` are as defined above.\n\n# Examples\n\n```julia\nusing MLJ\n\nPCA = @load PCA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = PCA(maxoutdim=2)\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`KernelPCA`](@ref), [`ICA`](@ref), [`FactorAnalysis`](@ref), [`PPCA`](@ref)\n""" +":docstring" = """```julia\nFactorAnalysis\n```\n\nA model type for constructing a factor analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nFactorAnalysis = @load FactorAnalysis pkg=MultivariateStats\n```\n\nDo `model = FactorAnalysis()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FactorAnalysis(method=...)`.\n\nFactor analysis is a linear-Gaussian latent variable model that is closely related to probabilistic PCA. In contrast to the probabilistic PCA model, the covariance of conditional distribution of the observed variable given the latent variable is diagonal rather than isotropic.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `method::Symbol=:cm`: Method to use to solve the problem, one of `:ml`, `:em`, `:bayes`.\n * `maxoutdim=0`: Controls the the dimension (number of columns) of the output, `outdim`. 
Specifically, `outdim = min(n, indim, maxoutdim)`, where `n` is the number of observations and `indim` the input dimension.\n * `maxiter::Int=1000`: Maximum number of iterations.\n * `tol::Real=1e-6`: Convergence tolerance.\n * `eta::Real=tol`: Variance lower bound.\n * `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: If `nothing`, centering will be computed and applied; if set to `0` no centering is applied (data is assumed pre-centered); if a vector, the centering is done with that vector.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having same the number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and ouput respectively. Each column of the projection matrix corresponds to a factor.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data (number of factors).\n * `variance`: The variance of the factors.\n * `covariance_matrix`: The estimated covariance matrix.\n * `mean`: The mean of the untransformed training data, of length `indim`.\n * `loadings`: The factor loadings. 
A matrix of size (`indim`, `outdim`) where `indim` and `outdim` are as defined above.\n\n# Examples\n\n```julia\nusing MLJ\n\nFactorAnalysis = @load FactorAnalysis pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = FactorAnalysis(maxoutdim=2)\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`KernelPCA`](@ref), [`ICA`](@ref), [`PPCA`](@ref), [`PCA`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" ":package_name" = "MultivariateStats" -":name" = "PCA" +":name" = "FactorAnalysis" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -8475,177 +8512,29 @@ ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":constructor" = "`nothing`" -[MLJLIBSVMInterface.OneClassSVM] -":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" -":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Unknown}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Binary}`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" -":package_license" = "unknown" -":prediction_type" = ":unknown" -":load_path" = "MLJLIBSVMInterface.OneClassSVM" -":hyperparameters" = "`(:kernel, :gamma, :nu, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" -":is_pure_julia" = "`false`" -":human_name" = "one-class support 
vector machine" -":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nOneClassSVM\n```\n\nA model type for constructing a one-class support vector machine, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nOneClassSVM = @load OneClassSVM pkg=LIBSVM\n```\n\nDo `model = OneClassSVM()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `OneClassSVM(kernel=...)`.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\nThis model is an outlier detection model delivering raw scores based on the decision function of a support vector machine. Like the [`NuSVC`](@ref) classifier, it uses the `nu` re-parameterization of the `cost` parameter appearing in standard support vector classification [`SVC`](@ref).\n\nTo extract normalized scores (\"probabilities\") wrap the model using `ProbabilisticDetector` from [OutlierDetection.jl](https://github.com/OutlierDetectionJL/OutlierDetection.jl). For threshold-based classification, wrap the probabilistic model using MLJ's `BinaryThresholdPredictor`. 
Examples of wrapping appear below.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```julia\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin errors and a lower bound of the fraction of support vectors. Denoted `ν` in the cited paper. 
Changing `nu` changes the thickness of the margin (a neighborhood of the decision surface) and a margin error is said to have occurred if a training observation lies on the wrong side of the surface or within the margin.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `transform(mach, Xnew)`: return scores for outlierness, given features `Xnew` having the same scitype as `X` above. The greater the score, the more likely it is an outlier. This score is based on the SVM decision function. For normalized scores, wrap `model` using `ProbabilisticDetector` from OutlierDetection.jl and call `predict` instead, and for threshold-based classification, wrap again using `BinaryThresholdPredictor`. See the examples below.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `orientation`: this equals `1` if the decision function for `libsvm_model` is increasing with increasing outlierness, and `-1` if it is decreasing instead. Correspondingly, the `libsvm_model` attaches `true` to outliers in the first case, and `false` in the second. 
(The `scores` given in the MLJ report and generated by `MLJ.transform` already correct for this ambiguity, which is therefore only an issue for users directly accessing `libsvm_model`.)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Generating raw scores for outlierness\n\n```julia\nusing MLJ\nimport LIBSVM\nimport StableRNGs.StableRNG\n\nOneClassSVM = @load OneClassSVM pkg=LIBSVM # model type\nmodel = OneClassSVM(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nrng = StableRNG(123)\nXmatrix = randn(rng, 5, 3)\nXmatrix[1, 1] = 100.0\nX = MLJ.table(Xmatrix)\n\nmach = machine(model, X) |> fit!\n\n# training scores (outliers have larger scores):\njulia> report(mach).scores\n5-element Vector{Float64}:\n 6.711689156091755e-7\n -6.740101976655081e-7\n -6.711632439648446e-7\n -6.743015858874887e-7\n -6.745393717880104e-7\n\n# scores for new data:\nXnew = MLJ.table(rand(rng, 2, 3))\n\njulia> transform(mach, rand(rng, 2, 3))\n2-element Vector{Float64}:\n -6.746293022511047e-7\n -6.744289265348623e-7\n```\n\n## Generating probabilistic predictions of outlierness\n\nContinuing the previous example:\n\n```julia\nusing OutlierDetection\npmodel = ProbabilisticDetector(model)\npmach = machine(pmodel, X) |> fit!\n\n# probabilistic predictions on new data:\n\njulia> y_prob = predict(pmach, Xnew)\n2-element UnivariateFiniteVector{OrderedFactor{2}, String, UInt8, Float64}:\n UnivariateFinite{OrderedFactor{2}}(normal=>1.0, outlier=>9.57e-5)\n UnivariateFinite{OrderedFactor{2}}(normal=>1.0, outlier=>0.0)\n\n# probabilities for outlierness:\n\njulia> pdf.(y_prob, \"outlier\")\n2-element Vector{Float64}:\n 9.572583265925801e-5\n 0.0\n\n# raw scores are still available using `transform`:\n\njulia> transform(pmach, Xnew)\n2-element Vector{Float64}:\n 9.572583265925801e-5\n 0.0\n```\n\n## Outlier classification using a probability threshold:\n\nContinuing the previous 
example:\n\n```julia\ndmodel = BinaryThresholdPredictor(pmodel, threshold=0.9)\ndmach = machine(dmodel, X) |> fit!\n\njulia> yhat = predict(dmach, Xnew)\n2-element CategoricalArrays.CategoricalArray{String,1,UInt8}:\n \"normal\"\n \"normal\"\n```\n\n## User-defined kernels\n\nContinuing the first example:\n\n```julia\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = OneClassSVM(kernel=k)\nmach = machine(model, X) |> fit!\n\njulia> yhat = transform(mach, Xnew)\n2-element Vector{Float64}:\n -0.4825363352732942\n -0.4848772169720227\n```\n\nSee also [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README). For an alternative source of outlier detection models with an MLJ interface, see [OutlierDetection.jl](https://outlierdetectionjl.github.io/OutlierDetection.jl/dev/).\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/mpastell/LIBSVM.jl" -":package_name" = "LIBSVM" -":name" = "OneClassSVM" -":target_in_fit" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] -":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[MLJLIBSVMInterface.EpsilonSVR] -":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" -":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" -":hyperparameter_ranges" = "`(nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" -":package_license" = "unknown" -":prediction_type" = ":deterministic" -":load_path" = "MLJLIBSVMInterface.EpsilonSVR" -":hyperparameters" = "`(:kernel, :gamma, :epsilon, :cost, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" -":is_pure_julia" = "`false`" -":human_name" = "ϵ-support vector regressor" -":is_supervised" = "`true`" -":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nEpsilonSVR\n```\n\nA model type for constructing a ϵ-support vector regressor, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nEpsilonSVR = @load EpsilonSVR pkg=LIBSVM\n```\n\nDo `model = EpsilonSVR()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EpsilonSVR(kernel=...)`.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). 
\n\nThis model is an adaptation of the classifier `SVC` to regression, but has an additional parameter `epsilon` (denoted $ϵ$ in the cited reference).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```julia\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. 
Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `epsilon=0.1` (range (0, `Inf`)): the parameter denoted $ϵ$ in the cited reference; `epsilon` is the thickness of the penalty-free neighborhood of the graph of the prediction function (\"slab\" or \"tube\"). Specifically, a data point `(x, y)` incurs no training loss unless it is outside this neighborhood; the further away it is from the this neighborhood, the greater the loss penalty.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```julia\nusing MLJ\nimport LIBSVM\n\nEpsilonSVR = @load EpsilonSVR pkg=LIBSVM # model type\nmodel = EpsilonSVR(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = make_regression(rng=123) # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew, _ = make_regression(3, rng=123)\n\njulia> yhat = predict(mach, Xnew)\n3-element Vector{Float64}:\n 0.2512132502584155\n 0.007340201523624579\n -0.2482949812264707\n```\n\n## User-defined kernels\n\n```julia\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = EpsilonSVR(kernel=k)\nmach = machine(model, X, y) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element Vector{Float64}:\n 1.1121225361666656\n 
0.04667702229741916\n -0.6958148424680672\n```\n\nSee also [`NuSVR`](@ref), [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/mpastell/LIBSVM.jl" -":package_name" = "LIBSVM" -":name" = "EpsilonSVR" -":target_in_fit" = "`true`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[MLJLIBSVMInterface.LinearSVC] -":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"LIBSVM.Linearsolver.LINEARSOLVER\", \"Float64\", \"Float64\", \"Float64\")`" -":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, Any}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" -":package_license" = "unknown" -":prediction_type" = ":deterministic" -":load_path" = "MLJLIBSVMInterface.LinearSVC" -":hyperparameters" = "`(:solver, :tolerance, :cost, 
:bias)`" -":is_pure_julia" = "`false`" -":human_name" = "linear support vector classifier" -":is_supervised" = "`true`" -":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nLinearSVC\n```\n\nA model type for constructing a linear support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nLinearSVC = @load LinearSVC pkg=LIBSVM\n```\n\nDo `model = LinearSVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearSVC(solver=...)`.\n\nReference for algorithm and core C-library: Rong-En Fan et al (2008): \"LIBLINEAR: A Library for Large Linear Classification.\" *Journal of Machine Learning Research* 9 1871-1874. Available at [https://www.csie.ntu.edu.tw/~cjlin/papers/liblinear.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/liblinear.pdf). \n\nThis model type is similar to `SVC` from the same package with the setting `kernel=LIBSVM.Kernel.KERNEL.Linear`, but is optimized for the linear case.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```julia\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n * `w`: a dictionary of class weights, keyed on `levels(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `solver=LIBSVM.Linearsolver.L2R_L2LOSS_SVC_DUAL`: linear solver, which must be one of the following from the LIBSVM.jl package:\n\n * `LIBSVM.Linearsolver.L2R_LR`: L2-regularized logistic regression (primal))\n * `LIBSVM.Linearsolver.L2R_L2LOSS_SVC_DUAL`: 
L2-regularized L2-loss support vector classification (dual)\n * `LIBSVM.Linearsolver.L2R_L2LOSS_SVC`: L2-regularized L2-loss support vector classification (primal)\n * `LIBSVM.Linearsolver.L2R_L1LOSS_SVC_DUAL`: L2-regularized L1-loss support vector classification (dual)\n * `LIBSVM.Linearsolver.MCSVM_CS`: support vector classification by Crammer and Singer) `LIBSVM.Linearsolver.L1R_L2LOSS_SVC`: L1-regularized L2-loss support vector classification)\n * `LIBSVM.Linearsolver.L1R_LR`: L1-regularized logistic regression\n * `LIBSVM.Linearsolver.L2R_LR_DUAL`: L2-regularized logistic regression (dual)\n * `tolerance::Float64=Inf`: tolerance for the stopping criterion;\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `bias= -1.0`: if `bias >= 0`, instance `x` becomes `[x; bias]`; if `bias < 0`, no bias term added (default -1)\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Examples\n\n```julia\nusing MLJ\nimport LIBSVM\n\nLinearSVC = @load LinearSVC pkg=LIBSVM # model type\nmodel = LinearSVC(solver=LIBSVM.Linearsolver.L2R_LR) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"versicolor\"\n \"virginica\"\n```\n\n## Incorporating class weights\n\n```julia\nweights = Dict(\"virginica\" => 1, \"versicolor\" => 20, \"setosa\" 
=> 1)\nmach = machine(model, X, y, weights) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"versicolor\"\n \"versicolor\"\n \"versicolor\"\n```\n\nSee also the [`SVC`](@ref) and [`NuSVC`](@ref) classifiers, and [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/liblinear/blob/master/README).\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/mpastell/LIBSVM.jl" -":package_name" = "LIBSVM" -":name" = "LinearSVC" -":target_in_fit" = "`true`" -":supports_class_weights" = "`true`" -":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[MLJLIBSVMInterface.ProbabilisticSVC] -":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" -":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, Any}}`" -":output_scitype" = 
"`ScientificTypesBase.Unknown`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":package_license" = "unknown" -":prediction_type" = ":probabilistic" -":load_path" = "MLJLIBSVMInterface.ProbabilisticSVC" -":hyperparameters" = "`(:kernel, :gamma, :cost, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" -":is_pure_julia" = "`false`" -":human_name" = "probabilistic C-support vector classifier" -":is_supervised" = "`true`" -":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nProbabilisticSVC\n```\n\nA model type for constructing a probabilistic C-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nProbabilisticSVC = @load ProbabilisticSVC pkg=LIBSVM\n```\n\nDo `model = ProbabilisticSVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ProbabilisticSVC(kernel=...)`.\n\nThis model is identical to [`SVC`](@ref) with the exception that it predicts probabilities, instead of actual class labels. Probabilities are computed using Platt scaling, which will add to the total computation time.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). 
\n\n[Platt, John (1999): \"Probabilistic Outputs for Support Vector Machines and Comparisons to Regularized Likelihood Methods.\"](https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393)\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```julia\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n * `w`: a dictionary of class weights, keyed on `levels(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. 
Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return probabilistic predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```julia\nusing MLJ\nimport LIBSVM\n\nProbabilisticSVC = @load ProbabilisticSVC pkg=LIBSVM # model type\nmodel = ProbabilisticSVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> probs = predict(mach, Xnew)\n3-element UnivariateFiniteVector{Multiclass{3}, String, UInt32, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>0.00186, versicolor=>0.003, virginica=>0.995)\n UnivariateFinite{Multiclass{3}}(setosa=>0.000563, versicolor=>0.0554, virginica=>0.944)\n UnivariateFinite{Multiclass{3}}(setosa=>1.4e-6, versicolor=>1.68e-6, virginica=>1.0)\n\n\njulia> labels = mode.(probs)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n 
\"virginica\"\n```\n\n## User-defined kernels\n\n```julia\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = ProbabilisticSVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\nprobs = predict(mach, Xnew)\n```\n\n## Incorporating class weights\n\nIn either scenario above, we can do:\n\n```julia\nweights = Dict(\"virginica\" => 1, \"versicolor\" => 20, \"setosa\" => 1)\nmach = machine(model, X, y, weights) |> fit!\n\nprobs = predict(mach, Xnew)\n```\n\nSee also the classifiers [`SVC`](@ref), [`NuSVC`](@ref) and [`LinearSVC`](@ref), and [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/mpastell/LIBSVM.jl" -":package_name" = "LIBSVM" -":name" = "ProbabilisticSVC" -":target_in_fit" = "`true`" -":supports_class_weights" = "`true`" -":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +[MLJMultivariateStatsInterface.LinearRegressor] ":is_wrapper" = "`false`" - -[MLJLIBSVMInterface.NuSVR] -":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" -":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" 
+":hyperparameter_types" = "`(\"Bool\",)`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing,)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" -":package_license" = "unknown" -":prediction_type" = ":deterministic" -":load_path" = "MLJLIBSVMInterface.NuSVR" -":hyperparameters" = "`(:kernel, :gamma, :nu, :cost, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" -":is_pure_julia" = "`false`" -":human_name" = "ν-support vector regressor" +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "MIT" +":prediction_type" = ":deterministic" +":load_path" = "MLJMultivariateStatsInterface.LinearRegressor" +":hyperparameters" = "`(:bias,)`" +":is_pure_julia" = "`true`" +":human_name" = "linear regressor" ":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nNuSVR\n```\n\nA model type for constructing a ν-support vector regressor, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nNuSVR = @load NuSVR pkg=LIBSVM\n```\n\nDo `model = NuSVR()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `NuSVR(kernel=...)`.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). 
\n\nThis model is a re-parameterization of `EpsilonSVR` in which the `epsilon` hyper-parameter is replaced with a new parameter `nu` (denoted $ν$ in the cited reference) which attempts to control the number of support vectors directly.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```julia\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be\n\n called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. 
Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of training errors and a lower bound of the fraction of support vectors. Denoted $ν$ in the cited paper. Changing `nu` changes the thickness of some neighborhood of the graph of the prediction function (\"tube\" or \"slab\") and a training error is said to occur when a data point `(x, y)` lies outside of that neighborhood.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```julia\nusing MLJ\nimport LIBSVM\n\nNuSVR = @load NuSVR pkg=LIBSVM # model type\nmodel = NuSVR(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = make_regression(rng=123) # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew, _ = make_regression(3, rng=123)\n\njulia> yhat = predict(mach, Xnew)\n3-element Vector{Float64}:\n 0.2008156459920009\n 0.1131520519131709\n -0.2076156254934889\n```\n\n## User-defined kernels\n\n```julia\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = NuSVR(kernel=k)\nmach = machine(model, X, y) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element Vector{Float64}:\n 1.1211558175964662\n 0.06677125944808422\n 
-0.6817578942749346\n```\n\nSee also [`EpsilonSVR`](@ref), [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" +":docstring" = """```julia\nLinearRegressor\n```\n\nA model type for constructing a linear regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nLinearRegressor = @load LinearRegressor pkg=MultivariateStats\n```\n\nDo `model = LinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearRegressor(bias=...)`.\n\n`LinearRegressor` assumes the target is a `Continuous` variable and trains a linear prediction function using the least squares algorithm. Options exist to specify a bias term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```julia\nusing MLJ\n\nLinearRegressor = @load LinearRegressor pkg=MultivariateStats\nlinear_regressor = 
LinearRegressor()\n\nX, y = make_regression(100, 2) # a table and a vector (synthetic data)\nmach = machine(linear_regressor, X, y) |> fit!\n\nXnew, _ = make_regression(3, 2)\nyhat = predict(mach, Xnew) # new predictions\n```\n\nSee also [`MultitargetLinearRegressor`](@ref), [`RidgeRegressor`](@ref), [`MultitargetRidgeRegressor`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/mpastell/LIBSVM.jl" -":package_name" = "LIBSVM" -":name" = "NuSVR" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "LinearRegressor" ":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -8658,142 +8547,142 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[MLJLIBSVMInterface.NuSVC] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" -":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" + +[MLJMultivariateStatsInterface.ICA] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Symbol\", \"Bool\", \"Int64\", \"Real\", \"Union{Nothing, Matrix{<:Real}}\", \"Union{Nothing, Real, Vector{Float64}}\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = 
"`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" -":package_license" = "unknown" -":prediction_type" = ":deterministic" -":load_path" = "MLJLIBSVMInterface.NuSVC" -":hyperparameters" = "`(:kernel, :gamma, :nu, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" -":is_pure_julia" = "`false`" -":human_name" = "ν-support vector classifier" -":is_supervised" = "`true`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "MLJMultivariateStatsInterface.ICA" +":hyperparameters" = "`(:outdim, :alg, :fun, :do_whiten, :maxiter, :tol, :winit, :mean)`" +":is_pure_julia" = "`true`" +":human_name" = "independent component analysis model" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nNuSVC\n```\n\nA model type for constructing a ν-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nNuSVC = @load NuSVC pkg=LIBSVM\n```\n\nDo `model = NuSVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `NuSVC(kernel=...)`.\n\nThis model is a re-parameterization of the [`SVC`](@ref) classifier, where `nu` replaces `cost`, and is mathematically equivalent to it. The parameter `nu` allows more direct control over the number of support vectors (see under \"Hyper-parameters\").\n\nThis model always predicts actual class labels. For probabilistic predictions, use instead [`ProbabilisticNuSVC`](@ref).\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. 
Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```julia\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. 
Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin errors and a lower bound of the fraction of support vectors. Denoted `ν` in the cited paper. Changing `nu` changes the thickness of the margin (a neighborhood of the decision surface) and a margin error is said to have occurred if a training observation lies on the wrong side of the surface or within the margin.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```julia\nusing MLJ\nimport LIBSVM\n\nNuSVC = @load NuSVC pkg=LIBSVM # model type\nmodel = NuSVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```julia\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = NuSVC(kernel=k)\nmach = machine(model, X, y) |> 
fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\nSee also the classifiers [`SVC`](@ref) and [`LinearSVC`](@ref), [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation. [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/mpastell/LIBSVM.jl" -":package_name" = "LIBSVM" -":name" = "NuSVC" -":target_in_fit" = "`true`" +":docstring" = """```julia\nICA\n```\n\nA model type for constructing an independent component analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nICA = @load ICA pkg=MultivariateStats\n```\n\nDo `model = ICA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ICA(outdim=...)`.\n\nIndependent component analysis is a computational technique for separating a multivariate signal into additive subcomponents, with the assumption that the subcomponents are non-Gaussian and independent from each other.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `outdim::Int=0`: The number of independent components to recover, set automatically if `0`.\n * `alg::Symbol=:fastica`: The algorithm to use (only `:fastica` is supported at the moment).\n * `fun::Symbol=:tanh`: The approximate neg-entropy function, one of `:tanh`, `:gaus`.\n * `do_whiten::Bool=true`: Whether or not to perform 
pre-whitening.\n * `maxiter::Int=100`: The maximum number of iterations.\n * `tol::Real=1e-6`: The convergence tolerance for change in the unmixing matrix W.\n * `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: mean to use, if nothing (default) centering is computed and applied, if zero, no centering; otherwise a vector of means can be passed.\n * `winit::Union{Nothing,Matrix{<:Real}}=nothing`: Initial guess for the unmixing matrix `W`: either an empty matrix (for random initialization of `W`), a matrix of size `k × k` (if `do_whiten` is true), or a matrix of size `m × k` (otherwise). Here `m` is the number of components (columns) of the input and `k` is the number of independent components to recover (see `outdim`).\n\n# Operations\n\n * `transform(mach, Xnew)`: Return the component-separated version of input `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: The estimated component matrix.\n * `mean`: The estimated mean vector.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data.\n * `mean`: The mean of the untransformed training data, of length `indim`.\n\n# Examples\n\n```julia\nusing MLJ\n\nICA = @load ICA pkg=MultivariateStats\n\ntimes = range(0, 8, length=2000)\n\nsine_wave = sin.(2*times)\nsquare_wave = sign.(sin.(3*times))\nsawtooth_wave = map(t -> mod(2t, 2) - 1, times)\nsignals = hcat(sine_wave, square_wave, sawtooth_wave)\nnoisy_signals = signals + 0.2*randn(size(signals))\n\nmixing_matrix = [ 1 1 1; 0.5 2 1; 1.5 1 2]\nX = MLJ.table(noisy_signals*mixing_matrix)\n\nmodel = ICA(outdim = 3, tol=0.1)\nmach = machine(model, X) |> fit!\n\nX_unmixed = transform(mach, X)\n\nusing Plots\n\nplot(X.x1)\nplot(X.x2)\nplot(X.x3)\n\nplot(X_unmixed.x1)\nplot(X_unmixed.x2)\nplot(X_unmixed.x3)\n\n```\n\nSee also [`PCA`](@ref), [`KernelPCA`](@ref), [`FactorAnalysis`](@ref), [`PPCA`](@ref)\n""" +":inverse_transform_scitype" 
= "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "ICA" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[MLJLIBSVMInterface.ProbabilisticNuSVC] +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" -":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" + +[MLJMultivariateStatsInterface.PPCA] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Int64\", \"Real\", \"Union{Nothing, Real, Vector{Float64}}\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, 
AbstractVector{<:ScientificTypesBase.Finite}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":package_license" = "unknown" -":prediction_type" = ":probabilistic" -":load_path" = "MLJLIBSVMInterface.ProbabilisticNuSVC" -":hyperparameters" = "`(:kernel, :gamma, :nu, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" -":is_pure_julia" = "`false`" -":human_name" = "probabilistic ν-support vector classifier" -":is_supervised" = "`true`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "MLJMultivariateStatsInterface.PPCA" +":hyperparameters" = "`(:maxoutdim, :method, :maxiter, :tol, :mean)`" +":is_pure_julia" = "`true`" +":human_name" = "probabilistic PCA model" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nProbabilisticNuSVC\n```\n\nA model type for constructing a probabilistic ν-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nProbabilisticNuSVC = @load ProbabilisticNuSVC pkg=LIBSVM\n```\n\nDo `model = ProbabilisticNuSVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ProbabilisticNuSVC(kernel=...)`.\n\nThis model is identical to [`NuSVC`](@ref) with the exception that it predicts probabilities, instead of actual class labels. Probabilities are computed using Platt scaling, which will add to total computation time.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. 
Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\n[Platt, John (1999): \"Probabilistic Outputs for Support Vector Machines and Comparisons to Regularized Likelihood Methods.\"](https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393)\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```julia\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). 
If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin errors and a lower bound of the fraction of support vectors. Denoted `ν` in the cited paper. Changing `nu` changes the thickness of the margin (a neighborhood of the decision surface) and a margin error is said to have occurred if a training observation lies on the wrong side of the surface or within the margin.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```julia\nusing MLJ\nimport LIBSVM\n\nProbabilisticNuSVC = @load ProbabilisticNuSVC pkg=LIBSVM # model type\nmodel = ProbabilisticNuSVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> probs = predict(mach, Xnew)\n3-element UnivariateFiniteVector{Multiclass{3}, String, UInt32, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>0.00313, versicolor=>0.0247, virginica=>0.972)\n 
UnivariateFinite{Multiclass{3}}(setosa=>0.000598, versicolor=>0.0155, virginica=>0.984)\n UnivariateFinite{Multiclass{3}}(setosa=>2.27e-6, versicolor=>2.73e-6, virginica=>1.0)\n\njulia> yhat = mode.(probs)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```julia\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = ProbabilisticNuSVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\nprobs = predict(mach, Xnew)\n```\n\nSee also the classifiers [`NuSVC`](@ref), [`SVC`](@ref), [`ProbabilisticSVC`](@ref) and [`LinearSVC`](@ref). And see [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation. [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/mpastell/LIBSVM.jl" -":package_name" = "LIBSVM" -":name" = "ProbabilisticNuSVC" -":target_in_fit" = "`true`" +":docstring" = """```julia\nPPCA\n```\n\nA model type for constructing a probabilistic PCA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nPPCA = @load PPCA pkg=MultivariateStats\n```\n\nDo `model = PPCA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `PPCA(maxoutdim=...)`.\n\nProbabilistic principal component analysis is a dimension-reduction algorithm which represents a constrained form of the Gaussian distribution in which the number of free parameters can be restricted while still allowing the model to capture the dominant correlations in a data set. It is expressed as the maximum likelihood solution of a probabilistic latent variable model. For details, see Bishop (2006): C. M. 
Pattern Recognition and Machine Learning.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `maxoutdim=0`: Controls the dimension (number of columns) of the output, `outdim`. Specifically, `outdim = min(n, indim, maxoutdim)`, where `n` is the number of observations and `indim` the input dimension.\n * `method::Symbol=:ml`: The method to use to solve the problem, one of `:ml`, `:em`, `:bayes`.\n * `maxiter::Int=1000`: The maximum number of iterations.\n * `tol::Real=1e-6`: The convergence tolerance.\n * `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: If `nothing`, centering will be computed and applied; if set to `0` no centering is applied (data is assumed pre-centered); if a vector, the centering is done with that vector.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having the same number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(mach, Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and output respectively. 
Each column of the projection matrix corresponds to a principal component.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data.\n * `tvat`: The variance of the components.\n * `loadings`: The model's loadings matrix. A matrix of size (`indim`, `outdim`) where `indim` and `outdim` are as defined above.\n\n# Examples\n\n```julia\nusing MLJ\n\nPPCA = @load PPCA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = PPCA(maxoutdim=2)\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`KernelPCA`](@ref), [`ICA`](@ref), [`FactorAnalysis`](@ref), [`PCA`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "PPCA" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[MLJLIBSVMInterface.SVC] +":transform_scitype" = 
"`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" -":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" + +[MLJMultivariateStatsInterface.RidgeRegressor] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Union{Real, AbstractVecOrMat}\", \"Bool\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, Any}}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Deterministic`" -":package_license" = "unknown" +":package_license" = "MIT" ":prediction_type" = ":deterministic" -":load_path" = "MLJLIBSVMInterface.SVC" -":hyperparameters" = "`(:kernel, :gamma, :cost, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" -":is_pure_julia" = "`false`" -":human_name" = "C-support vector classifier" +":load_path" = "MLJMultivariateStatsInterface.RidgeRegressor" +":hyperparameters" = "`(:lambda, :bias)`" +":is_pure_julia" = "`true`" +":human_name" = "ridge regressor" ":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nSVC\n```\n\nA model type for constructing a C-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), 
and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nSVC = @load SVC pkg=LIBSVM\n```\n\nDo `model = SVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SVC(kernel=...)`.\n\nThis model predicts actual class labels. To predict probabilities, use instead [`ProbabilisticSVC`](@ref).\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```julia\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n * `w`: a dictionary of class weights, keyed on `levels(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. 
Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```julia\nusing MLJ\nimport LIBSVM\n\nSVC = @load SVC pkg=LIBSVM # model type\nmodel = SVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined 
kernels\n\n```julia\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = SVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## Incorporating class weights\n\nIn either scenario above, we can do:\n\n```julia\nweights = Dict(\"virginica\" => 1, \"versicolor\" => 20, \"setosa\" => 1)\nmach = machine(model, X, y, weights) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"versicolor\"\n \"versicolor\"\n \"versicolor\"\n```\n\nSee also the classifiers [`ProbabilisticSVC`](@ref), [`NuSVC`](@ref) and [`LinearSVC`](@ref). And see [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" +":docstring" = """```julia\nRidgeRegressor\n```\n\nA model type for constructing a ridge regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nRidgeRegressor = @load RidgeRegressor pkg=MultivariateStats\n```\n\nDo `model = RidgeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RidgeRegressor(lambda=...)`.\n\n`RidgeRegressor` adds a quadratic penalty term to least squares regression, for regularization. Ridge regression is particularly useful in the case of multicollinearity. 
Options exist to specify a bias term, and to adjust the strength of the penalty term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `lambda=1.0`: Is the non-negative parameter for the regularization strength. If lambda is 0, ridge regression is equivalent to linear least squares regression, and as lambda approaches infinity, all the linear coefficients approach 0.\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```julia\nusing MLJ\n\nRidgeRegressor = @load RidgeRegressor pkg=MultivariateStats\npipe = Standardizer() |> RidgeRegressor(lambda=10)\n\nX, y = @load_boston\n\nmach = machine(pipe, X, y) |> fit!\nyhat = predict(mach, X)\ntraining_error = l1(yhat, y) |> mean\n```\n\nSee also [`LinearRegressor`](@ref), [`MultitargetLinearRegressor`](@ref), [`MultitargetRidgeRegressor`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/mpastell/LIBSVM.jl" -":package_name" = "LIBSVM" -":name" = "SVC" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "RidgeRegressor" ":target_in_fit" = "`true`" -":supports_class_weights" = 
"`true`" +":supports_class_weights" = "`false`" ":supports_online" = "`false`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" -[MLJTransforms.Standardizer] +[MLJMultivariateStatsInterface.KernelPCA] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Union{Function, AbstractVector{Symbol}}\", \"Bool\", \"Bool\", \"Bool\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, Function}\", \"Symbol\", \"Bool\", \"Real\", \"Real\", \"Int64\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" = 
":unknown" -":load_path" = "MLJTransforms.Standardizer" -":hyperparameters" = "`(:features, :ignore, :ordered_factor, :count)`" +":load_path" = "MLJMultivariateStatsInterface.KernelPCA" +":hyperparameters" = "`(:maxoutdim, :kernel, :solver, :inverse, :beta, :tol, :maxiter)`" ":is_pure_julia" = "`true`" -":human_name" = "standardizer" +":human_name" = "kernel prinicipal component analysis model" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nStandardizer\n```\n\nA model type for constructing a standardizer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nStandardizer = @load Standardizer pkg=MLJTransforms\n```\n\nDo `model = Standardizer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `Standardizer(features=...)`.\n\nUse this model to standardize (whiten) a `Continuous` vector, or relevant columns of a table. The rescalings applied by this transformer to new data are always those learned during the training phase, which are generally different from what would actually standardize the new data.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table or any abstract vector with `Continuous` element scitype (any abstract float vector). 
Only features in a table with `Continuous` scitype can be standardized; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: one of the following, with the behavior indicated below:\n\n * `[]` (empty, the default): standardize all features (columns) having `Continuous` element scitype\n * non-empty vector of feature names (symbols): standardize only the `Continuous` features in the vector (if `ignore=false`) or `Continuous` features *not* named in the vector (`ignore=true`).\n * function or other callable: standardize a feature if the callable returns `true` on its name. For example, `Standardizer(features = name -> name in [:x1, :x3], ignore = true, count=true)` has the same effect as `Standardizer(features = [:x1, :x3], ignore = true, count=true)`, namely to standardize all `Continuous` and `Count` features, with the exception of `:x1` and `:x3`.\n\n Note this behavior is further modified if the `ordered_factor` or `count` flags are set to `true`; see below\n * `ignore=false`: whether to ignore or standardize specified `features`, as explained above\n * `ordered_factor=false`: if `true`, standardize any `OrderedFactor` feature wherever a `Continuous` feature would be standardized, as described above\n * `count=false`: if `true`, standardize any `Count` feature wherever a `Continuous` feature would be standardized, as described above\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with relevant features standardized according to the rescalings learned during fitting of `mach`.\n * `inverse_transform(mach, Z)`: apply the inverse transformation to `Z`, so that `inverse_transform(mach, transform(mach, Xnew))` is approximately the same as `Xnew`; unavailable if `ordered_factor` or `count` flags were set to `true`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_fit` - the names of features that will be standardized\n * `means` - the 
corresponding untransformed mean values\n * `stds` - the corresponding untransformed standard deviations\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features_fit`: the names of features that will be standardized\n\n# Examples\n\n```julia\nusing MLJ\n\nX = (ordinal1 = [1, 2, 3],\n ordinal2 = coerce([:x, :y, :x], OrderedFactor),\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = coerce([\"Your father\", \"he\", \"is\"], Multiclass));\n\njulia> schema(X)\n┌──────────┬──────────────────┐\n│ names │ scitypes │\n├──────────┼──────────────────┤\n│ ordinal1 │ Count │\n│ ordinal2 │ OrderedFactor{2} │\n│ ordinal3 │ Continuous │\n│ ordinal4 │ Continuous │\n│ nominal │ Multiclass{3} │\n└──────────┴──────────────────┘\n\nstand1 = Standardizer();\n\njulia> transform(fit!(machine(stand1, X)), X)\n(ordinal1 = [1, 2, 3],\n ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x],\n ordinal3 = [-1.0, 0.0, 1.0],\n ordinal4 = [1.0, 0.0, -1.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n\nstand2 = Standardizer(features=[:ordinal3, ], ignore=true, count=true);\n\njulia> transform(fit!(machine(stand2, X)), X)\n(ordinal1 = [-1.0, 0.0, 1.0],\n ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x],\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [1.0, 0.0, -1.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n```\n\nSee also [`OneHotEncoder`](@ref), [`ContinuousEncoder`](@ref).\n""" -":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "Standardizer" +":docstring" = """```julia\nKernelPCA\n```\n\nA model type for constructing a kernel prinicipal component analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the 
type can be imported using\n\n```julia\nKernelPCA = @load KernelPCA pkg=MultivariateStats\n```\n\nDo `model = KernelPCA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `KernelPCA(maxoutdim=...)`.\n\nIn kernel PCA the linear operations of ordinary principal component analysis are performed in a [reproducing kernel Hilbert space](https://en.wikipedia.org/wiki/Reproducing_kernel_Hilbert_space).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `maxoutdim=0`: Controls the dimension (number of columns) of the output, `outdim`. Specifically, `outdim = min(n, indim, maxoutdim)`, where `n` is the number of observations and `indim` the input dimension.\n * `kernel::Function=(x,y)->x'y`: The kernel function, takes in 2 vector arguments x and y, returns a scalar value. 
Defaults to the dot product of `x` and `y`.\n * `solver::Symbol=:eig`: solver to use for the eigenvalues, one of `:eig` (default, uses `LinearAlgebra.eigen`), `:eigs` (uses `Arpack.eigs`).\n * `inverse::Bool=true`: perform calculations needed for inverse transform\n * `beta::Real=1.0`: strength of the ridge regression that learns the inverse transform when inverse is true.\n * `tol::Real=0.0`: Convergence tolerance for eigenvalue solver.\n * `maxiter::Int=300`: maximum number of iterations for eigenvalue solver.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having the same number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. 
In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(mach, Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and output respectively.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data.\n * `principalvars`: The variance of the principal components.\n\n# Examples\n\n```julia\nusing MLJ\nusing LinearAlgebra\n\nKernelPCA = @load KernelPCA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nfunction rbf_kernel(length_scale)\n return (x,y) -> norm(x-y)^2 / ((2 * length_scale)^2)\nend\n\nmodel = KernelPCA(maxoutdim=2, kernel=rbf_kernel(1))\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`PCA`](@ref), [`ICA`](@ref), [`FactorAnalysis`](@ref), [`PPCA`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "KernelPCA" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -8804,563 +8693,674 @@ ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" 
":constructor" = "`nothing`" -[MLJTransforms.UnivariateTimeTypeToContinuous] +[MLJMultivariateStatsInterface.MultitargetRidgeRegressor] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Union{Nothing, Dates.TimeType}\", \"Dates.Period\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_types" = "`(\"Union{Real, AbstractVecOrMat}\", \"Bool\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" ":hyperparameter_ranges" = "`(nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.ScientificTimeType}}`" -":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.UnivariateTimeTypeToContinuous" -":hyperparameters" = "`(:zero_time, :step)`" +":prediction_type" = ":deterministic" +":load_path" = "MLJMultivariateStatsInterface.MultitargetRidgeRegressor" +":hyperparameters" = "`(:lambda, :bias)`" ":is_pure_julia" = "`true`" -":human_name" = "single variable transformer that creates continuous representations of temporally typed data" -":is_supervised" = "`false`" +":human_name" = "multitarget ridge regressor" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nUnivariateTimeTypeToContinuous\n```\n\nA model type for constructing a single variable transformer that creates continuous representations of temporally typed data, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can 
be imported using\n\n```julia\nUnivariateTimeTypeToContinuous = @load UnivariateTimeTypeToContinuous pkg=MLJTransforms\n```\n\nDo `model = UnivariateTimeTypeToContinuous()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateTimeTypeToContinuous(zero_time=...)`.\n\nUse this model to convert vectors with a `TimeType` element type to vectors of `Float64` type (`Continuous` element scitype).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector whose element type is a subtype of `Dates.TimeType`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `zero_time`: the time that is to correspond to 0.0 under transformations, with the type coinciding with the training data element type. If unspecified, the earliest time encountered in training is used.\n * `step::Period=Hour(24)`: time interval to correspond to one unit under transformation\n\n# Operations\n\n * `transform(mach, xnew)`: apply the encoding inferred when `mach` was fit\n\n# Fitted parameters\n\n`fitted_params(mach).fitresult` is the tuple `(zero_time, step)` actually used in transformations, which may differ from the user-specified hyper-parameters.\n\n# Example\n\n```julia\nusing MLJ\nusing Dates\n\nx = [Date(2001, 1, 1) + Day(i) for i in 0:4]\n\nencoder = UnivariateTimeTypeToContinuous(zero_time=Date(2000, 1, 1),\n step=Week(1))\n\nmach = machine(encoder, x)\nfit!(mach)\njulia> transform(mach, x)\n5-element Vector{Float64}:\n 52.285714285714285\n 52.42857142857143\n 52.57142857142857\n 52.714285714285715\n 52.857142\n```\n""" -":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.ScientificTimeType}`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "UnivariateTimeTypeToContinuous" -":target_in_fit" = "`false`" 
+":docstring" = """```julia\nMultitargetRidgeRegressor\n```\n\nA model type for constructing a multitarget ridge regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nMultitargetRidgeRegressor = @load MultitargetRidgeRegressor pkg=MultivariateStats\n```\n\nDo `model = MultitargetRidgeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetRidgeRegressor(lambda=...)`.\n\nMulti-target ridge regression adds a quadratic penalty term to multi-target least squares regression, for regularization. Ridge regression is particularly useful in the case of multicollinearity. In this case, the output represents a response vector. Options exist to specify a bias term, and to adjust the strength of the penalty term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any table of responses whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `lambda=1.0`: Is the non-negative parameter for the regularization strength. 
If lambda is 0, ridge regression is equivalent to linear least squares regression, and as lambda approaches infinity, all the linear coefficients approach 0.\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```julia\nusing MLJ\nusing DataFrames\n\nRidgeRegressor = @load MultitargetRidgeRegressor pkg=MultivariateStats\n\nX, y = make_regression(100, 6; n_targets = 2) # a table and a table (synthetic data)\n\nridge_regressor = RidgeRegressor(lambda=1.5)\nmach = machine(ridge_regressor, X, y) |> fit!\n\nXnew, _ = make_regression(3, 6)\nyhat = predict(mach, Xnew) # new predictions\n```\n\nSee also [`LinearRegressor`](@ref), [`MultitargetLinearRegressor`](@ref), [`RidgeRegressor`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "MultitargetRidgeRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":target_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" 
+":constructor" = "`nothing`" + +[MLJMultivariateStatsInterface.SubspaceLDA] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Bool\", \"Int64\", \"Distances.SemiMetric\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "MIT" +":prediction_type" = ":probabilistic" +":load_path" = "MLJMultivariateStatsInterface.SubspaceLDA" +":hyperparameters" = "`(:normalize, :outdim, :dist)`" +":is_pure_julia" = "`true`" +":human_name" = "subpace LDA model" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nSubspaceLDA\n```\n\nA model type for constructing a subpace LDA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nSubspaceLDA = @load SubspaceLDA pkg=MultivariateStats\n```\n\nDo `model = SubspaceLDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SubspaceLDA(normalize=...)`.\n\nMulticlass subspace linear discriminant analysis (LDA) is a variation on ordinary [`LDA`](@ref) suitable for high dimensional data, as it avoids storing scatter matrices. For details, refer the [MultivariateStats.jl documentation](https://juliastats.org/MultivariateStats.jl/stable/).\n\nIn addition to dimension reduction (using `transform`) probabilistic classification is provided (using `predict`). 
In the case of classification, the class probability for a new observation reflects the proximity of that observation to training observations associated with that class, and how far away the observation is from observations associated with other classes. Specifically, the distances, in the transformed (projected) space, of a new observation, from the centroid of each target class, are computed; the resulting vector of distances, multiplied by minus one, is passed to a softmax function to obtain a class probability prediction. Here \"distance\" is computed using a user-specified distance function.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `normalize=true`: Option to normalize the between class variance for the number of observations in each class, one of `true` or `false`.\n * `outdim`: the output dimension, automatically set to `min(indim, nclasses-1)` if equal to `0`. 
If a non-zero `outdim` is passed, then the actual output dimension used is `min(rank, outdim)` where `rank` is the rank of the within-class covariance matrix.\n * `dist=Distances.SqEuclidean()`: The distance metric to use when performing classification (to compare the distance between a new point and centroids in the transformed space); must be a subtype of `Distances.SemiMetric` from Distances.jl, e.g., `Distances.CosineDist`.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space i.e the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The mean of the untransformed training data. A vector of length `indim`.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool).\n * `class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n * `class_weights`: The weights (class counts) of each class. A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. 
(See fitted params section above.)\n * `explained_variance_ratio`: The ratio of explained variance to total variance. Each dimension corresponds to an eigenvalue.\n\n# Examples\n\n```julia\nusing MLJ\n\nSubspaceLDA = @load SubspaceLDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = SubspaceLDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n```\n\nSee also [`LDA`](@ref), [`BayesianLDA`](@ref), [`BayesianSubspaceLDA`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "SubspaceLDA" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`AbstractVector{<:ScientificTypesBase.ScientificTimeType}`" -":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[MLJTransforms.OneHotEncoder] +[MLJMultivariateStatsInterface.BayesianLDA] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Vector{Symbol}\", \"Bool\", \"Bool\", \"Bool\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" 
-":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":hyperparameter_types" = "`(\"Symbol\", \"StatsBase.CovarianceEstimator\", \"StatsBase.CovarianceEstimator\", \"Int64\", \"Float64\", \"Union{Nothing, Dict{<:Any, <:Real}, CategoricalDistributions.UnivariateFinite{<:Any, <:Any, <:Any, <:Real}}\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":output_scitype" = "`ScientificTypesBase.Table`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.OneHotEncoder" -":hyperparameters" = "`(:features, :drop_last, :ordered_factor, :ignore)`" +":prediction_type" = ":probabilistic" +":load_path" = "MLJMultivariateStatsInterface.BayesianLDA" +":hyperparameters" = "`(:method, :cov_w, :cov_b, :outdim, :regcoef, :priors)`" ":is_pure_julia" = "`true`" -":human_name" = "one-hot encoder" -":is_supervised" = "`false`" +":human_name" = "Bayesian LDA model" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nOneHotEncoder\n```\n\nA model type for constructing a one-hot encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nOneHotEncoder = @load OneHotEncoder pkg=MLJTransforms\n```\n\nDo `model = OneHotEncoder()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `OneHotEncoder(features=...)`.\n\nUse this model to one-hot encode the `Multiclass` and `OrderedFactor` features (columns) of some table, leaving other columns unchanged.\n\nNew data to be transformed may lack features present in the fit data, but no *new* features can be present.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo ensure *all* features are transformed into `Continuous` features, or dropped, use [`ContinuousEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table. Columns can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded. Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of symbols (feature names). If empty (default) then all `Multiclass` and `OrderedFactor` features are encoded. Otherwise, encoding is further restricted to the specified features (`ignore=false`) or the unspecified features (`ignore=true`). This default behavior can be modified by the `ordered_factor` flag.\n * `ordered_factor=false`: when `true`, `OrderedFactor` features are universally excluded\n * `drop_last=false`: whether to drop the column corresponding to the final class of encoded features. 
For example, a three-class feature is spawned into three new features if `drop_last=false`, but just two features otherwise.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `all_features`: names of all features encountered in training\n * `fitted_levels_given_feature`: dictionary of the levels associated with each feature encoded, keyed on the feature name\n * `ref_name_pairs_given_feature`: dictionary of pairs `r => ftr` (such as `0x00000001 => :grad__A`) where `r` is a CategoricalArrays.jl reference integer representing a level, and `ftr` the corresponding new feature name; the dictionary is keyed on the names of features that are encoded\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features_to_be_encoded`: names of input features to be encoded\n * `new_features`: names of all output features\n\n# Example\n\n```julia\nusing MLJ\n\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n height=[1.85, 1.67, 1.5, 1.67],\n n_devices=[3, 2, 4, 3])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names │ scitypes │\n├───────────┼──────────────────┤\n│ name │ Multiclass{4} │\n│ grade │ OrderedFactor{3} │\n│ height │ Continuous │\n│ n_devices │ Count │\n└───────────┴──────────────────┘\n\nhot = OneHotEncoder(drop_last=true)\nmach = fit!(machine(hot, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names │ scitypes │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John │ Continuous │\n│ name__Lee │ Continuous │\n│ grade__A │ Continuous │\n│ grade__B │ Continuous │\n│ height │ Continuous │\n│ n_devices │ Count │\n└──────────────┴────────────┘\n```\n\nSee also [`ContinuousEncoder`](@ref).\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "OneHotEncoder" -":target_in_fit" = 
"`false`" +":docstring" = """```julia\nBayesianLDA\n```\n\nA model type for constructing a Bayesian LDA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nBayesianLDA = @load BayesianLDA pkg=MultivariateStats\n```\n\nDo `model = BayesianLDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `BayesianLDA(method=...)`.\n\nThe Bayesian multiclass LDA algorithm learns a projection matrix as described in ordinary [`LDA`](@ref). Predicted class posterior probability distributions are derived by applying Bayes' rule with a multivariate Gaussian class-conditional distribution. A prior class distribution can be specified by the user or inferred from training data class frequency.\n\nSee also the [package documentation](https://multivariatestatsjl.readthedocs.io/en/latest/lda.html). For more information about the algorithm, see [Li, Zhu and Ogihara (2006): Using Discriminant Analysis for Multi-class Classification: An Experimental Investigation](https://doi.org/10.1007/s10115-006-0013-y).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `method::Symbol=:gevd`: choice of solver, one of `:gevd` or `:whiten` methods.\n * `cov_w::StatsBase.SimpleCovariance()`: An estimator for the within-class covariance (used in computing the within-class scatter matrix, `Sw`). 
Any robust estimator from `CovarianceEstimation.jl` can be used.\n * `cov_b::StatsBase.SimpleCovariance()`: The same as `cov_w` but for the between-class covariance (used in computing the between-class scatter matrix, `Sb`).\n * `outdim::Int=0`: The output dimension, i.e., dimension of the transformed space, automatically set to `min(indim, nclasses-1)` if equal to 0.\n * `regcoef::Float64=1e-6`: The regularization coefficient. A positive value `regcoef*eigmax(Sw)` where `Sw` is the within-class scatter matrix, is added to the diagonal of `Sw` to improve numerical stability. This can be useful if using the standard covariance estimator.\n * `priors::Union{Nothing, UnivariateFinite{<:Any, <:Any, <:Any, <:Real}, Dict{<:Any, <:Real}} = nothing`: For use in prediction with Bayes rule. If `priors = nothing` then `priors` are estimated from the class proportions in the training data. Otherwise it requires a `Dict` or `UnivariateFinite` object specifying the classes with non-zero probabilities in the training target.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n * `priors`: The class priors for classification. As inferred from training target `y`, if not user-specified. 
A `UnivariateFinite` object with levels (classes) consistent with `levels(y)`.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space i.e the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The mean of the untransformed training data. A vector of length `indim`.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool).\n * `class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n * `class_weights`: The weights (class counts) of each class. A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. (See fitted params section above.)\n * `Sb`: The between class scatter matrix.\n * `Sw`: The within class scatter matrix.\n\n# Examples\n\n```julia\nusing MLJ\n\nBayesianLDA = @load BayesianLDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = BayesianLDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n```\n\nSee also [`LDA`](@ref), [`SubspaceLDA`](@ref), [`BayesianSubspaceLDA`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "BayesianLDA" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform", ":OneHotEncoder"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = 
"`ScientificTypesBase.Unknown`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Table`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[MLJTransforms.ContinuousEncoder] +[MLJMultivariateStatsInterface.PCA] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Bool\", \"Bool\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing)`" +":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Float64\", \"Union{Nothing, Real, Vector{Float64}}\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" ":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.ContinuousEncoder" -":hyperparameters" = "`(:drop_last, :one_hot_ordered_factors)`" +":load_path" = "MLJMultivariateStatsInterface.PCA" +":hyperparameters" = "`(:maxoutdim, :method, :variance_ratio, :mean)`" ":is_pure_julia" = "`true`" -":human_name" = "continuous encoder" +":human_name" = "pca" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nContinuousEncoder\n```\n\nA model type 
for constructing a continuous encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nContinuousEncoder = @load ContinuousEncoder pkg=MLJTransforms\n```\n\nDo `model = ContinuousEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ContinuousEncoder(drop_last=...)`.\n\nUse this model to arrange all features (features) of a table to have `Continuous` element scitype, by applying the following protocol to each feature `ftr`:\n\n * If `ftr` is already `Continuous` retain it.\n * If `ftr` is `Multiclass`, one-hot encode it.\n * If `ftr` is `OrderedFactor`, replace it with `coerce(ftr, Continuous)` (vector of floating point integers), unless `ordered_factors=false` is specified, in which case one-hot encode it.\n * If `ftr` is `Count`, replace it with `coerce(ftr, Continuous)`.\n * If `ftr` has some other element scitype, or was not observed in fitting the encoder, drop it from the table.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo selectively one-hot-encode categorical features (without dropping features) use [`OneHotEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table. features can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded. Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `drop_last=true`: whether to drop the column corresponding to the final class of one-hot encoded features. 
For example, a three-class feature is spawned into three new features if `drop_last=false`, but two just features otherwise.\n * `one_hot_ordered_factors=false`: whether to one-hot any feature with `OrderedFactor` element scitype, or to instead coerce it directly to a (single) `Continuous` feature using the order\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_to_keep`: names of features that will not be dropped from the table\n * `one_hot_encoder`: the `OneHotEncoder` model instance for handling the one-hot encoding\n * `one_hot_encoder_fitresult`: the fitted parameters of the `OneHotEncoder` model\n\n# Report\n\n * `features_to_keep`: names of input features that will not be dropped from the table\n * `new_features`: names of all output features\n\n# Example\n\n```julia\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n height=[1.85, 1.67, 1.5, 1.67],\n n_devices=[3, 2, 4, 3],\n comments=[\"the force\", \"be\", \"with you\", \"too\"])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names │ scitypes │\n├───────────┼──────────────────┤\n│ name │ Multiclass{4} │\n│ grade │ OrderedFactor{3} │\n│ height │ Continuous │\n│ n_devices │ Count │\n│ comments │ Textual │\n└───────────┴──────────────────┘\n\nencoder = ContinuousEncoder(drop_last=true)\nmach = fit!(machine(encoder, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names │ scitypes │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John │ Continuous │\n│ name__Lee │ Continuous │\n│ grade │ Continuous │\n│ height │ Continuous │\n│ n_devices │ Continuous │\n└──────────────┴────────────┘\n\njulia> setdiff(schema(X).names, report(mach).features_to_keep) # dropped features\n1-element Vector{Symbol}:\n :comments\n\n```\n\nSee also [`OneHotEncoder`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":package_url" = 
"https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "ContinuousEncoder" +":docstring" = """```julia\nPCA\n```\n\nA model type for constructing a pca, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nPCA = @load PCA pkg=MultivariateStats\n```\n\nDo `model = PCA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `PCA(maxoutdim=...)`.\n\nPrincipal component analysis learns a linear projection onto a lower dimensional space while preserving most of the initial variance seen in the training data.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `maxoutdim=0`: Together with `variance_ratio`, controls the output dimension `outdim` chosen by the model. Specifically, suppose that `k` is the smallest integer such that retaining the `k` most significant principal components accounts for `variance_ratio` of the total variance in the training data. Then `outdim = min(outdim, maxoutdim)`. If `maxoutdim=0` (default) then the effective `maxoutdim` is `min(n, indim - 1)` where `n` is the number of observations and `indim` the number of features in the training data.\n * `variance_ratio::Float64=0.99`: The ratio of variance preserved after the transformation\n * `method=:auto`: The method to use to solve the problem. 
Choices are\n\n * `:svd`: Singular Value Decomposition of the matrix.\n * `:cov`: Covariance matrix decomposition.\n * `:auto`: Use `:cov` if the matrix's first dimension is smaller than its second dimension and otherwise use `:svd`\n * `mean=nothing`: if `nothing`, centering will be computed and applied, if set to `0` no centering (data is assumed pre-centered); if a vector is passed, the centering is done with that vector.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having the same number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(mach, Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and output respectively.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim = min(n, indim, maxoutdim)` is the output dimension; here `n` is the number of observations.\n * `tprincipalvar`: Total variance of the principal components.\n * `tresidualvar`: Total residual variance.\n * `tvar`: Total observation variance (principal + residual variance).\n * `mean`: The mean of the untransformed training data, of length `indim`.\n * `principalvars`: The variance of the principal components. 
An AbstractVector of length `outdim`\n * `loadings`: The models loadings, weights for each variable used when calculating principal components. A matrix of size (`indim`, `outdim`) where `indim` and `outdim` are as defined above.\n\n# Examples\n\n```julia\nusing MLJ\n\nPCA = @load PCA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = PCA(maxoutdim=2)\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`KernelPCA`](@ref), [`ICA`](@ref), [`FactorAnalysis`](@ref), [`PPCA`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "PCA" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform", ":ContinuousEncoder"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":constructor" = "`nothing`" -[MLJTransforms.FrequencyEncoder] -":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Bool\", \"Type\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +[MLJLIBSVMInterface.OneClassSVM] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", \"Float64\", 
\"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" +":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":output_scitype" = "`ScientificTypesBase.Table`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Unknown}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Binary}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "unknown" ":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.FrequencyEncoder" -":hyperparameters" = "`(:features, :ignore, :ordered_factor, :normalize, :output_type)`" -":is_pure_julia" = "`true`" -":human_name" = "frequency encoder" +":load_path" = "MLJLIBSVMInterface.OneClassSVM" +":hyperparameters" = "`(:kernel, :gamma, :nu, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" +":is_pure_julia" = "`false`" +":human_name" = "one-class support vector machine" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nFrequencyEncoder\n```\n\nA model type for constructing a frequency encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nFrequencyEncoder = @load FrequencyEncoder pkg=MLJTransforms\n```\n\nDo `model = FrequencyEncoder()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `FrequencyEncoder(features=...)`.\n\n`FrequencyEncoder` implements frequency encoding which replaces the categorical values in the specified categorical features with their (normalized or raw) frequencies of occurrence in the dataset. \n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or in clude from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded.\n * `ignore=true`: Whether to exclude or include the features given in `features`\n * `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them\n * `normalize=false`: Whether to use normalized frequencies that sum to 1 over category values or to use raw counts.\n * `output_type=Float32`: The type of the output values. The default is `Float32`, but you can set it to `Float64` or any other type that can hold the frequency values.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply frequency encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. 
Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `statistic_given_feat_val`: A dictionary that maps each level for each column in a subset of the categorical features of X into its frequency.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",] \nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",] \nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\n\n# Check scitype coercions:\nschema(X)\n\nencoder = FrequencyEncoder(ordered_factor = false, normalize=true)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia > Xnew\n (A = [2, 1, 2, 2, 2],\n B = [1.0, 2.0, 3.0, 4.0, 5.0],\n C = [4, 4, 4, 1, 4],\n D = [3, 2, 3, 2, 3],\n E = CategoricalArrays.CategoricalValue{Int64, UInt32}[1, 2, 3, 4, 5],)\n```\n\nSee also [`TargetEncoder`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "FrequencyEncoder" +":docstring" = """```julia\nOneClassSVM\n```\n\nA model type for constructing a one-class support vector machine, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nOneClassSVM = @load OneClassSVM pkg=LIBSVM\n```\n\nDo `model = OneClassSVM()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `OneClassSVM(kernel=...)`.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\nThis model is an outlier detection model delivering raw scores based on the decision function of a support vector machine. Like the [`NuSVC`](@ref) classifier, it uses the `nu` re-parameterization of the `cost` parameter appearing in standard support vector classification [`SVC`](@ref).\n\nTo extract normalized scores (\"probabilities\") wrap the model using `ProbabilisticDetector` from [OutlierDetection.jl](https://github.com/OutlierDetectionJL/OutlierDetection.jl). For threshold-based classification, wrap the probabilistic model using MLJ's `BinaryThresholdPredictor`. Examples of wrapping appear below.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```julia\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. 
Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> (gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) -> tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIBSVM.jl issue 91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin errors and a lower bound of the fraction of support vectors. Denoted `ν` in the cited paper. Changing `nu` changes the thickness of the margin (a neighborhood of the decision surface) and a margin error is said to have occurred if a training observation lies on the wrong side of the surface or within the margin.\n * `cachesize=200.0`: cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `transform(mach, Xnew)`: return scores for outlierness, given features `Xnew` having the same scitype as `X` above. The greater the score, the more likely it is an outlier. This score is based on the SVM decision function. 
For normalized scores, wrap `model` using `ProbabilisticDetector` from OutlierDetection.jl and call `predict` instead, and for threshold-based classification, wrap again using `BinaryThresholdPredictor`. See the examples below.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `orientation`: this equals `1` if the decision function for `libsvm_model` is increasing with increasing outlierness, and `-1` if it is decreasing instead. Correspondingly, the `libsvm_model` attaches `true` to outliers in the first case, and `false` in the second. (The `scores` given in the MLJ report and generated by `MLJ.transform` already correct for this ambiguity, which is therefore only an issue for users directly accessing `libsvm_model`.)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Generating raw scores for outlierness\n\n```julia\nusing MLJ\nimport LIBSVM\nimport StableRNGs.StableRNG\n\nOneClassSVM = @load OneClassSVM pkg=LIBSVM # model type\nmodel = OneClassSVM(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nrng = StableRNG(123)\nXmatrix = randn(rng, 5, 3)\nXmatrix[1, 1] = 100.0\nX = MLJ.table(Xmatrix)\n\nmach = machine(model, X) |> fit!\n\n# training scores (outliers have larger scores):\njulia> report(mach).scores\n5-element Vector{Float64}:\n 6.711689156091755e-7\n -6.740101976655081e-7\n -6.711632439648446e-7\n -6.743015858874887e-7\n -6.745393717880104e-7\n\n# scores for new data:\nXnew = MLJ.table(rand(rng, 2, 3))\n\njulia> transform(mach, rand(rng, 2, 3))\n2-element Vector{Float64}:\n -6.746293022511047e-7\n -6.744289265348623e-7\n```\n\n## Generating probabilistic predictions of outlierness\n\nContinuing the previous example:\n\n```julia\nusing OutlierDetection\npmodel = ProbabilisticDetector(model)\npmach = machine(pmodel, X) |> fit!\n\n# probabilistic predictions on new 
data:\n\njulia> y_prob = predict(pmach, Xnew)\n2-element UnivariateFiniteVector{OrderedFactor{2}, String, UInt8, Float64}:\n UnivariateFinite{OrderedFactor{2}}(normal=>1.0, outlier=>9.57e-5)\n UnivariateFinite{OrderedFactor{2}}(normal=>1.0, outlier=>0.0)\n\n# probabilities for outlierness:\n\njulia> pdf.(y_prob, \"outlier\")\n2-element Vector{Float64}:\n 9.572583265925801e-5\n 0.0\n\n# raw scores are still available using `transform`:\n\njulia> transform(pmach, Xnew)\n2-element Vector{Float64}:\n 9.572583265925801e-5\n 0.0\n```\n\n## Outlier classification using a probability threshold:\n\nContinuing the previous example:\n\n```julia\ndmodel = BinaryThresholdPredictor(pmodel, threshold=0.9)\ndmach = machine(dmodel, X) |> fit!\n\njulia> yhat = predict(dmach, Xnew)\n2-element CategoricalArrays.CategoricalArray{String,1,UInt8}:\n \"normal\"\n \"normal\"\n```\n\n## User-defined kernels\n\nContinuing the first example:\n\n```julia\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = OneClassSVM(kernel=k)\nmach = machine(model, X) |> fit!\n\njulia> yhat = transform(mach, Xnew)\n2-element Vector{Float64}:\n -0.4825363352732942\n -0.4848772169720227\n```\n\nSee also [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README). 
For an alternative source of outlier detection models with an MLJ interface, see [OutlierDetection.jl](https://outlierdetectionjl.github.io/OutlierDetection.jl/dev/).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/mpastell/LIBSVM.jl" +":package_name" = "LIBSVM" +":name" = "OneClassSVM" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":constructor" = "`nothing`" - -[MLJTransforms.TargetEncoder] +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Real\", \"Union{Real, Symbol}\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" + +[MLJLIBSVMInterface.EpsilonSVR] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" +":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table, ScientificTypesBase.Unknown}`" -":output_scitype" = "`ScientificTypesBase.Table`" +":fit_data_scitype" = 
"`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "unknown" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.TargetEncoder" -":hyperparameters" = "`(:features, :ignore, :ordered_factor, :lambda, :m)`" -":is_pure_julia" = "`true`" -":human_name" = "target encoder" -":is_supervised" = "`false`" +":prediction_type" = ":deterministic" +":load_path" = "MLJLIBSVMInterface.EpsilonSVR" +":hyperparameters" = "`(:kernel, :gamma, :epsilon, :cost, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" +":is_pure_julia" = "`false`" +":human_name" = "ϵ-support vector regressor" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nTargetEncoder\n```\n\nA model type for constructing a target encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nTargetEncoder = @load TargetEncoder pkg=MLJTransforms\n```\n\nDo `model = TargetEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `TargetEncoder(features=...)`.\n\n`TargetEncoder` implements target encoding as defined in [1] to encode categorical variables into continuous ones using statistics from the target variable.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. 
Use `schema(X)` to check scitypes.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous` or `Count` for regression problems and `Multiclass` or `OrderedFactor` for classification problems; check the scitype with `schema(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or in clude from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded.\n\n * `ignore=true`: Whether to exclude or include the features given in `features`\n\n * `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them\n\n * `λ`: Shrinkage hyperparameter used to mix between posterior and prior statistics as described in [1]\n * `m`: An integer hyperparameter to compute shrinkage as described in [1]. If `m=:auto` then m will be computed using empirical Bayes estimation as described in [1]\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply target encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `task`: Whether the task is `Classification` or `Regression`\n * `y_statistic_given_feat_level`: A dictionary with the necessary statistics to encode each categorical feature. 
It maps each level in each categorical feature to a statistic computed over the target.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",]\nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",]\nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Define the target variable\ny = [\"c1\", \"c2\", \"c3\", \"c1\", \"c2\",]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\ny = coerce(y, Multiclass)\n\nencoder = TargetEncoder(ordered_factor = false, lambda = 1.0, m = 0,)\nmach = fit!(machine(encoder, X, y))\nXnew = transform(mach, X)\n\njulia > schema(Xnew)\n┌───────┬──────────────────┬─────────────────────────────────┐\n│ names │ scitypes │ types │\n├───────┼──────────────────┼─────────────────────────────────┤\n│ A_1 │ Continuous │ Float64 │\n│ A_2 │ Continuous │ Float64 │\n│ A_3 │ Continuous │ Float64 │\n│ B │ Continuous │ Float64 │\n│ C_1 │ Continuous │ Float64 │\n│ C_2 │ Continuous │ Float64 │\n│ C_3 │ Continuous │ Float64 │\n│ D_1 │ Continuous │ Float64 │\n│ D_2 │ Continuous │ Float64 │\n│ D_3 │ Continuous │ Float64 │\n│ E │ OrderedFactor{5} │ CategoricalValue{Int64, UInt32} │\n└───────┴──────────────────┴─────────────────────────────────┘\n```\n\n# Reference\n\n[1] Micci-Barreca, Daniele. “A preprocessing scheme for high-cardinality categorical attributes in classification and prediction problems” SIGKDD Explor. Newsl. 
3, 1 (July 2001), 27–32.\n\nSee also [`OneHotEncoder`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "TargetEncoder" +":docstring" = """```julia\nEpsilonSVR\n```\n\nA model type for constructing a ϵ-support vector regressor, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nEpsilonSVR = @load EpsilonSVR pkg=LIBSVM\n```\n\nDo `model = EpsilonSVR()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EpsilonSVR(kernel=...)`.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\nThis model is an adaptation of the classifier `SVC` to regression, but has an additional parameter `epsilon` (denoted $ϵ$ in the cited reference).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```julia\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. 
Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> (gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) -> tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIBSVM.jl issue 91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `epsilon=0.1` (range (0, `Inf`)): the parameter denoted $ϵ$ in the cited reference; `epsilon` is the thickness of the penalty-free neighborhood of the graph of the prediction function (\"slab\" or \"tube\"). 
Specifically, a data point `(x, y)` incurs no training loss unless it is outside this neighborhood; the further away it is from this neighborhood, the greater the loss penalty.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```julia\nusing MLJ\nimport LIBSVM\n\nEpsilonSVR = @load EpsilonSVR pkg=LIBSVM # model type\nmodel = EpsilonSVR(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = make_regression(rng=123) # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew, _ = make_regression(3, rng=123)\n\njulia> yhat = predict(mach, Xnew)\n3-element Vector{Float64}:\n 0.2512132502584155\n 0.007340201523624579\n -0.2482949812264707\n```\n\n## User-defined kernels\n\n```julia\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = EpsilonSVR(kernel=k)\nmach = machine(model, X, y) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element Vector{Float64}:\n 1.1121225361666656\n 0.04667702229741916\n -0.6958148424680672\n```\n\nSee also [`NuSVR`](@ref), [LIBSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/mpastell/LIBSVM.jl" +":package_name" = "LIBSVM" +":name" = "EpsilonSVR" ":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" 
-":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Table`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" + +[MLJLIBSVMInterface.LinearSVC] ":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"LIBSVM.Linearsolver.LINEARSOLVER\", \"Float64\", \"Float64\", \"Float64\")`" +":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, Any}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "unknown" +":prediction_type" = ":deterministic" +":load_path" = "MLJLIBSVMInterface.LinearSVC" +":hyperparameters" = "`(:solver, :tolerance, :cost, :bias)`" +":is_pure_julia" = "`false`" +":human_name" = "linear support vector classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nLinearSVC\n```\n\nA model type for 
constructing a linear support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nLinearSVC = @load LinearSVC pkg=LIBSVM\n```\n\nDo `model = LinearSVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearSVC(solver=...)`.\n\nReference for algorithm and core C-library: Rong-En Fan et al (2008): \"LIBLINEAR: A Library for Large Linear Classification.\" *Journal of Machine Learning Research* 9 1871-1874. Available at [https://www.csie.ntu.edu.tw/~cjlin/papers/liblinear.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/liblinear.pdf). \n\nThis model type is similar to `SVC` from the same package with the setting `kernel=LIBSVM.Kernel.Linear`, but is optimized for the linear case.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```julia\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n * `w`: a dictionary of class weights, keyed on `levels(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `solver=LIBSVM.Linearsolver.L2R_L2LOSS_SVC_DUAL`: linear solver, which must be one of the following from the LIBSVM.jl package:\n\n * `LIBSVM.Linearsolver.L2R_LR`: L2-regularized logistic regression (primal)\n * `LIBSVM.Linearsolver.L2R_L2LOSS_SVC_DUAL`: L2-regularized L2-loss support vector classification (dual)\n * `LIBSVM.Linearsolver.L2R_L2LOSS_SVC`: L2-regularized L2-loss support vector classification (primal)\n * `LIBSVM.Linearsolver.L2R_L1LOSS_SVC_DUAL`: 
L2-regularized L1-loss support vector classification (dual)\n * `LIBSVM.Linearsolver.MCSVM_CS`: support vector classification by Crammer and Singer\n * `LIBSVM.Linearsolver.L1R_L2LOSS_SVC`: L1-regularized L2-loss support vector classification\n * `LIBSVM.Linearsolver.L1R_LR`: L1-regularized logistic regression\n * `LIBSVM.Linearsolver.L2R_LR_DUAL`: L2-regularized logistic regression (dual)\n * `tolerance::Float64=Inf`: tolerance for the stopping criterion\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `bias= -1.0`: if `bias >= 0`, instance `x` becomes `[x; bias]`; if `bias < 0`, no bias term added (default -1)\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Examples\n\n```julia\nusing MLJ\nimport LIBSVM\n\nLinearSVC = @load LinearSVC pkg=LIBSVM # model type\nmodel = LinearSVC(solver=LIBSVM.Linearsolver.L2R_LR) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"versicolor\"\n \"virginica\"\n```\n\n## Incorporating class weights\n\n```julia\nweights = Dict(\"virginica\" => 1, \"versicolor\" => 20, \"setosa\" => 1)\nmach = machine(model, X, y, weights) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"versicolor\"\n \"versicolor\"\n 
\"versicolor\"\n```\n\nSee also the [`SVC`](@ref) and [`NuSVC`](@ref) classifiers, and [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/liblinear/blob/master/README).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/mpastell/LIBSVM.jl" +":package_name" = "LIBSVM" +":name" = "LinearSVC" +":target_in_fit" = "`true`" +":supports_class_weights" = "`true`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" -[MLJTransforms.UnivariateBoxCoxTransformer] +[MLJLIBSVMInterface.ProbabilisticSVC] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" +":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, Any}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "unknown" +":prediction_type" = 
":probabilistic" +":load_path" = "MLJLIBSVMInterface.ProbabilisticSVC" +":hyperparameters" = "`(:kernel, :gamma, :cost, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" +":is_pure_julia" = "`false`" +":human_name" = "probabilistic C-support vector classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nProbabilisticSVC\n```\n\nA model type for constructing a probabilistic C-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nProbabilisticSVC = @load ProbabilisticSVC pkg=LIBSVM\n```\n\nDo `model = ProbabilisticSVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ProbabilisticSVC(kernel=...)`.\n\nThis model is identical to [`SVC`](@ref) with the exception that it predicts probabilities, instead of actual class labels. Probabilities are computed using Platt scaling, which will add to the total computation time.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). 
\n\n[Platt, John (1999): \"Probabilistic Outputs for Support Vector Machines and Comparisons to Regularized Likelihood Methods.\"](https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393)\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```julia\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n * `w`: a dictionary of class weights, keyed on `levels(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> (gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) -> tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIBSVM.jl issue 91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. 
Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return probabilistic predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```julia\nusing MLJ\nimport LIBSVM\n\nProbabilisticSVC = @load ProbabilisticSVC pkg=LIBSVM # model type\nmodel = ProbabilisticSVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> probs = predict(mach, Xnew)\n3-element UnivariateFiniteVector{Multiclass{3}, String, UInt32, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>0.00186, versicolor=>0.003, virginica=>0.995)\n UnivariateFinite{Multiclass{3}}(setosa=>0.000563, versicolor=>0.0554, virginica=>0.944)\n UnivariateFinite{Multiclass{3}}(setosa=>1.4e-6, versicolor=>1.68e-6, virginica=>1.0)\n\n\njulia> labels = mode.(probs)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n 
\"virginica\"\n```\n\n## User-defined kernels\n\n```julia\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = ProbabilisticSVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\nprobs = predict(mach, Xnew)\n```\n\n## Incorporating class weights\n\nIn either scenario above, we can do:\n\n```julia\nweights = Dict(\"virginica\" => 1, \"versicolor\" => 20, \"setosa\" => 1)\nmach = machine(model, X, y, weights) |> fit!\n\nprobs = predict(mach, Xnew)\n```\n\nSee also the classifiers [`SVC`](@ref), [`NuSVC`](@ref) and [`LinearSVC`](@ref), and [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/mpastell/LIBSVM.jl" +":package_name" = "LIBSVM" +":name" = "ProbabilisticSVC" +":target_in_fit" = "`true`" +":supports_class_weights" = "`true`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Int64\", \"Bool\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing)`" + +[MLJLIBSVMInterface.NuSVR] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" +":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" 
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{AbstractVector{ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.UnivariateBoxCoxTransformer" -":hyperparameters" = "`(:n, :shift)`" -":is_pure_julia" = "`true`" -":human_name" = "single variable Box-Cox transformer" -":is_supervised" = "`false`" +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "unknown" +":prediction_type" = ":deterministic" +":load_path" = "MLJLIBSVMInterface.NuSVR" +":hyperparameters" = "`(:kernel, :gamma, :nu, :cost, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" +":is_pure_julia" = "`false`" +":human_name" = "ν-support vector regressor" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nUnivariateBoxCoxTransformer\n```\n\nA model type for constructing a single variable Box-Cox transformer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nUnivariateBoxCoxTransformer = @load UnivariateBoxCoxTransformer pkg=MLJTransforms\n```\n\nDo `model = UnivariateBoxCoxTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateBoxCoxTransformer(n=...)`.\n\nBox-Cox transformations attempt to make data look more normally distributed. 
This can improve performance and assist in the interpretation of models which suppose that data is generated by a normal distribution.\n\nA Box-Cox transformation (with shift) is of the form\n\n```julia\nx -> ((x + c)^λ - 1)/λ\n```\n\nfor some constant `c` and real `λ`, unless `λ = 0`, in which case the above is replaced with\n\n```julia\nx -> log(x + c)\n```\n\nGiven user-specified hyper-parameters `n::Integer` and `shift::Bool`, the present implementation learns the parameters `c` and `λ` from the training data as follows: If `shift=true` and zeros are encountered in the data, then `c` is set to `0.2` times the data mean. If there are no zeros, then no shift is applied. Finally, `n` different values of `λ` between `-0.4` and `3` are considered, with `λ` fixed to the value maximizing normality of the transformed data.\n\n*Reference:* [Wikipedia entry for power transform](https://en.wikipedia.org/wiki/Power_transform).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with element scitype `Continuous`; check the scitype with `scitype(x)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `n=171`: number of values of the exponent `λ` to try\n * `shift=false`: whether to include a preliminary constant translation in transformations, in the presence of zeros\n\n# Operations\n\n * `transform(mach, xnew)`: apply the Box-Cox transformation learned when fitting `mach`\n * `inverse_transform(mach, z)`: reconstruct the vector `z` whose transformation learned by `mach` is `z`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `λ`: the learned Box-Cox exponent\n * `c`: the learned shift\n\n# Examples\n\n```julia\nusing MLJ\nusing UnicodePlots\nusing Random\nRandom.seed!(123)\n\ntransf = UnivariateBoxCoxTransformer()\n\nx = randn(1000).^2\n\nmach = machine(transf, x)\nfit!(mach)\n\nz = transform(mach, x)\n\njulia> 
histogram(x)\n ┌ ┐\n [ 0.0, 2.0) ┤███████████████████████████████████ 848\n [ 2.0, 4.0) ┤████▌ 109\n [ 4.0, 6.0) ┤█▍ 33\n [ 6.0, 8.0) ┤▍ 7\n [ 8.0, 10.0) ┤▏ 2\n [10.0, 12.0) ┤ 0\n [12.0, 14.0) ┤▏ 1\n └ ┘\n Frequency\n\njulia> histogram(z)\n ┌ ┐\n [-5.0, -4.0) ┤█▎ 8\n [-4.0, -3.0) ┤████████▊ 64\n [-3.0, -2.0) ┤█████████████████████▊ 159\n [-2.0, -1.0) ┤█████████████████████████████▊ 216\n [-1.0, 0.0) ┤███████████████████████████████████ 254\n [ 0.0, 1.0) ┤█████████████████████████▊ 188\n [ 1.0, 2.0) ┤████████████▍ 90\n [ 2.0, 3.0) ┤██▊ 20\n [ 3.0, 4.0) ┤▎ 1\n └ ┘\n Frequency\n\n```\n""" -":inverse_transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "UnivariateBoxCoxTransformer" -":target_in_fit" = "`false`" +":docstring" = """```julia\nNuSVR\n```\n\nA model type for constructing a ν-support vector regressor, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nNuSVR = @load NuSVR pkg=LIBSVM\n```\n\nDo `model = NuSVR()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `NuSVR(kernel=...)`.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). 
\n\nThis model is a re-parameterization of `EpsilonSVR` in which the `epsilon` hyper-parameter is replaced with a new parameter `nu` (denoted $ν$ in the cited reference) which attempts to control the number of support vectors directly.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```julia\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> (gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) -> tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIBSVM.jl issue 91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. 
Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of training errors and a lower bound of the fraction of support vectors. Denoted $ν$ in the cited paper. Changing `nu` changes the thickness of some neighborhood of the graph of the prediction function (\"tube\" or \"slab\") and a training error is said to occur when a data point `(x, y)` lies outside of that neighborhood.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```julia\nusing MLJ\nimport LIBSVM\n\nNuSVR = @load NuSVR pkg=LIBSVM # model type\nmodel = NuSVR(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = make_regression(rng=123) # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew, _ = make_regression(3, rng=123)\n\njulia> yhat = predict(mach, Xnew)\n3-element Vector{Float64}:\n 0.2008156459920009\n 0.1131520519131709\n -0.2076156254934889\n```\n\n## User-defined kernels\n\n```julia\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = NuSVR(kernel=k)\nmach = machine(model, X, y) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element Vector{Float64}:\n 1.1211558175964662\n 0.06677125944808422\n 
-0.6817578942749346\n```\n\nSee also [`EpsilonSVR`](@ref), [LIBSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/mpastell/LIBSVM.jl" +":package_name" = "LIBSVM" +":name" = "NuSVR" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform", ":UnivariateBoxCoxTransformer"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":constructor" = "`nothing`" - -[MLJTransforms.InteractionTransformer] +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, Vector{Symbol}}\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing)`" + +[MLJLIBSVMInterface.NuSVC] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" +":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, 
nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{}`" -":output_scitype" = "`ScientificTypesBase.Table`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Static`" -":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.InteractionTransformer" -":hyperparameters" = "`(:order, :features)`" -":is_pure_julia" = "`true`" -":human_name" = "interaction transformer" -":is_supervised" = "`false`" +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "unknown" +":prediction_type" = ":deterministic" +":load_path" = "MLJLIBSVMInterface.NuSVC" +":hyperparameters" = "`(:kernel, :gamma, :nu, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" +":is_pure_julia" = "`false`" +":human_name" = "ν-support vector classifier" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nInteractionTransformer\n```\n\nA model type for constructing a interaction transformer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nInteractionTransformer = @load InteractionTransformer pkg=MLJTransforms\n```\n\nDo `model = InteractionTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `InteractionTransformer(order=...)`.\n\nGenerates all polynomial interaction terms up to the given order for the subset of chosen columns. Any column that contains elements with scitype `<:Infinite` is a valid basis to generate interactions. 
If `features` is not specified, all such columns with scitype `<:Infinite` in the table are used as a basis.\n\nIn MLJ or MLJBase, you can transform features `X` with the single call\n\n```julia\ntransform(machine(model), X)\n```\n\nSee also the example below.\n\n# Hyper-parameters\n\n * `order`: Maximum order of interactions to be generated.\n * `features`: Restricts interations generation to those columns\n\n# Operations\n\n * `transform(machine(model), X)`: Generates polynomial interaction terms out of table `X` using the hyper-parameters specified in `model`.\n\n# Example\n\n```julia\nusing MLJ\n\nX = (\n A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"]\n)\nit = InteractionTransformer(order=3)\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],\n A_C = [7, 16, 27],\n B_C = [28, 40, 54],\n A_B_C = [28, 80, 162],)\n\nit = InteractionTransformer(order=2, features=[:A, :B])\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],)\n\n```\n""" -":inverse_transform_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "InteractionTransformer" -":target_in_fit" = "`false`" +":docstring" = """```julia\nNuSVC\n```\n\nA model type for constructing a ν-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nNuSVC = @load NuSVC pkg=LIBSVM\n```\n\nDo `model = NuSVC()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `NuSVC(kernel=...)`.\n\nThis model is a re-parameterization of the [`SVC`](@ref) classifier, where `nu` replaces `cost`, and is mathematically equivalent to it. The parameter `nu` allows more direct control over the number of support vectors (see under \"Hyper-parameters\").\n\nThis model always predicts actual class labels. For probabilistic predictions, use instead [`ProbabilisticNuSVC`](@ref).\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```julia\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. 
Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin errors and a lower bound of the fraction of support vectors. Denoted `ν` in the cited paper. Changing `nu` changes the thickness of the margin (a neighborhood of the decision surface) and a margin error is said to have occurred if a training observation lies on the wrong side of the surface or within the margin.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```julia\nusing MLJ\nimport LIBSVM\n\nNuSVC = @load NuSVC pkg=LIBSVM # model type\nmodel = NuSVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = 
[2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```julia\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = NuSVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\nSee also the classifiers [`SVC`](@ref) and [`LinearSVC`](@ref), [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation. [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/mpastell/LIBSVM.jl" +":package_name" = "LIBSVM" +":name" = "NuSVC" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":constructor" = "`nothing`" - -[MLJTransforms.UnivariateDiscretizer] +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Int64\",)`" -":package_uuid" = 
"23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing,)`" + +[MLJLIBSVMInterface.SVC] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" +":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.OrderedFactor}`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, Any}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.UnivariateDiscretizer" -":hyperparameters" = "`(:n_classes,)`" -":is_pure_julia" = "`true`" -":human_name" = "single variable discretizer" -":is_supervised" = "`false`" +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "unknown" +":prediction_type" = ":deterministic" +":load_path" = "MLJLIBSVMInterface.SVC" +":hyperparameters" = "`(:kernel, :gamma, :cost, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" +":is_pure_julia" = "`false`" +":human_name" = "C-support vector classifier" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nUnivariateDiscretizer\n```\n\nA model type for constructing a single variable discretizer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported 
using\n\n```julia\nUnivariateDiscretizer = @load UnivariateDiscretizer pkg=MLJTransforms\n```\n\nDo `model = UnivariateDiscretizer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateDiscretizer(n_classes=...)`.\n\nDiscretization converts a `Continuous` vector into an `OrderedFactor` vector. In particular, the output is a `CategoricalVector` (whose reference type is optimized).\n\nThe transformation is chosen so that the vector on which the transformer is fit has, in transformed form, an approximately uniform distribution of values. Specifically, if `n_classes` is the level of discretization, then `2*n_classes - 1` ordered quantiles are computed, the odd quantiles being used for transforming (discretization) and the even quantiles for inverse transforming.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with `Continuous` element scitype; check scitype with `scitype(x)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `n_classes`: number of discrete classes in the output\n\n# Operations\n\n * `transform(mach, xnew)`: discretize `xnew` according to the discretization learned when fitting `mach`\n * `inverse_transform(mach, z)`: attempt to reconstruct from `z` a vector that transforms to give `z`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach).fitesult` include:\n\n * `odd_quantiles`: quantiles used for transforming (length is `n_classes - 1`)\n * `even_quantiles`: quantiles used for inverse transforming (length is `n_classes`)\n\n# Example\n\n```julia\nusing MLJ\nusing Random\nRandom.seed!(123)\n\ndiscretizer = UnivariateDiscretizer(n_classes=100)\nmach = machine(discretizer, randn(1000))\nfit!(mach)\n\njulia> x = rand(5)\n5-element Vector{Float64}:\n 0.8585244609846809\n 0.37541692370451396\n 0.6767070590395461\n 
0.9208844241267105\n 0.7064611415680901\n\njulia> z = transform(mach, x)\n5-element CategoricalArrays.CategoricalArray{UInt8,1,UInt8}:\n 0x52\n 0x42\n 0x4d\n 0x54\n 0x4e\n\nx_approx = inverse_transform(mach, z)\njulia> x - x_approx\n5-element Vector{Float64}:\n 0.008224506144777322\n 0.012731354778359405\n 0.0056265330571125816\n 0.005738175684445124\n 0.006835652575801987\n```\n""" -":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "UnivariateDiscretizer" -":target_in_fit" = "`false`" -":supports_class_weights" = "`false`" +":docstring" = """```julia\nSVC\n```\n\nA model type for constructing a C-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nSVC = @load SVC pkg=LIBSVM\n```\n\nDo `model = SVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SVC(kernel=...)`.\n\nThis model predicts actual class labels. To predict probabilities, use instead [`ProbabilisticSVC`](@ref).\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). 
\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```julia\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n * `w`: a dictionary of class weights, keyed on `levels(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. 
Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```julia\nusing MLJ\nimport LIBSVM\n\nSVC = @load SVC pkg=LIBSVM # model type\nmodel = SVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```julia\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = SVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## Incorporating class weights\n\nIn either scenario above, we can do:\n\n```julia\nweights = 
Dict(\"virginica\" => 1, \"versicolor\" => 20, \"setosa\" => 1)\nmach = machine(model, X, y, weights) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"versicolor\"\n \"versicolor\"\n \"versicolor\"\n```\n\nSee also the classifiers [`ProbabilisticSVC`](@ref), [`NuSVC`](@ref) and [`LinearSVC`](@ref). And see [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/mpastell/LIBSVM.jl" +":package_name" = "LIBSVM" +":name" = "SVC" +":target_in_fit" = "`true`" +":supports_class_weights" = "`true`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform", ":UnivariateDiscretizer"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":transform_scitype" = "`AbstractVector{<:ScientificTypesBase.OrderedFactor}`" -":constructor" = "`nothing`" - -[MLJTransforms.CardinalityReducer] +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Real\", \"Dict{T} where T<:Type\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, 
nothing, nothing)`" + +[MLJLIBSVMInterface.ProbabilisticNuSVC] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" +":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":output_scitype" = "`ScientificTypesBase.Table`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "unknown" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.CardinalityReducer" -":hyperparameters" = "`(:features, :ignore, :ordered_factor, :min_frequency, :label_for_infrequent)`" -":is_pure_julia" = "`true`" -":human_name" = "cardinality reducer" -":is_supervised" = "`false`" +":prediction_type" = ":probabilistic" +":load_path" = "MLJLIBSVMInterface.ProbabilisticNuSVC" +":hyperparameters" = "`(:kernel, :gamma, :nu, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" +":is_pure_julia" = "`false`" +":human_name" = "probabilistic ν-support vector classifier" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nCardinalityReducer\n```\n\nA model type for constructing a cardinality reducer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nCardinalityReducer = @load CardinalityReducer pkg=MLJTransforms\n```\n\nDo `model = CardinalityReducer()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `CardinalityReducer(features=...)`.\n\n`CardinalityReducer` maps any level of a categorical feature that occurs with frequency `< min_frequency` into a new level (e.g., \"Other\"). This is useful when some categorical features have high cardinality and many levels are infrequent. This assumes that the categorical features have raw types that are in `Union{AbstractString, Char, Number}`.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or in clude from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded.\n\n * `ignore=true`: Whether to exclude or include the features given in `features`\n\n * `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them\n\n * `min_frequency::Real=3`: Any level of a categorical feature that occurs with frequency < `min_frequency` will be mapped to a new level. Could be an integer or a float which decides whether raw counts or normalized frequencies are used.\n * `label_for_infrequent::Dict{<:Type, <:Any}()= Dict( AbstractString => \"Other\", Char => 'O', )`: A dictionary where the possible values for keys are the types in `Char`, `AbstractString`, and `Number` and each value signifies the new level to map into given a column raw super type. 
By default, if the raw type of the column subtypes `AbstractString` then the new value is `\"Other\"` and if the raw type subtypes `Char` then the new value is `'O'` and if the raw type subtypes `Number` then the new value is the lowest value in the column - 1.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply cardinality reduction to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `new_cat_given_col_val`: A dictionary that maps each level in a categorical feature to a new level (either itself or the new level specified in `label_for_infrequent`)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nimport StatsBase.proportionmap\nusing MLJ\n\n# Define categorical features\nA = [ [\"a\" for i in 1:100]..., \"b\", \"b\", \"b\", \"c\", \"d\"]\nB = [ [0 for i in 1:100]..., 1, 2, 3, 4, 4]\n\n# Combine into a named tuple\nX = (A = A, B = B)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Multiclass\n)\n\nencoder = CardinalityReducer(ordered_factor = false, min_frequency=3)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia> proportionmap(Xnew.A)\nDict{CategoricalArrays.CategoricalValue{String, UInt32}, Float64} with 3 entries:\n \"Other\" => 0.0190476\n \"b\" => 0.0285714\n \"a\" => 0.952381\n\njulia> proportionmap(Xnew.B)\nDict{CategoricalArrays.CategoricalValue{Int64, UInt32}, Float64} with 2 entries:\n 0 => 0.952381\n -1 => 0.047619\n```\n\nSee also [`FrequencyEncoder`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = 
"CardinalityReducer" -":target_in_fit" = "`false`" +":docstring" = """```julia\nProbabilisticNuSVC\n```\n\nA model type for constructing a probabilistic ν-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nProbabilisticNuSVC = @load ProbabilisticNuSVC pkg=LIBSVM\n```\n\nDo `model = ProbabilisticNuSVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ProbabilisticNuSVC(kernel=...)`.\n\nThis model is identical to [`NuSVC`](@ref) with the exception that it predicts probabilities, instead of actual class labels. Probabilities are computed using Platt scaling, which will add to total computation time.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). 
\n\n[Platt, John (1999): \"Probabilistic Outputs for Support Vector Machines and Comparisons to Regularized Likelihood Methods.\"](https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393)\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```julia\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. 
Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin errors and a lower bound of the fraction of support vectors. Denoted `ν` in the cited paper. Changing `nu` changes the thickness of the margin (a neighborhood of the decision surface) and a margin error is said to have occurred if a training observation lies on the wrong side of the surface or within the margin.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```julia\nusing MLJ\nimport LIBSVM\n\nProbabilisticNuSVC = @load ProbabilisticNuSVC pkg=LIBSVM # model type\nmodel = ProbabilisticNuSVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> probs = predict(mach, Xnew)\n3-element UnivariateFiniteVector{Multiclass{3}, String, UInt32, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>0.00313, versicolor=>0.0247, virginica=>0.972)\n UnivariateFinite{Multiclass{3}}(setosa=>0.000598, versicolor=>0.0155, 
virginica=>0.984)\n UnivariateFinite{Multiclass{3}}(setosa=>2.27e-6, versicolor=>2.73e-6, virginica=>1.0)\n\njulia> yhat = mode.(probs)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```julia\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = ProbabilisticNuSVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\nprobs = predict(mach, Xnew)\n```\n\nSee also the classifiers [`NuSVC`](@ref), [`SVC`](@ref), [`ProbabilisticSVC`](@ref) and [`LinearSVC`](@ref). And see [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation. [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/mpastell/LIBSVM.jl" +":package_name" = "LIBSVM" +":name" = "ProbabilisticNuSVC" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":constructor" = "`nothing`" - -[MLJTransforms.OrdinalEncoder] +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", 
\"Type\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" + +[MLJFlux.EntityEmbedder] +":is_wrapper" = "`true`" +":hyperparameter_types" = "`(\"Union{MLJFlux.MLJFluxDeterministic, MLJFlux.MLJFluxProbabilistic}\",)`" +":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" +":hyperparameter_ranges" = "`(nothing,)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":output_scitype" = "`ScientificTypesBase.Table`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "unknown" ":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.OrdinalEncoder" -":hyperparameters" = "`(:features, :ignore, :ordered_factor, :output_type)`" +":load_path" = "MLJFlux.EntityEmbedder" +":hyperparameters" = "`(:model,)`" ":is_pure_julia" = "`true`" -":human_name" = "ordinal encoder" +":human_name" = "entity embedder" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nOrdinalEncoder\n```\n\nA model type for constructing a ordinal encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nOrdinalEncoder = @load OrdinalEncoder pkg=MLJTransforms\n```\n\nDo `model = OrdinalEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `OrdinalEncoder(features=...)`.\n\n`OrdinalEncoder` implements ordinal encoding which replaces the categorical values in the specified categorical features with integers (ordered arbitrarily). 
This will create an implicit ordering between categories which may not be a proper modelling assumption.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or in clude from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded.\n * `ignore=true`: Whether to exclude or include the features given in `features`\n * `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them\n * `output_type`: The numerical concrete type of the encoded features. Default is `Float32`.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply ordinal encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. 
Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `index_given_feat_level`: A dictionary that maps each level for each column in a subset of the categorical features of X into an integer.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",] \nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",] \nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\n\n# Check scitype coercion:\nschema(X)\n\nencoder = OrdinalEncoder(ordered_factor = false)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia > Xnew\n (A = [2, 1, 2, 3, 3],\n B = [1.0, 2.0, 3.0, 4.0, 5.0],\n C = [1, 1, 1, 2, 1],\n D = [2, 1, 2, 1, 2],\n E = CategoricalArrays.CategoricalValue{Int64, UInt32}[1, 2, 3, 4, 5],)\n```\n\nSee also [`TargetEncoder`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "OrdinalEncoder" -":target_in_fit" = "`false`" +":docstring" = """```julia\nEntityEmbedder(; model=supervised_mljflux_model)\n```\n\nWrapper for a MLJFlux supervised model, to convert it to a transformer. Such transformers are still presented a target variable in training, but they behave as transformers in MLJ pipelines. 
They are entity embedding transformers, in the sense of the article, \"Entity Embeddings of Categorical Variables\" by Cheng Guo, Felix Berkhahn.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance `embed_model` to data with\n\n```julia\nmach = machine(embed_model, X, y)\n```\n\nHere:\n\n * `embed_model` is an instance of `EntityEmbedder`, which wraps a supervised MLJFlux model, `model`, which must be an instance of one of these: `MLJFlux.NeuralNetworkClassifier`, `NeuralNetworkBinaryClassifier`, `MLJFlux.NeuralNetworkRegressor`, `MLJFlux.MultitargetNeuralNetworkRegressor`.\n * `X` is any table of input features supported by the model being wrapped. Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n * `y` is the target, which can be any `AbstractVector` supported by the model being wrapped.\n\nTrain the machine using `fit!(mach)`.\n\n# Examples\n\nIn the following example we wrap a `NeuralNetworkClassifier` as an `EntityEmbedder`, so that it can be used to supply continuously encoded features to a nearest neighbor model, which does not support categorical features.\n\n## Simple Example\n\n```julia\nusing MLJ\n\n# Setup some data\nN = 400\nX = (\n a = rand(Float32, N),\n b = categorical(rand(\"abcde\", N)),\n c = categorical(rand(\"ABCDEFGHIJ\", N), ordered = true),\n)\n\ny = categorical(rand(\"YN\", N));\n\n# Initiate model\nEntityEmbedder = @load EntityEmbedder pkg=MLJFlux\n\n# Flux model to learn the entity embeddings:\nNeuralNetworkClassifier = @load NeuralNetworkClassifier pkg=MLJFlux\n\n# Instantiate the models:\nclf = NeuralNetworkClassifier(embedding_dims=Dict(:b => 2, :c => 3))\nemb = EntityEmbedder(clf)\n\n# Train and transform the data using the embedder:\nmach = machine(emb, X, y)\nfit!(mach)\nXnew = transform(mach, X)\n\n# Compare schemas before and after transformation\nschema(X)\nschema(Xnew)\n```\n\n## Using with Downstream Models (Pipeline)\n\n```julia\nusing
MLJ\n\n# Setup some data\nN = 400\nX = (\n a = rand(Float32, N),\n b = categorical(rand(\"abcde\", N)),\n c = categorical(rand(\"ABCDEFGHIJ\", N), ordered = true),\n)\n\ny = categorical(rand(\"YN\", N));\n\n# Initiate model\nEntityEmbedder = @load EntityEmbedder pkg=MLJFlux\n\n# Flux model to learn the entity embeddings:\nNeuralNetworkClassifier = @load NeuralNetworkClassifier pkg=MLJFlux\n\n# Other supervised model type, requiring `Continuous` features:\nKNNClassifier = @load KNNClassifier pkg=NearestNeighborModels\n\n# Instantiate the models:\nclf = NeuralNetworkClassifier(embedding_dims=Dict(:b => 2, :c => 3))\nemb = EntityEmbedder(clf)\n\n# Now construct the pipeline:\npipe = emb |> KNNClassifier()\n\n# And train it to make predictions:\nmach = machine(pipe, X, y)\nfit!(mach)\npredict(mach, X)[1:3]\n```\n\nIt is to be emphasized that the `NeuralNetworkClassifier` is only being used to learn entity embeddings, not to make predictions, which here are made by `KNNClassifier()`.\n\nSee also [`NeuralNetworkClassifier`](@ref), [`NeuralNetworkRegressor`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/FluxML/MLJFlux.jl" +":package_name" = "MLJFlux" +":name" = "EntityEmbedder" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform"] +":implemented_methods" = [":fit", ":fitted_params", ":training_losses", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" -":supports_training_losses" = "`false`" +":supports_training_losses" = "`true`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Table`" +":input_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" =
"`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[MLJTransforms.FillImputer] +[MLJFlux.MultitargetNeuralNetworkRegressor] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Vector{Symbol}\", \"Function\", \"Function\", \"Function\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" +":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":output_scitype" = "`ScientificTypesBase.Table`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.FillImputer" -":hyperparameters" = "`(:features, :continuous_fill, :count_fill, :finite_fill)`" +":prediction_type" = ":deterministic" +":load_path" = "MLJFlux.MultitargetNeuralNetworkRegressor" +":hyperparameters" = "`(:builder, :optimiser, :loss, :epochs, :batch_size, :lambda, :alpha, :rng, :optimiser_changes_trigger_retraining, :acceleration, :embedding_dims)`" ":is_pure_julia" = "`true`" -":human_name" = "fill imputer" 
-":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nFillImputer\n```\n\nA model type for constructing a fill imputer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nFillImputer = @load FillImputer pkg=MLJTransforms\n```\n\nDo `model = FillImputer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FillImputer(features=...)`.\n\nUse this model to impute `missing` values in tabular data. A fixed \"filler\" value is learned from the training data, one for each column of the table.\n\nFor imputing missing values in a vector, use [`UnivariateFillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose features each have element scitypes `Union{Missing, T}`, where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`. 
Check scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of names of features (symbols) for which imputation is to be attempted; default is empty, which is interpreted as \"impute all\".\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_seen_in_fit`: the names of features (features) encountered during training\n * `univariate_transformer`: the univariate model applied to determine the fillers (it's fields contain the functions defining the filler computations)\n * `filler_given_feature`: dictionary of filler values, keyed on feature (column) names\n\n# Examples\n\n```julia\nusing MLJ\nimputer = FillImputer()\n\nX = (a = [1.0, 2.0, missing, 3.0, missing],\n b = coerce([\"y\", \"n\", \"y\", missing, \"y\"], Multiclass),\n c = [1, 1, 2, missing, 3])\n\nschema(X)\njulia> schema(X)\n┌───────┬───────────────────────────────┐\n│ names │ scitypes │\n├───────┼───────────────────────────────┤\n│ a │ Union{Missing, Continuous} │\n│ b │ Union{Missing, Multiclass{2}} │\n│ c │ Union{Missing, Count} │\n└───────┴───────────────────────────────┘\n\nmach = machine(imputer, X)\nfit!(mach)\n\njulia> fitted_params(mach).filler_given_feature\n(filler = 
2.0,)\n\njulia> fitted_params(mach).filler_given_feature\nDict{Symbol, Any} with 3 entries:\n :a => 2.0\n :b => \"y\"\n :c => 2\n\njulia> transform(mach, X)\n(a = [1.0, 2.0, 2.0, 3.0, 2.0],\n b = CategoricalValue{String, UInt32}[\"y\", \"n\", \"y\", \"y\", \"y\"],\n c = [1, 1, 2, 2, 3],)\n```\n\nSee also [`UnivariateFillImputer`](@ref).\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "FillImputer" -":target_in_fit" = "`false`" +":human_name" = "multitarget neural network regressor" +":is_supervised" = "`true`" +":iteration_parameter" = ":epochs" +":docstring" = """```julia\nMultitargetNeuralNetworkRegressor\n```\n\nA model type for constructing a multitarget neural network regressor, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nMultitargetNeuralNetworkRegressor = @load MultitargetNeuralNetworkRegressor pkg=MLJFlux\n```\n\nDo `model = MultitargetNeuralNetworkRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetNeuralNetworkRegressor(builder=...)`.\n\n`MultitargetNeuralNetworkRegressor` is for training a data-dependent Flux.jl neural network to predict a multi-valued `Continuous` target, represented as a table, given a table of `Continuous` features. Users provide a recipe for constructing the network, based on properties of the data that is encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\nIn addition to features with `Continuous` scientific element type, this model supports categorical features in the input table. 
If present, such features are embedded into dense vectors by the use of an additional `EntityEmbedderLayer` layer after the input, as described in Entity Embeddings of Categorical Variables by Cheng Guo, Felix Berkhahn arXiv, 2016.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` provides input features and is either: (i) a `Matrix` with `Continuous` element scitype (typically `Float32`); or (ii) a table of input features (eg, a `DataFrame`) whose columns have `Continuous`, `Multiclass` or `OrderedFactor` element scitype; check column scitypes with `schema(X)`. If any `Multiclass` or `OrderedFactor` features appear, the constructed network will use an `EntityEmbedderLayer` layer to transform them into dense vectors. If `X` is a `Matrix`, it is assumed that columns correspond to features and rows corresponding to observations.\n\n * `y` is the target, which can be any table or matrix of output targets whose element scitype is `Continuous`; check column scitypes with `schema(y)`. If `y` is a `Matrix`, it is assumed to have columns corresponding to variables and rows corresponding to observations.\n\n# Hyper-parameters\n\n * `builder=MLJFlux.Linear(σ=Flux.relu)`: An MLJFlux builder that constructs a neural network. Possible `builders` include: `Linear`, `Short`, and `MLP`. See MLJFlux documentation for more on builders, and the example below for using the `@builder` convenience macro.\n * `optimiser::Optimisers.Adam()`: An Optimisers.jl optimiser. The optimiser performs the updating of the weights of the network. To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of `10` between `1` and `1e-7`.\n * `loss=Flux.mse`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. 
Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). For a regression task, natural loss functions are:\n\n * `Flux.mse`\n * `Flux.mae`\n * `Flux.msle`\n * `Flux.huber_loss`\n\n Currently MLJ measures are not supported as loss functions here.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete training dataset.\n * `batch_size::Int=1`: the batch size to be used for training, representing the number of samples per update of the network weights. Typically, batch size is between `8` and `512`. Increasing batch size may accelerate training if `acceleration=CUDALibs()` and a GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`. Note the history reports unpenalized losses.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training. The default is `Random.default_rng()`.\n * `optimiser_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. For training on GPU, use `CUDALibs()`.\n * `embedding_dims`: a `Dict` whose keys are names of categorical features, given as symbols, and whose values are numbers representing the desired dimensionality of the entity embeddings of such features: an integer value of `7`, say, sets the embedding dimensionality to `7`; a float value of `0.5`, say, sets the embedding dimensionality to `ceil(0.5 * c)`, where `c` is the number of feature levels.
Unspecified feature dimensionality defaults to `min(c - 1, 10)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same scitype as `X` above. Predictions are deterministic.\n * `transform(mach, Xnew)`: Assuming `Xnew` has the same schema as `X`, transform the categorical features of `Xnew` into dense `Continuous` vectors using the `MLJFlux.EntityEmbedderLayer` layer present in the network. Does nothing in case the model was trained on an input `X` that lacks categorical features.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalised if `lambda != 0`) in historical order, of length `epochs + 1`. The first element is the pre-training loss.\n\n# Examples\n\nIn this example we apply a multi-target regression model to synthetic data:\n\n```julia\nusing MLJ\nimport MLJFlux\nusing Flux\nimport Optimisers\n```\n\nFirst, we generate some synthetic data (needs MLJBase 0.20.16 or higher):\n\n```julia\nX, y = make_regression(100, 9; n_targets = 2) # both tables\nschema(y)\nschema(X)\n```\n\nSplitting off a test set:\n\n```julia\n(X, Xtest), (y, ytest) = partition((X, y), 0.7, multi=true);\n```\n\nNext, we can define a `builder`, making use of a convenience macro to do so. 
In the following `@builder` call, `n_in` is a proxy for the number of input features and `n_out` the number of target variables (both known at `fit!` time), while `rng` is a proxy for a RNG (which will be passed from the `rng` field of `model` defined below).\n\n```julia\nbuilder = MLJFlux.@builder begin\n init=Flux.glorot_uniform(rng)\n Chain(\n Dense(n_in, 64, relu, init=init),\n Dense(64, 32, relu, init=init),\n Dense(32, n_out, init=init),\n )\nend\n```\n\nInstantiating the regression model:\n\n```julia\nMultitargetNeuralNetworkRegressor = @load MultitargetNeuralNetworkRegressor\nmodel = MultitargetNeuralNetworkRegressor(builder=builder, rng=123, epochs=20)\n```\n\nWe will arrange for standardization of the target by wrapping our model in `TransformedTargetModel`, and standardization of the features by inserting the wrapped model in a pipeline:\n\n```julia\npipe = Standardizer |> TransformedTargetModel(model, transformer=Standardizer)\n```\n\nIf we fit with a high verbosity (>1), we will see the losses during training.
We can also see the losses in the output of `report(mach)`.\n\n```julia\nmach = machine(pipe, X, y)\nfit!(mach, verbosity=2)\n\n# first element initial loss, 2:end per epoch training losses\nreport(mach).transformed_target_model_deterministic.model.training_losses\n```\n\nFor experimenting with learning rate, see the [`NeuralNetworkRegressor`](@ref) example.\n\n```julia\npipe.transformed_target_model_deterministic.model.optimiser = Optimisers.Adam(0.0001)\n```\n\nWith the learning rate fixed, we can now compute a CV estimate of the performance (using all data bound to `mach`) and compare this with performance on the test set:\n\n```julia\n\n# CV estimate, based on `(X, y)`:\nevaluate!(mach, resampling=CV(nfolds=5), measure=multitarget_l2)\n\n# loss for `(Xtest, ytest)`:\nfit!(mach) # trains on all data `(X, y)`\nyhat = predict(mach, Xtest)\nmultitarget_l2(yhat, ytest)\n```\n\nSee also [`NeuralNetworkRegressor`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/alan-turing-institute/MLJFlux.jl" +":package_name" = "MLJFlux" +":name" = "MultitargetNeuralNetworkRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform", ":FillImputer"] -":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":supports_training_losses" = "`false`" +":implemented_methods" = [":predict"] +":deep_properties" = "`(:optimiser, :builder)`" +":predict_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":target_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":supports_training_losses" = "`true`" ":supports_weights" = "`false`"
":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Table`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[MLJTransforms.MissingnessEncoder] +[MLJFlux.NeuralNetworkClassifier] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Dict{T} where T<:Type\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" +":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":output_scitype" = "`ScientificTypesBase.Table`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "unknown" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.MissingnessEncoder" -":hyperparameters" = "`(:features, :ignore, :ordered_factor, :label_for_missing)`" +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = 
"MIT" +":prediction_type" = ":probabilistic" +":load_path" = "MLJFlux.NeuralNetworkClassifier" +":hyperparameters" = "`(:builder, :finaliser, :optimiser, :loss, :epochs, :batch_size, :lambda, :alpha, :rng, :optimiser_changes_trigger_retraining, :acceleration, :embedding_dims)`" ":is_pure_julia" = "`true`" -":human_name" = "missingness encoder" -":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nMissingnessEncoder\n```\n\nA model type for constructing a missingness encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nMissingnessEncoder = @load MissingnessEncoder pkg=MLJTransforms\n```\n\nDo `model = MissingnessEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MissingnessEncoder(features=...)`.\n\n`MissingnessEncoder` maps any missing level of a categorical feature into a new level (e.g., \"Missing\"). By this, missingness will be treated as a new level by any subsequent model. This assumes that the categorical features have raw types that are in `Char`, `AbstractString`, and `Number`.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. 
Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or in clude from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded.\n\n * `ignore=true`: Whether to exclude or include the features given in `features`\n\n * `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them\n\n * `label_for_missing::Dict{<:Type, <:Any}()= Dict( AbstractString => \"missing\", Char => 'm', )`: A dictionary where the possible values for keys are the types in `Char`, `AbstractString`, and `Number` and where each value signifies the new level to map into given a column raw super type. By default, if the raw type of the column subtypes `AbstractString` then missing values will be replaced with `\"missing\"` and if the raw type subtypes `Char` then the new value is `'m'` and if the raw type subtypes `Number` then the new value is the lowest value in the column - 1.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply cardinality reduction to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. 
Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `label_for_missing_given_feature`: A dictionary that for each column, maps `missing` into some value according to `label_for_missing`\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nimport StatsBase.proportionmap\nusing MLJ\n\n# Define a table with missing values\nXm = (\n A = categorical([\"Ben\", \"John\", missing, missing, \"Mary\", \"John\", missing]),\n B = [1.85, 1.67, missing, missing, 1.5, 1.67, missing],\n C= categorical([7, 5, missing, missing, 10, 0, missing]),\n D = [23, 23, 44, 66, 14, 23, 11],\n E = categorical([missing, 'g', 'r', missing, 'r', 'g', 'p'])\n)\n\nencoder = MissingnessEncoder()\nmach = fit!(machine(encoder, Xm))\nXnew = transform(mach, Xm)\n\njulia> Xnew\n(A = [\"Ben\", \"John\", \"missing\", \"missing\", \"Mary\", \"John\", \"missing\"],\n B = Union{Missing, Float64}[1.85, 1.67, missing, missing, 1.5, 1.67, missing],\n C = [7, 5, -1, -1, 10, 0, -1],\n D = [23, 23, 44, 66, 14, 23, 11],\n E = ['m', 'g', 'r', 'm', 'r', 'g', 'p'],)\n\n```\n\nSee also [`CardinalityReducer`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "MissingnessEncoder" -":target_in_fit" = "`false`" +":human_name" = "neural network classifier" +":is_supervised" = "`true`" +":iteration_parameter" = ":epochs" +":docstring" = """```julia\nNeuralNetworkClassifier\n```\n\nA model type for constructing a neural network classifier, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nNeuralNetworkClassifier = @load NeuralNetworkClassifier 
pkg=MLJFlux\n```\n\nDo `model = NeuralNetworkClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `NeuralNetworkClassifier(builder=...)`.\n\n`NeuralNetworkClassifier` is for training a data-dependent Flux.jl neural network for making probabilistic predictions of a `Multiclass` or `OrderedFactor` target, given a table of `Continuous` features. Users provide a recipe for constructing the network, based on properties of the data that is encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\nIn addition to features with `Continuous` scientific element type, this model supports categorical features in the input table. If present, such features are embedded into dense vectors by the use of an additional `EntityEmbedderLayer` layer after the input, as described in Entity Embeddings of Categorical Variables by Cheng Guo, Felix Berkhahn arXiv, 2016.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` provides input features and is either: (i) a `Matrix` with `Continuous` element scitype (typically `Float32`); or (ii) a table of input features (eg, a `DataFrame`) whose columns have `Continuous`, `Multiclass` or `OrderedFactor` element scitype; check column scitypes with `schema(X)`. If any `Multiclass` or `OrderedFactor` features appear, the constructed network will use an `EntityEmbedderLayer` layer to transform them into dense vectors. 
If `X` is a `Matrix`, it is assumed that columns correspond to features and rows correspond to observations.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Multiclass` or `OrderedFactor`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `builder=MLJFlux.Short()`: An MLJFlux builder that constructs a neural network. Possible `builders` include: `MLJFlux.Linear`, `MLJFlux.Short`, and `MLJFlux.MLP`. See MLJFlux.jl documentation for examples of user-defined builders. See also `finaliser` below.\n * `optimiser::Optimisers.Adam()`: An Optimisers.jl optimiser. The optimiser performs the updating of the weights of the network. To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of `10` between `1` and `1e-7`.\n * `loss=Flux.crossentropy`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). For a classification task, the most natural loss functions are:\n\n * `Flux.crossentropy`: Standard multiclass classification loss, also known as the log loss.\n * `Flux.logitcrossentropy`: Mathematically equal to crossentropy, but numerically more stable than finalising the outputs with `softmax` and then calculating crossentropy. You will need to specify `finaliser=identity` to remove MLJFlux's default softmax finaliser, and understand that the output of `predict` is then unnormalized (no longer probabilistic).\n * `Flux.tversky_loss`: Used with imbalanced data to give more weight to false negatives.\n * `Flux.focal_loss`: Used with highly imbalanced data.
Weights harder examples more than easier examples.\n\n Currently MLJ measures are not supported values of `loss`.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete training dataset.\n * `batch_size::Int=1`: the batch size to be used for training, representing the number of samples per update of the network weights. Typically, batch size is between `8` and `512`. Increasing batch size may accelerate training if `acceleration=CUDALibs()` and a GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`. Note the history reports unpenalized losses.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training. The default is `Random.default_rng()`.\n * `optimiser_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. For training on GPU, use `CUDALibs()`.\n * `finaliser=Flux.softmax`: The final activation function of the neural network (applied after the network defined by `builder`). Defaults to `Flux.softmax`.\n * `embedding_dims`: a `Dict` whose keys are names of categorical features, given as symbols, and whose values are numbers representing the desired dimensionality of the entity embeddings of such features: an integer value of `7`, say, sets the embedding dimensionality to `7`; a float value of `0.5`, say, sets the embedding dimensionality to `ceil(0.5 * c)`, where `c` is the number of feature levels.
Unspecified feature dimensionality defaults to `min(c - 1, 10)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n * `transform(mach, Xnew)`: Assuming `Xnew` has the same schema as `X`, transform the categorical features of `Xnew` into dense `Continuous` vectors using the `MLJFlux.EntityEmbedderLayer` layer present in the network. Does nothing in case the model was trained on an input `X` that lacks categorical features.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network. This includes the final layer specified by `finaliser` (eg, `softmax`).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalised if `lambda != 0`) in historical order, of length `epochs + 1`. The first element is the pre-training loss.\n\n# Examples\n\nIn this example we build a classification model using the Iris dataset. This is a very basic example, using a default builder and no standardization. 
For a more advanced illustration, see [`NeuralNetworkRegressor`](@ref) or [`ImageClassifier`](@ref), and examples in the MLJFlux.jl documentation.\n\n```julia\nusing MLJ\nusing Flux\nimport RDatasets\nimport Optimisers\n```\n\nFirst, we can load the data:\n\n```julia\niris = RDatasets.dataset(\"datasets\", \"iris\");\ny, X = unpack(iris, ==(:Species), rng=123); # a vector and a table\nNeuralNetworkClassifier = @load NeuralNetworkClassifier pkg=MLJFlux\nclf = NeuralNetworkClassifier()\n```\n\nNext, we can train the model:\n\n```julia\nmach = machine(clf, X, y)\nfit!(mach)\n```\n\nWe can train the model in an incremental fashion, altering the learning rate as we go, provided `optimizer_changes_trigger_retraining` is `false` (the default). Here, we also change the number of (total) iterations:\n\n```julia\nclf.optimiser = Optimisers.Adam(clf.optimiser.eta * 2)\nclf.epochs = clf.epochs + 5\n\nfit!(mach, verbosity=2) # trains 5 more epochs\n```\n\nWe can inspect the mean training loss using the `cross_entropy` function:\n\n```julia\ntraining_loss = cross_entropy(predict(mach, X), y)\n```\n\nAnd we can access the Flux chain (model) using `fitted_params`:\n\n```julia\nchain = fitted_params(mach).chain\n```\n\nFinally, we can see how the out-of-sample performance changes over time, using MLJ's `learning_curve` function:\n\n```julia\nr = range(clf, :epochs, lower=1, upper=200, scale=:log10)\ncurve = learning_curve(clf, X, y,\n range=r,\n resampling=Holdout(fraction_train=0.7),\n measure=cross_entropy)\nusing Plots\nplot(curve.parameter_values,\n curve.measurements,\n xlab=curve.parameter_name,\n xscale=curve.parameter_scale,\n ylab = \"Cross Entropy\")\n\n```\n\nSee also [`ImageClassifier`](@ref), [`NeuralNetworkBinaryClassifier`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/alan-turing-institute/MLJFlux.jl" +":package_name" = "MLJFlux" +":name" = "NeuralNetworkClassifier" +":target_in_fit" = "`true`" 
":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform"] -":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":supports_training_losses" = "`false`" +":implemented_methods" = [":predict"] +":deep_properties" = "`(:optimiser, :builder)`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":supports_training_losses" = "`true`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Table`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[MLJTransforms.ContrastEncoder] +[MLJFlux.ImageClassifier] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Union{Symbol, AbstractVector{Symbol}}\", \"Any\", \"Bool\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\")`" +":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":output_scitype" = "`ScientificTypesBase.Table`" +":fit_data_scitype" = 
"`Tuple{AbstractVector{<:ScientificTypesBase.Image}, AbstractVector{<:ScientificTypesBase.Multiclass}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "unknown" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.ContrastEncoder" -":hyperparameters" = "`(:features, :ignore, :mode, :buildmatrix, :ordered_factor)`" +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "MIT" +":prediction_type" = ":probabilistic" +":load_path" = "MLJFlux.ImageClassifier" +":hyperparameters" = "`(:builder, :finaliser, :optimiser, :loss, :epochs, :batch_size, :lambda, :alpha, :rng, :optimiser_changes_trigger_retraining, :acceleration)`" ":is_pure_julia" = "`true`" -":human_name" = "contrast encoder" -":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nContrastEncoder\n```\n\nA model type for constructing a contrast encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nContrastEncoder = @load ContrastEncoder pkg=MLJTransforms\n```\n\nDo `model = ContrastEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ContrastEncoder(features=...)`.\n\n`ContrastEncoder` implements the following contrast encoding methods for categorical features: dummy, sum, backward/forward difference, and Helmert coding. More generally, users can specify a custom contrast or hypothesis matrix, and each feature can be encoded using a different method.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```julia\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). 
Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or in clude from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded.\n\n * `mode=:dummy`: The type of encoding to use. Can be one of `:contrast`, `:dummy`, `:sum`, `:backward_diff`, `:forward_diff`, `:helmert` or `:hypothesis`. If `ignore=false` (features to be encoded are listed explictly in `features`), then this can be a vector of the same length as `features` to specify a different contrast encoding scheme for each feature\n * `buildmatrix=nothing`: A function or other callable with signature `buildmatrix(colname,k)`, where `colname` is the name of the feature levels and `k` is it's length, and which returns contrast or hypothesis matrix with row/column ordering consistent with the ordering of `levels(col)`. Only relevant if `mode` is `:contrast` or `:hypothesis`.\n * `ignore=true`: Whether to exclude or include the features given in `features`\n\n * `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply contrast encoding to selected `Multiclass` or `OrderedFactor features of`Xnew`specified by hyper-parameters, and return the new table. 
Features that are neither`Multiclass`nor`OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `vector_given_value_given_feature`: A dictionary that maps each level for each column in a subset of the categorical features of X into its frequency.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical dataset\nX = (\n name = categorical([\"Ben\", \"John\", \"Mary\", \"John\"]),\n height = [1.85, 1.67, 1.5, 1.67],\n favnum = categorical([7, 5, 10, 1]),\n age = [23, 23, 14, 23],\n)\n\n# Check scitype coercions:\nschema(X)\n\nencoder = ContrastEncoder(\n features = [:name, :favnum],\n ignore = false,\n mode = [:dummy, :helmert],\n)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia > Xnew\n (name_John = [1.0, 0.0, 0.0, 0.0],\n name_Mary = [0.0, 1.0, 0.0, 1.0],\n height = [1.85, 1.67, 1.5, 1.67],\n favnum_5 = [0.0, 1.0, 0.0, -1.0],\n favnum_7 = [2.0, -1.0, 0.0, -1.0],\n favnum_10 = [-1.0, -1.0, 3.0, -1.0],\n age = [23, 23, 14, 23],)\n```\n\nSee also [`OneHotEncoder`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "ContrastEncoder" -":target_in_fit" = "`false`" +":human_name" = "image classifier" +":is_supervised" = "`true`" +":iteration_parameter" = ":epochs" +":docstring" = """```julia\nImageClassifier\n```\n\nA model type for constructing a image classifier, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nImageClassifier = @load ImageClassifier pkg=MLJFlux\n```\n\nDo `model = ImageClassifier()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `ImageClassifier(builder=...)`.\n\n`ImageClassifier` classifies images using a neural network adapted to the type of images provided (color or gray scale). Predictions are probabilistic. Users provide a recipe for constructing the network, based on properties of the image encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any `AbstractVector` of images with `ColorImage` or `GrayImage` scitype; check the scitype with `scitype(X)` and refer to ScientificTypes.jl documentation on coercing typical image formats into an appropriate type.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Multiclass`; check the scitype with `scitype(y)`.\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `builder`: An MLJFlux builder that constructs the neural network. The fallback builds a depth-16 VGG architecture adapted to the image size and number of target classes, with no batch normalization; see the Metalhead.jl documentation for details. See the example below for a user-specified builder. A convenience macro `@builder` is also available. See also `finaliser` below.\n * `optimiser::Optimisers.Adam()`: An Optimisers.jl optimiser. The optimiser performs the updating of the weights of the network. To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of `10` between `1` and `1e-7`.\n * `loss=Flux.crossentropy`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). 
For a classification task, the most natural loss functions are:\n\n * `Flux.crossentropy`: Standard multiclass classification loss, also known as the log loss.\n * `Flux.logitcrossentropy`: Mathematically equal to crossentropy, but numerically more stable than finalising the outputs with `softmax` and then calculating crossentropy. You will need to specify `finaliser=identity` to remove MLJFlux's default softmax finaliser, and understand that the output of `predict` is then unnormalized (no longer probabilistic).\n * `Flux.tversky_loss`: Used with imbalanced data to give more weight to false negatives.\n * `Flux.focal_loss`: Used with highly imbalanced data. Weights harder examples more than easier examples.\n\n Currently MLJ measures are not supported values of `loss`.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete training dataset.\n * `batch_size::Int=1`: the batch size to be used for training, representing the number of samples per update of the network weights. Typically, batch size is between `8` and `512`. Increasing batch size may accelerate training if `acceleration=CUDALibs()` and a GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`. Note the history reports unpenalized losses.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training. The default is `Random.default_rng()`.\n * `optimiser_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. 
If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. For Training on GPU, use `CUDALibs()`.\n * `finaliser=Flux.softmax`: The final activation function of the neural network (applied after the network defined by `builder`). Defaults to `Flux.softmax`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network. This includes the final layer specified by `finaliser` (eg, `softmax`).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalised if `lambda != 0`) in historical order, of length `epochs + 1`. 
The first element is the pre-training loss.\n\n# Examples\n\nIn this example we use MLJFlux and a custom builder to classify the MNIST image dataset.\n\n```julia\nusing MLJ\nusing Flux\nimport MLJFlux\nimport Optimisers\nimport MLJIteration # for `skip` control\n```\n\nFirst we want to download the MNIST dataset, and unpack into images and labels:\n\n```julia\nimport MLDatasets: MNIST\ndata = MNIST(split=:train)\nimages, labels = data.features, data.targets\n```\n\nIn MLJ, integers cannot be used for encoding categorical data, so we must coerce them into the `Multiclass` scitype:\n\n```julia\nlabels = coerce(labels, Multiclass);\n```\n\nAbove `images` is a single array but MLJFlux requires the images to be a vector of individual image arrays:\n\n```julia\nimages = coerce(images, GrayImage);\nimages[1]\n```\n\nWe start by defining a suitable `builder` object. This is a recipe for building the neural network. Our builder will work for images of any (constant) size, whether they be color or black and white (ie, single or multi-channel). The architecture always consists of six alternating convolution and max-pool layers, and a final dense layer; the filter size and the number of channels after each convolution layer is customizable.\n\n```julia\nimport MLJFlux\n\nstruct MyConvBuilder\n filter_size::Int\n channels1::Int\n channels2::Int\n channels3::Int\nend\n\nmake2d(x::AbstractArray) = reshape(x, :, size(x)[end])\n\nfunction MLJFlux.build(b::MyConvBuilder, rng, n_in, n_out, n_channels)\n k, c1, c2, c3 = b.filter_size, b.channels1, b.channels2, b.channels3\n mod(k, 2) == 1 || error(\"`filter_size` must be odd. 
\")\n p = div(k - 1, 2) # padding to preserve image size\n init = Flux.glorot_uniform(rng)\n front = Chain(\n Conv((k, k), n_channels => c1, pad=(p, p), relu, init=init),\n MaxPool((2, 2)),\n Conv((k, k), c1 => c2, pad=(p, p), relu, init=init),\n MaxPool((2, 2)),\n Conv((k, k), c2 => c3, pad=(p, p), relu, init=init),\n MaxPool((2 ,2)),\n make2d)\n d = Flux.outputsize(front, (n_in..., n_channels, 1)) |> first\n return Chain(front, Dense(d, n_out, init=init))\nend\n```\n\nIt is important to note that in our `build` function, there is no final `softmax`. This is applied by default in all MLJFlux classifiers (override this using the `finaliser` hyperparameter).\n\nNow that our builder is defined, we can instantiate the actual MLJFlux model. If you have a GPU, you can substitute in `acceleration=CUDALibs()` below to speed up training.\n\n```julia\nImageClassifier = @load ImageClassifier pkg=MLJFlux\nclf = ImageClassifier(builder=MyConvBuilder(3, 16, 32, 32),\n batch_size=50,\n epochs=10,\n rng=123)\n```\n\nYou can add Flux options such as `optimiser` and `loss` in the snippet above. 
Currently, `loss` must be a flux-compatible loss, and not an MLJ measure.\n\nNext, we can bind the model with the data in a machine, and train using the first 500 images:\n\n```julia\nmach = machine(clf, images, labels);\nfit!(mach, rows=1:500, verbosity=2);\nreport(mach)\nchain = fitted_params(mach)\nFlux.params(chain)[2]\n```\n\nWe can tack on 20 more epochs by modifying the `epochs` field, and iteratively fit some more:\n\n```julia\nclf.epochs = clf.epochs + 20\nfit!(mach, rows=1:500, verbosity=2);\n```\n\nWe can also make predictions and calculate an out-of-sample loss estimate, using any MLJ measure (loss/score):\n\n```julia\npredicted_labels = predict(mach, rows=501:1000);\ncross_entropy(predicted_labels, labels[501:1000])\n```\n\nThe preceding `fit!`/`predict`/evaluate workflow can be alternatively executed as follows:\n\n```julia\nevaluate!(mach,\n resampling=Holdout(fraction_train=0.5),\n measure=cross_entropy,\n rows=1:1000,\n verbosity=0)\n```\n\nSee also [`NeuralNetworkClassifier`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/alan-turing-institute/MLJFlux.jl" +":package_name" = "MLJFlux" +":name" = "ImageClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform"] -":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":supports_training_losses" = "`false`" +":implemented_methods" = [":predict"] +":deep_properties" = "`(:optimiser, :builder)`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Multiclass}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Multiclass}`" +":supports_training_losses" = "`true`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" 
-":transform_scitype" = "`ScientificTypesBase.Table`" +":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Image}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[MLJTransforms.UnivariateStandardizer] +[MLJFlux.NeuralNetworkBinaryClassifier] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`()`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`()`" +":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" +":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Infinite}}`" -":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Binary}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.UnivariateStandardizer" -":hyperparameters" = "`()`" +":prediction_type" = ":probabilistic" +":load_path" = "MLJFlux.NeuralNetworkBinaryClassifier" +":hyperparameters" = "`(:builder, :finaliser, :optimiser, :loss, :epochs, :batch_size, :lambda, :alpha, :rng, :optimiser_changes_trigger_retraining, :acceleration, :embedding_dims)`" ":is_pure_julia" = "`true`" -":human_name" = "single variable discretizer" 
-":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nUnivariateStandardizer()\n```\n\nTransformer type for standardizing (whitening) single variable data.\n\nThis model may be deprecated in the future. Consider using [`Standardizer`](@ref), which handles both tabular *and* univariate data.\n""" -":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.Infinite}`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "UnivariateStandardizer" -":target_in_fit" = "`false`" +":human_name" = "neural network binary classifier" +":is_supervised" = "`true`" +":iteration_parameter" = ":epochs" +":docstring" = """```julia\nNeuralNetworkBinaryClassifier\n```\n\nA model type for constructing a neural network binary classifier, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nNeuralNetworkBinaryClassifier = @load NeuralNetworkBinaryClassifier pkg=MLJFlux\n```\n\nDo `model = NeuralNetworkBinaryClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `NeuralNetworkBinaryClassifier(builder=...)`.\n\n`NeuralNetworkBinaryClassifier` is for training a data-dependent Flux.jl neural network for making probabilistic predictions of a binary (`Multiclass{2}` or `OrderedFactor{2}`) target, given a table of `Continuous` features. Users provide a recipe for constructing the network, based on properties of the data that is encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\nIn addition to features with `Continuous` scientific element type, this model supports categorical features in the input table. 
If present, such features are embedded into dense vectors by the use of an additional `EntityEmbedderLayer` layer after the input, as described in Entity Embeddings of Categorical Variables by Cheng Guo, Felix Berkhahn arXiv, 2016.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` provides input features and is either: (i) a `Matrix` with `Continuous` element scitype (typically `Float32`); or (ii) a table of input features (eg, a `DataFrame`) whose columns have `Continuous`, `Multiclass` or `OrderedFactor` element scitype; check column scitypes with `schema(X)`. If any `Multiclass` or `OrderedFactor` features appear, the constructed network will use an `EntityEmbedderLayer` layer to transform them into dense vectors. If `X` is a `Matrix`, it is assumed that columns correspond to features and rows corresponding to observations.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Multiclass{2}` or `OrderedFactor{2}`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `builder=MLJFlux.Short()`: An MLJFlux builder that constructs a neural network. Possible `builders` include: `MLJFlux.Linear`, `MLJFlux.Short`, and `MLJFlux.MLP`. See MLJFlux.jl documentation for examples of user-defined builders. See also `finaliser` below.\n * `optimiser::Flux.Adam()`: A `Flux.Optimise` optimiser. The optimiser performs the updating of the weights of the network. For further reference, see [the Flux optimiser documentation](https://fluxml.ai/Flux.jl/stable/training/optimisers/). To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of `10` between `1` and `1e-7`.\n * `loss=Flux.binarycrossentropy`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. 
Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). For a classification task, the most natural loss functions are:\n\n * `Flux.binarycrossentropy`: Standard binary classification loss, also known as the log loss.\n * `Flux.logitbinarycrossentropy`: Mathematically equal to crossentropy, but numerically more stable than finalising the outputs with `σ` and then calculating crossentropy. You will need to specify `finaliser=identity` to remove MLJFlux's default sigmoid finaliser, and understand that the output of `predict` is then unnormalized (no longer probabilistic).\n * `Flux.tversky_loss`: Used with imbalanced data to give more weight to false negatives.\n * `Flux.binary_focal_loss`: Used with highly imbalanced data. Weights harder examples more than easier examples.\n\n Currently MLJ measures are not supported values of `loss`.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete training dataset.\n * `batch_size::Int=1`: the batch size to be used for training, representing the number of samples per update of the network weights. Typically, batch size is between `8` and `512`. Increasing batch size may accelerate training if `acceleration=CUDALibs()` and a GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training.\n * `optimiser_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. 
If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. For Training on GPU, use `CUDALibs()`.\n * `finaliser=Flux.σ`: The final activation function of the neural network (applied after the network defined by `builder`). Defaults to `Flux.σ`.\n * `embedding_dims`: a `Dict` whose keys are names of categorical features, given as symbols, and whose values are numbers representing the desired dimensionality of the entity embeddings of such features: an integer value of `7`, say, sets the embedding dimensionality to `7`; a float value of `0.5`, say, sets the embedding dimensionality to `ceil(0.5 * c)`, where `c` is the number of feature levels. Unspecified feature dimensionality defaults to `min(c - 1, 10)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n * `transform(mach, Xnew)`: Assuming `Xnew` has the same schema as `X`, transform the categorical features of `Xnew` into dense `Continuous` vectors using the `MLJFlux.EntityEmbedderLayer` layer present in the network. Does nothing in case the model was trained on an input `X` that lacks categorical features.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network. This includes the final layer specified by `finaliser` (eg, `softmax`).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalised if `lambda != 0`) in historical order, of length `epochs + 1`. 
The first element is the pre-training loss.\n\n# Examples\n\nIn this example we build a classification model using the mtcars dataset. This is a very basic example, using a default builder and no standardization. For a more advanced illustration, see [`NeuralNetworkRegressor`](@ref) or [`ImageClassifier`](@ref), and examples in the MLJFlux.jl documentation.\n\n```julia\nusing MLJ, Flux\nimport Optimisers\nimport RDatasets\n```\n\nFirst, we can load the data:\n\n```julia\nmtcars = RDatasets.dataset(\"datasets\", \"mtcars\");\ny, X = unpack(mtcars, ==(:VS), in([:MPG, :Cyl, :Disp, :HP, :WT, :QSec]));\n```\n\nNote that `y` is a vector and `X` a table.\n\n```julia\ny = categorical(y) # classifier takes categorical input\nX_f32 = Float32.(X) # To match floating point type of the neural network layers\nNeuralNetworkBinaryClassifier = @load NeuralNetworkBinaryClassifier pkg=MLJFlux\nbclf = NeuralNetworkBinaryClassifier()\n```\n\nNext, we can train the model:\n\n```julia\nmach = machine(bclf, X_f32, y)\nfit!(mach)\n```\n\nWe can train the model in an incremental fashion, altering the learning rate as we go, provided `optimiser_changes_trigger_retraining` is `false` (the default). 
Here, we also change the number of (total) iterations:\n\n```julia-repl\njulia> bclf.optimiser\nAdam(0.001, (0.9, 0.999), 1.0e-8)\n```\n\n```julia\nbclf.optimiser = Optimisers.Adam(eta = bclf.optimiser.eta * 2)\nbclf.epochs = bclf.epochs + 5\n\nfit!(mach, verbosity=2) # trains 5 more epochs\n```\n\nWe can inspect the mean training loss using the `cross_entropy` function:\n\n```julia\ntraining_loss = cross_entropy(predict(mach, X_f32), y)\n```\n\nAnd we can access the Flux chain (model) using `fitted_params`:\n\n```julia\nchain = fitted_params(mach).chain\n```\n\nFinally, we can see how the out-of-sample performance changes over time, using MLJ's `learning_curve` function:\n\n```julia\nr = range(bclf, :epochs, lower=1, upper=200, scale=:log10)\ncurve = learning_curve(\n bclf,\n X_f32,\n y,\n range=r,\n resampling=Holdout(fraction_train=0.7),\n measure=cross_entropy,\n)\nusing Plots\nplot(\n curve.parameter_values,\n curve.measurements,\n xlab=curve.parameter_name,\n xscale=curve.parameter_scale,\n ylab = \"Cross Entropy\",\n)\n\n```\n\nSee also [`ImageClassifier`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/alan-turing-institute/MLJFlux.jl" +":package_name" = "MLJFlux" +":name" = "NeuralNetworkBinaryClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform"] -":deep_properties" = "`()`" +":implemented_methods" = [":predict"] +":deep_properties" = "`(:optimiser, :builder)`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":supports_training_losses" = "`false`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Binary}`" +":supports_training_losses" = "`true`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = 
"`AbstractVector{<:ScientificTypesBase.Infinite}`" -":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[MLJTransforms.UnivariateFillImputer] +[MLJFlux.NeuralNetworkRegressor] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Function\", \"Function\", \"Function\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" +":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}}`" -":output_scitype" = "`Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" +":abstract_type" = 
"`MLJModelInterface.Deterministic`" ":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.UnivariateFillImputer" -":hyperparameters" = "`(:continuous_fill, :count_fill, :finite_fill)`" +":prediction_type" = ":deterministic" +":load_path" = "MLJFlux.NeuralNetworkRegressor" +":hyperparameters" = "`(:builder, :optimiser, :loss, :epochs, :batch_size, :lambda, :alpha, :rng, :optimiser_changes_trigger_retraining, :acceleration, :embedding_dims)`" ":is_pure_julia" = "`true`" -":human_name" = "single variable fill imputer" -":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nUnivariateFillImputer\n```\n\nA model type for constructing a single variable fill imputer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nUnivariateFillImputer = @load UnivariateFillImputer pkg=MLJTransforms\n```\n\nDo `model = UnivariateFillImputer()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateFillImputer(continuous_fill=...)`.\n\nUse this model to imputing `missing` values in a vector with a fixed value learned from the non-missing values of training vector.\n\nFor imputing missing values in tabular data, use [`FillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with element scitype `Union{Missing, T}` where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`; check scitype using `scitype(x)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, xnew)`: return `xnew` with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `filler`: the fill value to be imputed in all new data\n\n# Examples\n\n```julia\nusing MLJ\nimputer = UnivariateFillImputer()\n\nx_continuous = [1.0, 2.0, missing, 3.0]\nx_multiclass = coerce([\"y\", \"n\", \"y\", missing, \"y\"], Multiclass)\nx_count = [1, 1, 1, 2, missing, 3, 3]\n\nmach = machine(imputer, x_continuous)\nfit!(mach)\n\njulia> fitted_params(mach)\n(filler = 2.0,)\n\njulia> transform(mach, [missing, missing, 101.0])\n3-element Vector{Float64}:\n 
2.0\n 2.0\n 101.0\n\nmach2 = machine(imputer, x_multiclass) |> fit!\n\njulia> transform(mach2, x_multiclass)\n5-element CategoricalArray{String,1,UInt32}:\n \"y\"\n \"n\"\n \"y\"\n \"y\"\n \"y\"\n\nmach3 = machine(imputer, x_count) |> fit!\n\njulia> transform(mach3, [missing, missing, 5])\n3-element Vector{Int64}:\n 2\n 2\n 5\n```\n\nFor imputing tabular data, use [`FillImputer`](@ref).\n""" -":inverse_transform_scitype" = "`Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "UnivariateFillImputer" -":target_in_fit" = "`false`" +":human_name" = "neural network regressor" +":is_supervised" = "`true`" +":iteration_parameter" = ":epochs" +":docstring" = """```julia\nNeuralNetworkRegressor\n```\n\nA model type for constructing a neural network regressor, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```julia\nNeuralNetworkRegressor = @load NeuralNetworkRegressor pkg=MLJFlux\n```\n\nDo `model = NeuralNetworkRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `NeuralNetworkRegressor(builder=...)`.\n\n`NeuralNetworkRegressor` is for training a data-dependent Flux.jl neural network to predict a `Continuous` target, given a table of `Continuous` features. Users provide a recipe for constructing the network, based on properties of the data that is encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\nIn addition to features with `Continuous` scientific element type, this model supports categorical features in the input table. 
If present, such features are embedded into dense vectors by the use of an additional `EntityEmbedderLayer` layer after the input, as described in Entity Embeddings of Categorical Variables by Cheng Guo, Felix Berkhahn arXiv, 2016.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```julia\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` provides input features and is either: (i) a `Matrix` with `Continuous` element scitype (typically `Float32`); or (ii) a table of input features (eg, a `DataFrame`) whose columns have `Continuous`, `Multiclass` or `OrderedFactor` element scitype; check column scitypes with `schema(X)`. If any `Multiclass` or `OrderedFactor` features appear, the constructed network will use an `EntityEmbedderLayer` layer to transform them into dense vectors. If `X` is a `Matrix`, it is assumed that columns correspond to features and rows correspond to observations.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `builder=MLJFlux.Linear(σ=Flux.relu)`: An MLJFlux builder that constructs a neural network. Possible `builders` include: `MLJFlux.Linear`, `MLJFlux.Short`, and `MLJFlux.MLP`. See MLJFlux documentation for more on builders, and the example below for using the `@builder` convenience macro.\n * `optimiser=Optimisers.Adam()`: An Optimisers.jl optimiser. The optimiser performs the updating of the weights of the network. To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of `10` between `1` and `1e-7`.\n * `loss=Flux.mse`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). 
For a regression task, natural loss functions are:\n\n * `Flux.mse`\n * `Flux.mae`\n * `Flux.msle`\n * `Flux.huber_loss`\n\n Currently MLJ measures are not supported as loss functions here.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete training dataset.\n * `batch_size::Int=1`: the batch size to be used for training, representing the number of samples per update of the network weights. Typically, batch size is between `8` and `512`. Increasing batch size may accelerate training if `acceleration=CUDALibs()` and a GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`. Note the history reports unpenalized losses.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training. The default is `Random.default_rng()`.\n * `optimiser_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. For training on GPU, use `CUDALibs()`.\n * `embedding_dims`: a `Dict` whose keys are names of categorical features, given as symbols, and whose values are numbers representing the desired dimensionality of the entity embeddings of such features: an integer value of `7`, say, sets the embedding dimensionality to `7`; a float value of `0.5`, say, sets the embedding dimensionality to `ceil(0.5 * c)`, where `c` is the number of feature levels. 
Unspecified feature dimensionality defaults to `min(c - 1, 10)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n * `transform(mach, Xnew)`: Assuming `Xnew` has the same schema as `X`, transform the categorical features of `Xnew` into dense `Continuous` vectors using the `MLJFlux.EntityEmbedderLayer` layer present in the network. Does nothing in case the model was trained on an input `X` that lacks categorical features.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalized if `lambda != 0`) in historical order, of length `epochs + 1`. The first element is the pre-training loss.\n\n# Examples\n\nIn this example we build a regression model for the Boston house price dataset.\n\n```julia\nusing MLJ\nimport MLJFlux\nusing Flux\nimport Optimisers\n```\n\nFirst, we load in the data: The `:MEDV` column becomes the target vector `y`, and all remaining columns go into a table `X`, with the exception of `:CHAS`:\n\n```julia\ndata = OpenML.load(531); # Loads from https://www.openml.org/d/531\ny, X = unpack(data, ==(:MEDV), !=(:CHAS); rng=123);\n\nscitype(y)\nschema(X)\n```\n\nSince MLJFlux models do not handle ordered factors, we'll treat `:RAD` as `Continuous`:\n\n```julia\nX = coerce(X, :RAD=>Continuous)\n```\n\nSplitting off a test set:\n\n```julia\n(X, Xtest), (y, ytest) = partition((X, y), 0.7, multi=true);\n```\n\nNext, we can define a `builder`, making use of a convenience macro to do so. 
In the following `@builder` call, `n_in` is a proxy for the number of input features (which will be known at `fit!` time) and `rng` is a proxy for a RNG (which will be passed from the `rng` field of `model` defined below). We also have the parameter `n_out` which is the number of output features. As we are doing single target regression, the value passed will always be `1`, but the builder we define will also work for [`MultitargetNeuralNetworkRegressor`](@ref).\n\n```julia\nbuilder = MLJFlux.@builder begin\n init=Flux.glorot_uniform(rng)\n Chain(\n Dense(n_in, 64, relu, init=init),\n Dense(64, 32, relu, init=init),\n Dense(32, n_out, init=init),\n )\nend\n```\n\nInstantiating a model:\n\n```julia\nNeuralNetworkRegressor = @load NeuralNetworkRegressor pkg=MLJFlux\nmodel = NeuralNetworkRegressor(\n builder=builder,\n rng=123,\n epochs=20\n)\n```\n\nWe arrange for standardization of the target by wrapping our model in `TransformedTargetModel`, and standardization of the features by inserting the wrapped model in a pipeline:\n\n```julia\npipe = Standardizer |> TransformedTargetModel(model, transformer=Standardizer)\n```\n\nIf we fit with a high verbosity (>1), we will see the losses during training. 
We can also see the losses in the output of `report(mach)`.\n\n```julia\nmach = machine(pipe, X, y)\nfit!(mach, verbosity=2)\n\n# first element initial loss, 2:end per epoch training losses\nreport(mach).transformed_target_model_deterministic.model.training_losses\n```\n\n## Experimenting with learning rate\n\nWe can visually compare how the learning rate affects the predictions:\n\n```julia\nusing Plots\n\nrates = [5e-5, 1e-4, 0.005, 0.001, 0.05]\nplt=plot()\n\nforeach(rates) do η\n pipe.transformed_target_model_deterministic.model.optimiser = Optimisers.Adam(η)\n fit!(mach, force=true, verbosity=0)\n losses =\n report(mach).transformed_target_model_deterministic.model.training_losses[3:end]\n plot!(1:length(losses), losses, label=η)\nend\n\nplt\n\npipe.transformed_target_model_deterministic.model.optimiser = Optimisers.Adam(0.0001)\n```\n\nWith the learning rate fixed, we compute a CV estimate of the performance (using all data bound to `mach`) and compare this with performance on the test set:\n\n```julia\n# CV estimate, based on `(X, y)`:\nevaluate!(mach, resampling=CV(nfolds=5), measure=l2)\n\n# loss for `(Xtest, ytest)`:\nfit!(mach) # train on `(X, y)`\nyhat = predict(mach, Xtest)\nl2(yhat, ytest)\n```\n\nThese losses, for the pipeline model, refer to the target on the original, unstandardized, scale.\n\nFor implementing stopping criterion and other iteration controls, refer to examples linked from the MLJFlux documentation.\n\nSee also [`MultitargetNeuralNetworkRegressor`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/alan-turing-institute/MLJFlux.jl" +":package_name" = "MLJFlux" +":name" = "NeuralNetworkRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform", ":UnivariateFillImputer"] -":deep_properties" = "`()`" -":predict_scitype" = 
"`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":supports_training_losses" = "`false`" +":implemented_methods" = [":predict"] +":deep_properties" = "`(:optimiser, :builder)`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":supports_training_losses" = "`true`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}`" -":transform_scitype" = "`Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" [MLJEnsembles.EnsembleModel]