Skip to content

Commit

Permalink
Merge pull request #1128 from JuliaAI/dev
Browse files Browse the repository at this point in the history
For a 0.20.6 release
  • Loading branch information
ablaom authored Jun 6, 2024
2 parents a0d7a08 + 33d8852 commit 7b3b12c
Show file tree
Hide file tree
Showing 10 changed files with 163 additions and 74 deletions.
45 changes: 42 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
name = "MLJ"
uuid = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
authors = ["Anthony D. Blaom <[email protected]>"]
version = "0.20.5"
version = "0.20.6"

[deps]
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
ComputationalResources = "ed09eef8-17a6-5b46-8889-db040fac31e3"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
FeatureSelection = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MLJBalancing = "45f359ea-796d-4f51-95a5-deb1a414c586"
MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
Expand All @@ -31,12 +32,13 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
CategoricalArrays = "0.8,0.9, 0.10"
ComputationalResources = "0.3"
Distributions = "0.21,0.22,0.23, 0.24, 0.25"
FeatureSelection = "0.1.1"
MLJBalancing = "0.1"
MLJBase = "1"
MLJEnsembles = "0.4"
MLJFlow = "0.5"
MLJIteration = "0.6"
MLJModels = "0.16"
MLJModels = "0.17"
MLJTestIntegration = "0.5.0"
MLJTuning = "0.8"
OpenML = "0.2,0.3"
Expand Down Expand Up @@ -89,4 +91,41 @@ SymbolicRegression = "8254be44-1295-4e6a-a16d-46603ac705cb"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["BetaML", "CatBoost", "EvoLinear", "EvoTrees", "Imbalance", "InteractiveUtils", "LightGBM", "MLJClusteringInterface", "MLJDecisionTreeInterface", "MLJFlux", "MLJGLMInterface", "MLJLIBSVMInterface", "MLJLinearModels", "MLJMultivariateStatsInterface", "MLJNaiveBayesInterface", "MLJScikitLearnInterface", "MLJTSVDInterface", "MLJTestInterface", "MLJTestIntegration", "MLJText", "MLJXGBoostInterface", "Markdown", "NearestNeighborModels", "OneRule", "OutlierDetectionNeighbors", "OutlierDetectionPython", "ParallelKMeans", "PartialLeastSquaresRegressor", "PartitionedLS", "SelfOrganizingMaps", "SIRUS", "SymbolicRegression", "StableRNGs", "Suppressor","Test"]
test = [
"BetaML",
"CatBoost",
"EvoLinear",
"EvoTrees",
"Imbalance",
"InteractiveUtils",
"LightGBM",
"MLJClusteringInterface",
"MLJDecisionTreeInterface",
"MLJFlux",
"MLJGLMInterface",
"MLJLIBSVMInterface",
"MLJLinearModels",
"MLJMultivariateStatsInterface",
"MLJNaiveBayesInterface",
"MLJScikitLearnInterface",
"MLJTSVDInterface",
"MLJTestInterface",
"MLJTestIntegration",
"MLJText",
"MLJXGBoostInterface",
"Markdown",
"NearestNeighborModels",
"OneRule",
"OutlierDetectionNeighbors",
"OutlierDetectionPython",
"ParallelKMeans",
"PartialLeastSquaresRegressor",
"PartitionedLS",
"SelfOrganizingMaps",
"SIRUS",
"SymbolicRegression",
"StableRNGs",
"Suppressor",
"Test",
]

36 changes: 23 additions & 13 deletions docs/ModelDescriptors.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ AutoEncoder_BetaML = ["dimension_reduction"]
BM25Transformer_MLJText = ["encoders", "text_analysis"]
BaggingClassifier_MLJScikitLearnInterface = ["classification", "ensemble_models"]
BaggingRegressor_MLJScikitLearnInterface = ["regression", "ensemble_models"]
BalancedBaggingClassifier_MLJBalancing = ["class_imbalance", "classification"]
BalancedBaggingClassifier_MLJBalancing = ["class_imbalance", "classification", "meta_algorithms"]
BinaryThresholdPredictor_MLJModels = ["meta_algorithms", "classification"]
BalancedModel_MLJBalancing = ["class_imbalance", "meta_algorithms"]
BayesianLDA_MultivariateStats = ["dimension_reduction", "classification", "Bayesian_models"]
BayesianLDA_MLJScikitLearnInterface = ["dimension_reduction", "classification", "Bayesian_models"]
BayesianQDA_MLJScikitLearnInterface = ["dimension_reduction", "classification", "Bayesian_models"]
Expand Down Expand Up @@ -52,6 +54,7 @@ ElasticNetCVRegressor_MLJScikitLearnInterface = ["regression"]
ElasticNetRegressor_MLJLinearModels = ["regression"]
ElasticNetRegressor_MLJScikitLearnInterface = ["regression"]
ENNUndersampler_Imbalance = ["class_imbalance"]
EnsembleModel_MLJEnsembles = ["ensemble_models", "meta_algorithms"]
EpsilonSVR_LIBSVM = ["regression"]
EvoLinearRegressor_EvoLinear = ["regression"]
EvoTreeClassifier_EvoTrees = ["classification", "ensemble_models", "iterative_models"]
Expand All @@ -63,8 +66,8 @@ EvoSplineRegressor_EvoLinear = ["regression", "ensemble_models", "iterative_mode
ExtraTreesClassifier_MLJScikitLearnInterface = ["classification", "iterative_models"]
ExtraTreesRegressor_MLJScikitLearnInterface = ["regression", "iterative_models"]
FactorAnalysis_MultivariateStats = ["dimension_reduction", ]
FeatureAgglomeration_MLJScikitLearnInterface = ["clustering", "static_models"]
FeatureSelector_MLJModels = ["dimension_reduction", ]
FeatureAgglomeration_MLJScikitLearnInterface = ["clustering", "static_models", "feature_engineering"]
FeatureSelector_FeatureSelection = ["dimension_reduction", "feature_engineering"]
FillImputer_MLJModels = ["missing_value_imputation", ]
GaussianMixtureClusterer_BetaML = ["clustering", "distribution_fitter"]
GaussianMixtureImputer_BetaML = ["missing_value_imputation", "distribution_fitter"]
Expand All @@ -88,7 +91,8 @@ ICA_MultivariateStats = ["encoders"]
IForestDetector_OutlierDetectionPython = ["outlier_detection"]
ImageClassifier_MLJFlux = ["classification", "image_processing", "iterative_models"]
INNEDetector_OutlierDetectionPython = ["outlier_detection"]
InteractionTransformer_MLJModels = ["static_models"]
InteractionTransformer_MLJModels = ["static_models", "feature_engineering"]
IteratedModel_MLJIteration = ["iterative_models", "meta_algorithms"]
KDEDetector_OutlierDetectionPython = ["outlier_detection"]
KMeansClusterer_BetaML = ["clustering"]
KMeans_Clustering = ["clustering", "dimension_reduction", ]
Expand All @@ -104,7 +108,7 @@ KNeighborsClassifier_MLJScikitLearnInterface = ["classification"]
KNeighborsRegressor_MLJScikitLearnInterface = ["regression"]
KPLSRegressor_PartialLeastSquaresRegressor = ["regression"]
KernelPCA_MultivariateStats = ["dimension_reduction", ]
KernelPerceptronClassifier_BetaML = ["classification"]
KernelPerceptronClassifier_BetaML = ["classification", "neural networks"]
LADRegressor_MLJLinearModels = ["regression"]
LDA_MultivariateStats = ["classification", "dimension_reduction", ]
LGBMClassifier_LightGBM = ["classification", "ensemble_models", "iterative_models"]
Expand Down Expand Up @@ -146,14 +150,14 @@ MultitargetGaussianMixtureRegressor_BetaML = ["regression", "distribution_fitter
MultitargetKNNClassifier_NearestNeighborModels = ["classification"]
MultitargetKNNRegressor_NearestNeighborModels = ["regression"]
MultitargetLinearRegressor_MultivariateStats = ["regression"]
MultitargetNeuralNetworkRegressor_BetaML = ["regression"]
MultitargetNeuralNetworkRegressor_MLJFlux = ["regression", "iterative_models"]
MultitargetNeuralNetworkRegressor_BetaML = ["regression", "neural networks"]
MultitargetNeuralNetworkRegressor_MLJFlux = ["regression", "iterative_models", "neural networks"]
MultitargetRidgeRegressor_MultivariateStats = ["regression"]
MultitargetSRRegressor_SymbolicRegression = ["regression"]
NeuralNetworkClassifier_BetaML = ["classification"]
NeuralNetworkClassifier_MLJFlux = ["classification", "iterative_models"]
NeuralNetworkRegressor_BetaML = ["regression"]
NeuralNetworkRegressor_MLJFlux = ["regression", "iterative_models"]
NeuralNetworkClassifier_BetaML = ["classification", "neural networks"]
NeuralNetworkClassifier_MLJFlux = ["classification", "iterative_models", "neural networks"]
NeuralNetworkRegressor_BetaML = ["regression", "neural networks"]
NeuralNetworkRegressor_MLJFlux = ["regression", "iterative_models", "neural networks"]
NuSVC_LIBSVM = ["classification"]
NuSVR_LIBSVM = ["regression"]
OCSVMDetector_OutlierDetectionPython = ["outlier_detection"]
Expand All @@ -171,8 +175,9 @@ PartLS_PartitionedLS = ["regression"]
PassiveAggressiveClassifier_MLJScikitLearnInterface = ["classification"]
PassiveAggressiveRegressor_MLJScikitLearnInterface = ["regression"]
PegasosClassifier_BetaML = ["classification"]
PerceptronClassifier_BetaML = ["classification", "iterative_models"]
PerceptronClassifier_MLJScikitLearnInterface = ["classification", "iterative_models"]
PerceptronClassifier_BetaML = ["classification", "iterative_models", "neural networks"]
PerceptronClassifier_MLJScikitLearnInterface = ["classification", "iterative_models", "neural networks"]
Pipeline_MLJBase = ["meta_algorithms"]
ProbabilisticNuSVC_LIBSVM = ["classification"]
ProbabilisticSGDClassifier_MLJScikitLearnInterface = ["classification"]
ProbabilisticSVC_LIBSVM = ["classification"]
Expand All @@ -190,6 +195,8 @@ RandomForestImputer_BetaML = ["missing_value_imputation", "ensemble_models", "it
RandomForestRegressor_BetaML = ["regression", "ensemble_models", "iterative_models"]
RandomForestRegressor_DecisionTree = ["regression", "ensemble_models", "iterative_models"]
RandomForestRegressor_MLJScikitLearnInterface = ["regression", "ensemble_models", "iterative_models"]
RecursiveFeatureElimination_FeatureSelection = ["dimension_reduction", "meta_algorithms", "feature_engineering"]
Resampler_MLJBase = ["meta_algorithms"]
RidgeCVClassifier_MLJScikitLearnInterface = ["classification"]
RidgeCVRegressor_MLJScikitLearnInterface = ["regression"]
RidgeClassifier_MLJScikitLearnInterface = ["classification"]
Expand All @@ -210,6 +217,7 @@ StableForestClassifier_SIRUS = ["classification"]
StableForestRegressor_SIRUS = ["regression"]
StableRulesClassifier_SIRUS = ["classification"]
StableRulesRegressor_SIRUS = ["regression"]
Stack_MLJBase = ["meta_algorithms", "ensemble_models"]
SVC_LIBSVM = ["classification"]
SVMClassifier_MLJScikitLearnInterface = ["classification"]
SVMLinearClassifier_MLJScikitLearnInterface = ["classification"]
Expand All @@ -222,9 +230,11 @@ SpectralClustering_MLJScikitLearnInterface = ["clustering", "static_models"]
Standardizer_MLJModels = ["encoders"]
SubspaceLDA_MultivariateStats = ["classification", "dimension_reduction"]
TomekUndersampler_Imbalance = ["class_imbalance"]
TunedModel_MLJTuning = ["meta_algorithms"]
TSVDTransformer_TSVD = ["dimension_reduction"]
TfidfTransformer_MLJText = ["encoders", "text_analysis"]
TheilSenRegressor_MLJScikitLearnInterface = ["regression"]
TransformedTargetModel_MLJBase = ["meta_algorithms", "outlier_detection"]
UnivariateBoxCoxTransformer_MLJModels = ["encoders"]
UnivariateDiscretizer_MLJModels = ["encoders"]
UnivariateFillImputer_MLJModels = ["missing_value_imputation"]
Expand Down
97 changes: 58 additions & 39 deletions docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import MLJ.MLJModels
import MLJ.MLJEnsembles
import MLJ.ScientificTypes
import MLJ.MLJBalancing
import MLJ.FeatureSelection
import ScientificTypesBase
import Distributions
using CategoricalArrays
Expand All @@ -37,7 +38,7 @@ isempty(problems) || error(
# compose the individual model docstring pages:
@info "Getting individual model docstrings from the registry and generating "*
"pages for them, written at /docs/src/models/ ."
for model in models()
for model in models(wrappers=true)
write_page(model)
end

Expand All @@ -54,45 +55,62 @@ pages = [
"Model Browser" => "model_browser.md",
"About MLJ" => "about_mlj.md",
"Learning MLJ" => "learning_mlj.md",
"Getting Started" => "getting_started.md",
"Common MLJ Workflows" => "common_mlj_workflows.md",
"Working with Categorical Data" => "working_with_categorical_data.md",
"Model Search" => "model_search.md",
"Loading Model Code" => "loading_model_code.md",
"Machines" => "machines.md",
"Evaluating Model Performance" => "evaluating_model_performance.md",
"Performance Measures" => "performance_measures.md",
"Weights" => "weights.md",
"Tuning Models" => "tuning_models.md",
"Learning Curves" => "learning_curves.md",
"Preparing Data" => "preparing_data.md",
"Transformers and Other Unsupervised models" => "transformers.md",
"More on Probabilistic Predictors" => "more_on_probabilistic_predictors.md",
"Composing Models" => "composing_models.md",
"Linear Pipelines" => "linear_pipelines.md",
"Target Transformations" => "target_transformations.md",
"Homogeneous Ensembles" => "homogeneous_ensembles.md",
"Correcting Class Imbalance" => "correcting_class_imbalance.md",
"Model Stacking" => "model_stacking.md",
"Learning Networks" => "learning_networks.md",
"Controlling Iterative Models" => "controlling_iterative_models.md",
"Generating Synthetic Data" => "generating_synthetic_data.md",
"Logging Workflows" => "logging_workflows.md",
"OpenML Integration" => "openml_integration.md",
"Acceleration and Parallelism" => "acceleration_and_parallelism.md",
"Simple User Defined Models" => "simple_user_defined_models.md",
"Quick-Start Guide to Adding Models" =>
"quick_start_guide_to_adding_models.md",
"Adding Models for General Use" => "adding_models_for_general_use.md",
"Modifying Behavior" => "modifying_behavior.md",
"Internals" => "internals.md",
"List of Supported Models" => "list_of_supported_models.md",
"Third Party Packages" => "third_party_packages.md",
"Glossary" => "glossary.md",
"MLJ Cheatsheet" => "mlj_cheatsheet.md",
"FAQ" => "frequently_asked_questions.md",
"Basics" => [
"Getting Started" => "getting_started.md",
"Common MLJ Workflows" => "common_mlj_workflows.md",
"Machines" => "machines.md",
"MLJ Cheatsheet" => "mlj_cheatsheet.md",
],
"Data" => [
"Working with Categorical Data" => "working_with_categorical_data.md",
"Preparing Data" => "preparing_data.md",
"Generating Synthetic Data" => "generating_synthetic_data.md",
"OpenML Integration" => "openml_integration.md",
],
"Model Basics" => [
"Model Search" => "model_search.md",
"Loading Model Code" => "loading_model_code.md",
"Transformers and Other Unsupervised models" => "transformers.md",
"List of Supported Models" => "list_of_supported_models.md",
],
"Meta-algorithms" => [
"Evaluating Model Performance" => "evaluating_model_performance.md",
"Tuning Models" => "tuning_models.md",
"Learning Curves" => "learning_curves.md",
"Controlling Iterative Models" => "controlling_iterative_models.md",
"Correcting Class Imbalance" => "correcting_class_imbalance.md",
"Thresholding Probabilistic Predictors" =>
"thresholding_probabilistic_predictors.md",
"Target Transformations" => "target_transformations.md",
"Homogeneous Ensembles" => "homogeneous_ensembles.md",
],
"Model Composition" => [
"Composing Models" => "composing_models.md",
"Linear Pipelines" => "linear_pipelines.md",
"Model Stacking" => "model_stacking.md",
"Learning Networks" => "learning_networks.md",
],
"Third Party Tools" => [
"Logging Workflows using MLflow" => "logging_workflows.md",
"Third Party Packages" => "third_party_packages.md",
],
"Customization and Extension" => [
"Simple User Defined Models" => "simple_user_defined_models.md",
"Quick-Start Guide to Adding Models" =>
"quick_start_guide_to_adding_models.md",
"Adding Models for General Use" => "adding_models_for_general_use.md",
"Modifying Behavior" => "modifying_behavior.md",
"Internals" => "internals.md",
],
"Miscellaneous" => [
"Performance Measures" => "performance_measures.md",
"Weights" => "weights.md",
"Acceleration and Parallelism" => "acceleration_and_parallelism.md",
"Glossary" => "glossary.md",
"FAQ" => "frequently_asked_questions.md",
],
"Index of Methods" => "api.md",
]
]

for (k, v) in pages
println("$k\t=>$v")
Expand All @@ -118,6 +136,7 @@ makedocs(
IterationControl,
CategoricalDistributions,
StatisticalMeasures,
FeatureSelection,
],
pages = pages,
warnonly = [:cross_references, :missing_docs],
Expand Down
11 changes: 6 additions & 5 deletions docs/model_docstring_tools.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ const PATH_TO_MODEL_DOCS = joinpath(@__DIR__, "src", "models")
"""
remove_doc_refs(str::AbstractString)
Removes `@ref` references from `str. For example, a substring of the form
Removes `@ref` references from `str`. For example, a substring of the form
"[`some.thing_like_this123!`](@ref)" is replaced with "`some.thing_like_this123!`".
"""
Expand All @@ -27,8 +27,8 @@ handle(model) = model.name*"_"*model.package_name
**Private method.**
Compose and write to file the documentation page for `model`. Here `model` is an entry in
the MLJ Model Registry, i.e., an element of `MLJModels.models()`. The file name has the
form `"ModelName_PackageName.md"`, for example,
the MLJ Model Registry, i.e., an element of `MLJModels.models(; wrappers=true)`. The file
name has the form `"ModelName_PackageName.md"`, for example,
`"DecisionTreeClassifier_DecisionTree.md"`. Such a page can be referenced from any other
markdown page in /docs/src/ like this: `[DecisionTreeClassifier](@ref
DecisionTreeClassifier_DecisionTree)`.
Expand Down Expand Up @@ -56,6 +56,7 @@ const DESCRIPTORS_GIVEN_HANDLE =
# Determine the list of all descriptors, ranked by frequency (most frequent first).
# `descriptors` is the flat concatenation, repeats included, of every value stored in
# `DESCRIPTORS_GIVEN_HANDLE`:
const descriptors = vcat(values(DESCRIPTORS_GIVEN_HANDLE)...)
# presumably a tally of how often each descriptor occurs (used as the sort key below):
const ranking = MLJBase.countmap(descriptors)
# Assign a very large count to this key; for a key that occurs in `descriptors`, the
# `rev=true` sort below then places it first.
# NOTE(review): the key here contains a space, whereas docs/ModelDescriptors.toml spells
# the descriptor "meta_algorithms" -- confirm the two agree after the TOML is loaded,
# otherwise this line has no effect on the ordering.
ranking["meta algorithms"] = 1e10
# unique descriptors, ordered by decreasing `ranking` value:
const DESCRIPTORS = sort(unique(descriptors), by=d -> ranking[d], rev=true)
# the handles that have an entry in `DESCRIPTORS_GIVEN_HANDLE`:
const HANDLES = keys(DESCRIPTORS_GIVEN_HANDLE)

Expand All @@ -67,7 +68,7 @@ handle as key in /docs/src/ModelDescriptors.toml.
"""
function models_missing_descriptors()
handles = handle.(models())
handles = handle.(models(wrappers=true))
filter(handles) do h
!(h in HANDLES)
end
Expand All @@ -82,7 +83,7 @@ Return the list of models with a given `descriptor`, such as "regressor", as
these appear in /docs/ModelDescriptors.toml.
"""
modelswith(descriptor) = filter(models()) do model
modelswith(descriptor) = filter(models(wrappers=true)) do model
descriptor in DESCRIPTORS_GIVEN_HANDLE[handle(model)]
end

Expand Down
7 changes: 4 additions & 3 deletions docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,18 +56,19 @@ To support MLJ development, please cite these works or star the repo:
[Model Search](@ref model_search) |
[Loading Model Code](@ref) |
[Transformers and Other Unsupervised Models](@ref) |
[More on Probabilistic Predictors](@ref) |
[Composing Models](@ref) |
[Simple User Defined Models](@ref) |
[List of Supported Models](@ref model_list) |
[Third Party Packages](@ref)

### Meta-algorithms
[Evaluating Model Performance](@ref) |
[Tuning Models](@ref) |
[Composing Models](@ref) |
[Controlling Iterative Models](@ref) |
[Learning Curves](@ref) |
[Correcting Class Imbalance](@ref)
[Correcting Class Imbalance](@ref) |
[Thresholding Probabilistic Predictors](@ref)


### Composition
[Composing Models](@ref) |
Expand Down
Loading

0 comments on commit 7b3b12c

Please sign in to comment.