From cbe16d62848e8c549baf5cc4224609dad1f28c54 Mon Sep 17 00:00:00 2001 From: Melih Darcan <57872471+MelihDarcanxyz@users.noreply.github.com> Date: Wed, 5 Jul 2023 01:33:04 +0300 Subject: [PATCH 1/6] feat: Update ROADMAP.md to be more understandable I had difficulty understanding the `Avoid common pain points of the other frameworks` section of ROADMAP.md, so I asked in the Julia Slack channel. I thought the list contained things to stay away from in development, which confused me. There, they explained every element of the list to me, and after that I understood it. I think it could confuse other potential contributors as well, so I propose a better wording based on my understanding. I hope it reflects the ideas of MLJ. --- ROADMAP.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/ROADMAP.md b/ROADMAP.md index aae494f03..db7ad35c0 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -1,11 +1,11 @@ # Road map -February 2020; updated, May 2021 +February 2020; updated, July 2023 Please visit [contributing guidelines](CONTRIBUTING.md) if interested in contributing to MLJ. -### Guiding goals +### Goals - **Usability, interoperability, extensibility, reproducibility,** and **code transparency**. @@ -13,15 +13,15 @@ in contributing to MLJ. 
- Offer state-of-art tools for model **composition** and model **optimization** (hyper-parameter tuning) -- Avoid common **pain-points** of other frameworks: +- Avoid common **pain-points** of other frameworks with MLJ: - - identifying all models that solve a given task + - identify and list all models that solve a given task - - routine operations requiring a lot of code + - easily perform routine operations requiring a lot of code - - passage from data source to algorithm-specific data format + - easily transform data, from source to algorithm-specific data format - - probabilistic predictions: inconsistent representations, lack + - make use of probabilistic predictions: no more inconsistent representations / lack of options for performance evaluation - Add some focus to julia machine learning software development more From 9d15e0bcf877fca8032df366e901b54009efccad Mon Sep 17 00:00:00 2001 From: OkonSamuel Date: Mon, 31 Jul 2023 17:40:11 +0100 Subject: [PATCH 2/6] add sirus.jl and symbolicregression.jl models to model browser --- docs/ModelDescriptors.toml | 6 ++++++ docs/src/list_of_supported_models.md | 2 ++ 2 files changed, 8 insertions(+) diff --git a/docs/ModelDescriptors.toml b/docs/ModelDescriptors.toml index 96fd1ab5f..19de154c1 100644 --- a/docs/ModelDescriptors.toml +++ b/docs/ModelDescriptors.toml @@ -141,6 +141,7 @@ MultitargetLinearRegressor_MultivariateStats = ["regression"] MultitargetNeuralNetworkRegressor_BetaML = ["regression"] MultitargetNeuralNetworkRegressor_MLJFlux = ["regression", "iterative_models"] MultitargetRidgeRegressor_MultivariateStats = ["regression"] +MultitargetSRRegressor_SymbolicRegression = ["regression"] NeuralNetworkClassifier_BetaML = ["classification"] NeuralNetworkClassifier_MLJFlux = ["classification", "iterative_models"] NeuralNetworkRegressor_BetaML = ["regression"] @@ -187,6 +188,11 @@ SGDClassifier_MLJScikitLearnInterface = ["classification"] SGDRegressor_MLJScikitLearnInterface = ["regression"] 
SODDetector_OutlierDetectionPython = ["outlier_detection", "outlier_detection"] SOSDetector_OutlierDetectionPython = ["outlier_detection"] +SRRegressor_SymbolicRegression = ["regression"] +StableForestClassifier_SIRUS = ["classification"] +StableForestRegressor_SIRUS = ["regression"] +StableRulesClassifier_SIRUS = ["classification"] +StableRulesRegressor_SIRUS = ["regression"] SVC_LIBSVM = ["classification"] SVMClassifier_MLJScikitLearnInterface = ["classification"] SVMLinearClassifier_MLJScikitLearnInterface = ["classification"] diff --git a/docs/src/list_of_supported_models.md b/docs/src/list_of_supported_models.md index 2b3dd323a..ef49cd224 100644 --- a/docs/src/list_of_supported_models.md +++ b/docs/src/list_of_supported_models.md @@ -50,6 +50,8 @@ independent assessment. [ParallelKMeans.jl](https://github.com/PyDataBlog/ParallelKMeans.jl) | - | KMeans | experimental | [PartialLeastSquaresRegressor.jl](https://github.com/lalvim/PartialLeastSquaresRegressor.jl) | - | PLSRegressor, KPLSRegressor | experimental | [ScikitLearn.jl](https://github.com/cstjean/ScikitLearn.jl) | [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl) | ARDRegressor, AdaBoostClassifier, AdaBoostRegressor, AffinityPropagation, AgglomerativeClustering, BaggingClassifier, BaggingRegressor, BayesianLDA, BayesianQDA, BayesianRidgeRegressor, BernoulliNBClassifier, Birch, ComplementNBClassifier, DBSCAN, DummyClassifier, DummyRegressor, ElasticNetCVRegressor, ElasticNetRegressor, ExtraTreesClassifier, ExtraTreesRegressor, FeatureAgglomeration, GaussianNBClassifier, GaussianProcessClassifier, GaussianProcessRegressor, GradientBoostingClassifier, GradientBoostingRegressor, HuberRegressor, KMeans, KNeighborsClassifier, KNeighborsRegressor, LarsCVRegressor, LarsRegressor, LassoCVRegressor, LassoLarsCVRegressor, LassoLarsICRegressor, LassoLarsRegressor, LassoRegressor, LinearRegressor, LogisticCVClassifier, LogisticClassifier, MeanShift, MiniBatchKMeans, 
MultiTaskElasticNetCVRegressor, MultiTaskElasticNetRegressor, MultiTaskLassoCVRegressor, MultiTaskLassoRegressor, MultinomialNBClassifier, OPTICS, OrthogonalMatchingPursuitCVRegressor, OrthogonalMatchingPursuitRegressor, PassiveAggressiveClassifier, PassiveAggressiveRegressor, PerceptronClassifier, ProbabilisticSGDClassifier, RANSACRegressor, RandomForestClassifier, RandomForestRegressor, RidgeCVClassifier, RidgeCVRegressor, RidgeClassifier, RidgeRegressor, SGDClassifier, SGDRegressor, SVMClassifier, SVMLClassifier, SVMLRegressor, SVMNuClassifier, SVMNuRegressor, SVMRegressor, SpectralClustering, TheilSenRegressor | high² | +[SIRUS.jl](https://github.com/rikhuijzer/SIRUS.jl) | - | StableForestClassifier, StableForestRegressor, StableRulesClassifier, StableRulesRegressor | low | +[SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl) | - | MultitargetSRRegressor, SRRegressor | experimental | [TSVD.jl](https://github.com/JuliaLinearAlgebra/TSVD.jl) | [MLJTSVDInterface.jl](https://github.com/JuliaAI/MLJTSVDInterface.jl) | TSVDTransformer | high | [XGBoost.jl](https://github.com/dmlc/XGBoost.jl) | [MLJXGBoostInterface.jl](https://github.com/JuliaAI/MLJXGBoostInterface.jl) | XGBoostRegressor, XGBoostClassifier, XGBoostCount | high | From 24abc9f4eb4c487b5fbba76843e4188cac47b02b Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Thu, 24 Aug 2023 12:19:05 +1200 Subject: [PATCH 3/6] add MLJFlow and reexport names --- Project.toml | 4 ++++ src/MLJ.jl | 5 ++++- test/exported_names.jl | 5 +++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 943c39bdd..997915f31 100644 --- a/Project.toml +++ b/Project.toml @@ -11,6 +11,7 @@ Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d" MLJEnsembles = "50ed68f4-41fd-4504-931a-ed422449fee0" +MLJFlow = "7b7b8358-b45c-48ea-a8ef-7ca328ad328f" MLJIteration = "614be32b-d00c-4edb-bd02-1eb411ab5e55" MLJModels = "d491faf4-2d78-11e9-2867-c94bc002c0b7" MLJTuning = "03970b2e-30c4-11ea-3135-d1576263f10f" @@ -18,6 +19,7 @@ OpenML = "8b6db2d4-7670-4922-a472-f9537c81ab66" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +Reexport = "189a3867-3050-52da-a836-e630ba90ab69" ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" @@ -29,11 +31,13 @@ ComputationalResources = "0.3" Distributions = "0.21,0.22,0.23, 0.24, 0.25" MLJBase = "0.21.3" MLJEnsembles = "0.3" +MLJFlow = "0.1" MLJIteration = "0.5" MLJModels = "0.16" MLJTuning = "0.7" OpenML = "0.2,0.3" ProgressMeter = "1.1" +Reexport = "1.2" ScientificTypes = "3" StatsBase = "0.32,0.33, 0.34" Tables = "0.2,1.0" diff --git a/src/MLJ.jl b/src/MLJ.jl index d260b70c5..5f3c1df20 100644 --- a/src/MLJ.jl +++ b/src/MLJ.jl @@ -8,6 +8,8 @@ import Distributed: @distributed, nworkers, pmap import Pkg import Pkg.TOML +using Reexport + # from the MLJ universe: using MLJBase import MLJBase.save @@ -15,6 +17,7 @@ using MLJEnsembles using MLJTuning using MLJModels using OpenML +@reexport using MLJFlow using MLJIteration import MLJIteration.IterationControl @@ -89,7 +92,7 @@ 
export nrows, color_off, color_on, @load_boston, @load_ames, @load_iris, @load_reduced_ames, @load_crabs, load_boston, load_ames, load_iris, load_reduced_ames, load_crabs, Machine, machine, AbstractNode, @node, - source, node, fit!, freeze!, thaw!, Node, sources, origins, + machines, sources, anonymize!, @from_network, fitresults, @pipeline, Stack, Pipeline, TransformedTargetModel, ResamplingStrategy, Holdout, CV, TimeSeriesCV, diff --git a/test/exported_names.jl b/test/exported_names.jl index 8f19e8b5b..fe95fc3e7 100644 --- a/test/exported_names.jl +++ b/test/exported_names.jl @@ -22,4 +22,9 @@ Save() @test OpenML.load isa Function + +# MLJFlow + +MLFlowLogger + true From ac8eb973c56124d20bc101ec376966e67f584506 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Thu, 24 Aug 2023 13:29:40 +1200 Subject: [PATCH 4/6] add a page in manual on MLJFlow functionality --- docs/make.jl | 1 + docs/src/evaluating_model_performance.md | 7 +++++-- docs/src/logging_workflows.md | 12 ++++++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 docs/src/logging_workflows.md diff --git a/docs/make.jl b/docs/make.jl index 76959a53c..f4d13ec69 100755 --- a/docs/make.jl +++ b/docs/make.jl @@ -74,6 +74,7 @@ pages = [ "Learning Networks" => "learning_networks.md", "Controlling Iterative Models" => "controlling_iterative_models.md", "Generating Synthetic Data" => "generating_synthetic_data.md", + "Logging Workflows" => "logging_workflows.md", "OpenML Integration" => "openml_integration.md", "Acceleration and Parallelism" => "acceleration_and_parallelism.md", "Simple User Defined Models" => "simple_user_defined_models.md", diff --git a/docs/src/evaluating_model_performance.md b/docs/src/evaluating_model_performance.md index 121575718..63476d14f 100644 --- a/docs/src/evaluating_model_performance.md +++ b/docs/src/evaluating_model_performance.md @@ -9,8 +9,11 @@ In addition to hold-out and cross-validation, the user can specify an explicit list of train/test pairs 
of row indices for resampling, or define new resampling strategies. -For simultaneously evaluating *multiple* models and/or data -sets, see [Benchmarking](benchmarking.md). +For simultaneously evaluating *multiple* models, see [Comparing models of different type +and nested cross-validation](@ref). + +For externally logging the outcomes of performance evaluation experiments, see [Logging +Workflows](@ref). ## Evaluating against a single measure diff --git a/docs/src/logging_workflows.md b/docs/src/logging_workflows.md new file mode 100644 index 000000000..7f4b468cd --- /dev/null +++ b/docs/src/logging_workflows.md @@ -0,0 +1,12 @@ +# Logging Workflows + +## MLflow integration + +[MLflow](https://mlflow.org) is a popular, language-agnostic tool for externally logging +the outcomes of machine learning experiments, including those carried out using MLJ. + +This functionality is provided by the [MLJFlow.jl](https://github.com/JuliaAI/MLJFlow.jl) +package, whose methods are automatically available to MLJ users. Refer to the package's +documentation for examples. + + From 9809a103e49bbb17a88d126d5b2416335fab96a9 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Thu, 24 Aug 2023 14:07:05 +1200 Subject: [PATCH 5/6] bump 0.19.3 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 997915f31..2f6e92ed8 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MLJ" uuid = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7" authors = ["Anthony D. Blaom "] -version = "0.19.2" +version = "0.19.3" [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" From 585a1200998e3333a85b438e482028bd05fe5327 Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Thu, 24 Aug 2023 15:18:23 +1200 Subject: [PATCH 6/6] update landing page --- docs/src/index.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/src/index.md b/docs/src/index.md index 18a857428..e73e5b924 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -75,6 +75,10 @@ To support MLJ development, please cite these works or star the repo: [Model Stacking](@ref) | [Learning Networks](@ref) +### Integration +[Logging Workflows](@ref) | +[OpenML Integration](@ref) + ### Customization and Extension [Simple User Defined Models](@ref) | [Quick-Start Guide to Adding Models](@ref) |