From 04f929e3d04b81f48d7770fc8e0f27a3d8bd33db Mon Sep 17 00:00:00 2001 From: Martin Hirzel Date: Fri, 4 Aug 2023 17:59:52 -0400 Subject: [PATCH] Added origin links for non-OpenML fairness datasets and references to recent Lale papers. Signed-off-by: Martin Hirzel --- docs/papers.rst | 49 +++++++++++++++++++++++++++++++++++-- lale/lib/aif360/__init__.py | 3 +++ lale/lib/aif360/datasets.py | 8 ++++-- 3 files changed, 56 insertions(+), 4 deletions(-) diff --git a/docs/papers.rst b/docs/papers.rst index 1706c90c6..a65011efb 100644 --- a/docs/papers.rst +++ b/docs/papers.rst @@ -4,7 +4,7 @@ Papers "Pipeline Combinators for Gradual AutoML". Guillaume Baudart, Martin Hirzel, Kiran Kate, Parikshit Ram, Avraham Shinnar, and Jason Tsay. -Conference on Neural Information Processing Systems (NeurIPS), December 2021. +Conference on Neural Information Processing Systems (NeurIPS), pages 19705-19718, December 2021. https://proceedings.neurips.cc/paper/2021/file/a3b36cb25e2e0b93b5f334ffb4e4064e-Paper.pdf **This is the preferred citation for the Lale project.** @@ -17,9 +17,54 @@ https://proceedings.neurips.cc/paper/2021/file/a3b36cb25e2e0b93b5f334ffb4e4064e- booktitle = "Advances in Neural Information Processing Systems (NeurIPS)", year = 2021, month = dec, + pages = "19705--19718", url = "https://proceedings.neurips.cc/paper/2021/file/a3b36cb25e2e0b93b5f334ffb4e4064e-Paper.pdf" } +"Searching for Fairer Machine Learning Ensembles", +Michael Feffer, Martin Hirzel, Samuel C. Hoffman, Kiran Kate, Parikshit Ram, and Avraham Shinnar. +Conference on Automated Machine Learning (AutoML), September 2023. + +.. code:: BibTeX + + @InProceedings{feffer_et_al_2023, + title = "Searching for Fairer Machine Learning Ensembles", + author = "Feffer, Michael and Hirzel, Martin and Hoffman, Samuel C. 
and Kate, Kiran and Ram, Parikshit and Shinnar, Avraham", + booktitle = "Conference on Automated Machine Learning (AutoML)", + year = 2023, + month = sep } + + +"A Suite of Fairness Datasets for Tabular Classification", +Martin Hirzel and Michael Feffer. +arXiv:2308.00133 [cs.LG], July 2023. +https://arxiv.org/abs/2308.00133 + +.. code:: BibTeX + + @Misc{hirzel_feffer_2023, + title = "A Suite of Fairness Datasets for Tabular Classification", + author = "Hirzel, Martin and Feffer, Michael", + year = 2023, + month = jul, + url = "https://arxiv.org/abs/2308.00133" } + + +"AI for Low-Code for AI", +Nikitha Rao, Jason Tsay, Kiran Kate, Vincent J. Hellendoorn, and Martin Hirzel. +arXiv:2305.20015 [cs.SE], May 2023. +https://arxiv.org/abs/2305.20015 + +.. code:: BibTeX + + @Misc{rao_et_al_2023, + title = "{AI} for Low-Code for {AI}", + author = "Rao, Nikitha and Tsay, Jason and Kate, Kiran and Hellendoorn, Vincent J. and Hirzel, Martin", + year = 2023, + month = may, + url = "https://arxiv.org/abs/2305.20015" } + + "Gradual AutoML using Lale". Kiran Kate, Martin Hirzel, Parikshit Ram, Avraham Shinnar, and Jason Tsay. Tutorial at Conference on Knowledge Discovery and Data Mining (KDD-Tutorial), August 2022. @@ -33,7 +78,7 @@ https://doi.org/10.1145/3534678.3542630 booktitle = "Tutorial at the Conference on Knowledge Discovery and Data Mining (KDD-Tutorial)", year = 2022, month = aug, - pages = "pages 4794--4795", + pages = "4794--4795", url = "https://doi.org/10.1145/3534678.3542630" } diff --git a/lale/lib/aif360/__init__.py b/lale/lib/aif360/__init__.py index f49b943d9..b51609c4e 100644 --- a/lale/lib/aif360/__init__.py +++ b/lale/lib/aif360/__init__.py @@ -66,6 +66,8 @@ Datasets: ========= +`datasets module docstring`_ + * `fetch_adult_df`_ * `fetch_bank_df`_ * `fetch_compas_df`_ @@ -174,6 +176,7 @@ .. _`equal_opportunity_difference`: lale.lib.aif360.util.html#lale.lib.aif360.util.equal_opportunity_difference .. 
_`f1_and_disparate_impact`: lale.lib.aif360.util.html#lale.lib.aif360.util.f1_and_disparate_impact .. _`fair_stratified_train_test_split`: lale.lib.aif360.util.html#lale.lib.aif360.util.fair_stratified_train_test_split +.. _`datasets module docstring`: https://lale.readthedocs.io/en/latest/modules/lale.lib.aif360.datasets.html .. _`fetch_adult_df`: lale.lib.aif360.datasets.html#lale.lib.aif360.datasets.fetch_adult_df .. _`fetch_bank_df`: lale.lib.aif360.datasets.html#lale.lib.aif360.datasets.fetch_bank_df .. _`fetch_compas_df`: lale.lib.aif360.datasets.html#lale.lib.aif360.datasets.fetch_compas_df diff --git a/lale/lib/aif360/datasets.py b/lale/lib/aif360/datasets.py index b34efbacf..250bd7ad7 100644 --- a/lale/lib/aif360/datasets.py +++ b/lale/lib/aif360/datasets.py @@ -16,17 +16,21 @@ See the notebook `demo_fairness_datasets`_ for an example for using the functions, along with some tables and figures about them. +There is also an `arXiv paper`_ about these datasets. Some of the fetcher methods have a `preprocess` argument that defaults to False. The notebook does not use that argument, instead demonstrating how to do any required preprocessing in the context of a Lale pipeline. -Most of the datasets are from `OpenML`_, and most of the datasets -have been used in various papers. +Most of the datasets are from `OpenML`_, a few are from `meps.ahrq`_ or +`ProPublica`_, and most of the datasets have been used in various papers. The Lale library does not distribute the datasets themselves, it only provides methods for downloading them. .. _`demo_fairness_datasets`: https://github.com/IBM/lale/blob/master/examples/demo_fairness_datasets.ipynb +.. _`arXiv paper`: https://arxiv.org/abs/2308.00133 .. _`OpenML`: https://www.openml.org/ +.. _`meps.ahrq`: https://meps.ahrq.gov/data_stats/data_use.jsp +.. _`ProPublica`: https://github.com/propublica/compas-analysis """ import logging