From b514e361332d60a1e0dc6a12134875e6a9a18da0 Mon Sep 17 00:00:00 2001 From: Augustin Wenger Date: Thu, 17 Oct 2019 17:32:30 +0200 Subject: [PATCH 1/8] now exports flattened output for erfs_fpr. --- .../erfs_fpr/input_data_builder/__init__.py | 2 +- .../erfs_fpr/input_data_builder/step_05_final.py | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py index 71baaacb..3792227f 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py @@ -54,7 +54,7 @@ def build(year: int) -> None: # # On crée une df par entité par période. # Elles sont stockées dans un fichier h5 - final.create_input_data_frame(year = year) + final.create_input_data_frame(year = year,export_flattened_df=True) if __name__ == '__main__': diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py index 63a622ee..23dfbdf6 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py @@ -15,7 +15,7 @@ @temporary_store_decorator(file_name = 'erfs_fpr') -def create_input_data_frame(temporary_store = None, year = None): +def create_input_data_frame(temporary_store = None, year = None,export_flattened_df=False): assert temporary_store is not None assert year is not None @@ -95,7 +95,11 @@ def create_input_data_frame(temporary_store = None, year = None): ) individus = format_ids_and_roles(individus) - + if export_flattened_df: + supermerge=individus.merge(menages,right_index=True,left_on="idmen",suffixes=("","_x")) + print(len(individus),len(supermerge)) + supermerge.to_hdf("dummy_data.h5",key="input") + # Enters the individual table into the openfisca_erfs_fpr collection set_table_in_survey( individus, 
entity = "individu", @@ -104,6 +108,7 @@ def create_input_data_frame(temporary_store = None, year = None): survey_name = 'input', ) + # assert 'f4ba' in data_frame.columns From 15ddc19785cdcb2e0f3e08c1b4970621214fd013 Mon Sep 17 00:00:00 2001 From: sandcha Date: Mon, 21 Oct 2019 14:30:18 +0200 Subject: [PATCH 2/8] Add spacing --- .../erfs_fpr/input_data_builder/__init__.py | 2 +- .../erfs_fpr/input_data_builder/step_05_final.py | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py index 3792227f..22e1b48d 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py @@ -54,7 +54,7 @@ def build(year: int) -> None: # # On crée une df par entité par période. # Elles sont stockées dans un fichier h5 - final.create_input_data_frame(year = year,export_flattened_df=True) + final.create_input_data_frame(year = year, export_flattened_df=True) if __name__ == '__main__': diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py index 23dfbdf6..7144fdec 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py @@ -15,7 +15,7 @@ @temporary_store_decorator(file_name = 'erfs_fpr') -def create_input_data_frame(temporary_store = None, year = None,export_flattened_df=False): +def create_input_data_frame(temporary_store = None, year = None, export_flattened_df=False): assert temporary_store is not None assert year is not None @@ -96,9 +96,13 @@ def create_input_data_frame(temporary_store = None, year = None,export_flattened individus = format_ids_and_roles(individus) if export_flattened_df: - 
supermerge=individus.merge(menages,right_index=True,left_on="idmen",suffixes=("","_x")) + supermerge=individus.merge( + menages, + right_index = True, + left_on = "idmen", + suffixes = ("","_x")) print(len(individus),len(supermerge)) - supermerge.to_hdf("dummy_data.h5",key="input") + supermerge.to_hdf("dummy_data.h5", key = "input") # Enters the individual table into the openfisca_erfs_fpr collection set_table_in_survey( individus, From 2d2b8f3af228de1422e31c5454408f4fcb3a3c6f Mon Sep 17 00:00:00 2001 From: sandcha Date: Mon, 21 Oct 2019 14:33:21 +0200 Subject: [PATCH 3/8] Cleaning prints and logs --- .../erfs_fpr/input_data_builder/step_05_final.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py index 7144fdec..3964078f 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py @@ -100,8 +100,7 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene menages, right_index = True, left_on = "idmen", - suffixes = ("","_x")) - print(len(individus),len(supermerge)) + suffixes = ("", "_x")) supermerge.to_hdf("dummy_data.h5", key = "input") # Enters the individual table into the openfisca_erfs_fpr collection set_table_in_survey( @@ -215,7 +214,7 @@ def extract_menages_variables(menages): logging.basicConfig(level = logging.INFO, stream = sys.stdout) year = 2014 data_frame = create_input_data_frame(year = year) - print('ok') + log.info('Ok') # TODO # Variables revenus collectifs From d87b6483f2cbe41b5c2cc1a716752f4d59e12f0d Mon Sep 17 00:00:00 2001 From: sandcha Date: Mon, 21 Oct 2019 14:59:52 +0200 Subject: [PATCH 4/8] Set flattened h5 file path as argument --- .../erfs_fpr/input_data_builder/__init__.py | 2 +- .../erfs_fpr/input_data_builder/step_05_final.py | 8 ++++---- 2 files changed, 5 
insertions(+), 5 deletions(-) diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py index 22e1b48d..6d30163c 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py @@ -54,7 +54,7 @@ def build(year: int) -> None: # # On crée une df par entité par période. # Elles sont stockées dans un fichier h5 - final.create_input_data_frame(year = year, export_flattened_df=True) + final.create_input_data_frame(year = year, export_flattened_df="./dummy_data.h5") if __name__ == '__main__': diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py index 3964078f..73f06133 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py @@ -15,7 +15,7 @@ @temporary_store_decorator(file_name = 'erfs_fpr') -def create_input_data_frame(temporary_store = None, year = None, export_flattened_df=False): +def create_input_data_frame(temporary_store = None, year = None, export_flattened_df_filepath = None): assert temporary_store is not None assert year is not None @@ -95,13 +95,13 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene ) individus = format_ids_and_roles(individus) - if export_flattened_df: - supermerge=individus.merge( + if export_flattened_df_filepath: + supermerge = individus.merge( menages, right_index = True, left_on = "idmen", suffixes = ("", "_x")) - supermerge.to_hdf("dummy_data.h5", key = "input") + supermerge.to_hdf(export_flattened_df_filepath, key = "input") # Enters the individual table into the openfisca_erfs_fpr collection set_table_in_survey( individus, From e4e639c5f6b90a2b5b4c915ccbfb04da344d8467 Mon Sep 17 00:00:00 2001 From: sandcha Date: Mon, 21 Oct 2019 15:24:12 +0200 Subject: 
[PATCH 5/8] fixup! Set flattened h5 file path as argument --- openfisca_france_data/erfs_fpr/input_data_builder/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py index 6d30163c..d62e3de6 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py @@ -54,7 +54,7 @@ def build(year: int) -> None: # # On crée une df par entité par période. # Elles sont stockées dans un fichier h5 - final.create_input_data_frame(year = year, export_flattened_df="./dummy_data.h5") + final.create_input_data_frame(year = year, export_flattened_df_filepath="./dummy_data.h5") if __name__ == '__main__': From 92c74cc19efaea18f47cf213588f3ff000909f33 Mon Sep 17 00:00:00 2001 From: sandcha Date: Mon, 21 Oct 2019 15:33:19 +0200 Subject: [PATCH 6/8] Set flattened h5 file path as main script config --- .../erfs_fpr/input_data_builder/__init__.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py index d62e3de6..bd068b65 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py @@ -18,7 +18,7 @@ #@dispatch(int) -def build(year: int) -> None: +def build(year: int, export_flattened_df_filepath: str = None) -> None: """ Ici on va nettoyer et formatter les donnés ERFS-FPR, pour les rendre OpenFisca-like """ @@ -54,7 +54,7 @@ def build(year: int) -> None: # # On crée une df par entité par période. 
# Elles sont stockées dans un fichier h5 - final.create_input_data_frame(year = year, export_flattened_df_filepath="./dummy_data.h5") + final.create_input_data_frame(year = year, export_flattened_df_filepath = export_flattened_df_filepath) if __name__ == '__main__': @@ -62,8 +62,11 @@ def build(year: int) -> None: import time start = time.time() logging.basicConfig(level = logging.INFO, stream = sys.stdout) + year = 2014 - build(year = year) + export_flattened_df_filepath = "./dummy_data.h5" # Could be disabled with None + build(year = year, export_flattened_df_filepath = export_flattened_df_filepath) # TODO: create_enfants_a_naitre(year = year) + log.info("Script finished after {}".format(time.time() - start)) print(time.time() - start) From b6a62ad3872388fe290a67b249b1e95371fbd1b2 Mon Sep 17 00:00:00 2001 From: sandcha Date: Mon, 21 Oct 2019 16:52:47 +0200 Subject: [PATCH 7/8] Upgrade numexpr when available --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 8eecb73a..3c3752cd 100755 --- a/setup.py +++ b/setup.py @@ -33,8 +33,8 @@ "openfisca-core >= 34.2.2, < 35.0.0", "openFisca-france >= 42.1.0, < 43.0.0", "openFisca-survey-manager >= 0.24, < 1.0.0", - "numpy >= 1.11,<1.16", # openfisca-survey-manager deps and https://github.com/openfisca/openfisca-survey-manager/pull/79 - "numexpr == 2.6.8", + "numpy >= 1.11, < 1.16", # openfisca-survey-manager deps and https://github.com/openfisca/openfisca-survey-manager/pull/79 + "numexpr >= 2.6.8, < 2.8.0", "pandas >= 0.20.3, < 1.0.0", "tables >= 3.0.0, < 4.0.0", # Needed by pandas.HDFStore "wquantiles >= 0.3.0, < 1.0.0" # To compute weighted quantiles From 9ff37bcba2c4b2ab5aab0a832217db1b11c2e952 Mon Sep 17 00:00:00 2001 From: sandcha Date: Thu, 24 Oct 2019 12:24:31 +0200 Subject: [PATCH 8/8] Bump version --- CHANGELOG.md | 10 ++++++++-- setup.py | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c3e04700..b933bca3 
100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,16 @@ # Changelog -## 0.12.0 [173](https://github.com/openfisca/openfisca-france-data/pull/173 +## 0.13.0 [177](https://github.com/openfisca/openfisca-france-data/pull/177) + +- Introduce a file export that contains only one flattened table (`dummy_data.h5`) instead of exporting a file with several tables. + - Adds `export_flattened_df_filepath` argument in `build` and `create_input_data_frame` functions. +- Bump `numexpr` top version. + +## 0.12.0 [173](https://github.com/openfisca/openfisca-france-data/pull/173) - Get some cleaner stuff from IPP modifs from CASD. -### 0.11.1 [172](https://github.com/openfisca/openfisca-france-data/pull/172 +### 0.11.1 [172](https://github.com/openfisca/openfisca-france-data/pull/172) - Fix numpy dependency to deal with openfisca-survey-manager deps (see https://github.com/openfisca/openfisca-survey-manager/pull/79). diff --git a/setup.py b/setup.py index 3c3752cd..0e6fb681 100755 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup( name = "OpenFisca-France-Data", - version = "0.12.0", + version = "0.13.0", description = "OpenFisca-France module to work with French survey data", long_description = long_description, author = "OpenFisca Team",