diff --git a/CHANGELOG.md b/CHANGELOG.md index c3e04700..b933bca3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,16 @@ # Changelog -## 0.12.0 [173](https://github.com/openfisca/openfisca-france-data/pull/173 +## 0.13.0 [177](https://github.com/openfisca/openfisca-france-data/pull/177) + +- Introduce a file export that contains only one flattened table (`dummy_data.h5`) instead of exporting a file with several tables. + - Adds `export_flattened_df_filepath` argument to the `create_input_data_frame` function. +- Bump `numexpr` top version. + +## 0.12.0 [173](https://github.com/openfisca/openfisca-france-data/pull/173) - Get some cleaner stuff from IPP modifs from CASD. -### 0.11.1 [172](https://github.com/openfisca/openfisca-france-data/pull/172 +### 0.11.1 [172](https://github.com/openfisca/openfisca-france-data/pull/172) - Fix numpy dependency to deal with openfisca-survey-manager deps (see https://github.com/openfisca/openfisca-survey-manager/pull/79). diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py index 71baaacb..bd068b65 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py @@ -18,7 +18,7 @@ #@dispatch(int) -def build(year: int) -> None: +def build(year: int, export_flattened_df_filepath: str = None) -> None: """ Ici on va nettoyer et formatter les donnés ERFS-FPR, pour les rendre OpenFisca-like """ @@ -54,7 +54,7 @@ def build(year: int) -> None: # # On crée une df par entité par période. 
# Elles sont stockées dans un fichier h5 - final.create_input_data_frame(year = year) + final.create_input_data_frame(year = year, export_flattened_df_filepath = export_flattened_df_filepath) if __name__ == '__main__': @@ -62,8 +62,11 @@ def build(year: int) -> None: import time start = time.time() logging.basicConfig(level = logging.INFO, stream = sys.stdout) + year = 2014 - build(year = year) + export_flattened_df_filepath = "./dummy_data.h5" # Could be disabled with None + build(year = year, export_flattened_df_filepath = export_flattened_df_filepath) # TODO: create_enfants_a_naitre(year = year) + log.info("Script finished after {}".format(time.time() - start)) print(time.time() - start) diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py index 63a622ee..73f06133 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py @@ -15,7 +15,7 @@ @temporary_store_decorator(file_name = 'erfs_fpr') -def create_input_data_frame(temporary_store = None, year = None): +def create_input_data_frame(temporary_store = None, year = None, export_flattened_df_filepath = None): assert temporary_store is not None assert year is not None @@ -95,7 +95,14 @@ def create_input_data_frame(temporary_store = None, year = None): ) individus = format_ids_and_roles(individus) - + if export_flattened_df_filepath: + supermerge = individus.merge( + menages, + right_index = True, + left_on = "idmen", + suffixes = ("", "_x")) + supermerge.to_hdf(export_flattened_df_filepath, key = "input") + # Enters the individual table into the openfisca_erfs_fpr collection set_table_in_survey( individus, entity = "individu", @@ -104,6 +111,7 @@ def create_input_data_frame(temporary_store = None, year = None): survey_name = 'input', ) + # assert 'f4ba' in data_frame.columns @@ -206,7 +214,7 @@ def 
extract_menages_variables(menages): logging.basicConfig(level = logging.INFO, stream = sys.stdout) year = 2014 data_frame = create_input_data_frame(year = year) - print('ok') + log.info('Ok') # TODO # Variables revenus collectifs diff --git a/setup.py b/setup.py index 8eecb73a..0e6fb681 100755 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup( name = "OpenFisca-France-Data", - version = "0.12.0", + version = "0.13.0", description = "OpenFisca-France module to work with French survey data", long_description = long_description, author = "OpenFisca Team", @@ -33,8 +33,8 @@ "openfisca-core >= 34.2.2, < 35.0.0", "openFisca-france >= 42.1.0, < 43.0.0", "openFisca-survey-manager >= 0.24, < 1.0.0", - "numpy >= 1.11,<1.16", # openfisca-survey-manager deps and https://github.com/openfisca/openfisca-survey-manager/pull/79 - "numexpr == 2.6.8", + "numpy >= 1.11, < 1.16", # openfisca-survey-manager deps and https://github.com/openfisca/openfisca-survey-manager/pull/79 + "numexpr >= 2.6.8, < 2.8.0", "pandas >= 0.20.3, < 1.0.0", "tables >= 3.0.0, < 4.0.0", # Needed by pandas.HDFStore "wquantiles >= 0.3.0, < 1.0.0" # To compute weighted quantiles