Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Exports flattened output for erfs_fpr #177

Merged
merged 8 commits into from
Oct 24, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
# Changelog

## 0.12.0 [173](https://github.com/openfisca/openfisca-france-data/pull/173
## 0.13.0 [177](https://github.com/openfisca/openfisca-france-data/pull/177)

- Introduce an optional file export that contains a single flattened table (individuals merged with their household, e.g. `dummy_data.h5`), in addition to the existing export of separate tables.
- Add an `export_flattened_df_filepath` argument to the `create_input_data_frame` function (and to `build`); leave it as `None` to skip the flattened export.
- Relax the `numexpr` requirement from the exact pin `== 2.6.8` to the range `>= 2.6.8, < 2.8.0`.

## 0.12.0 [173](https://github.com/openfisca/openfisca-france-data/pull/173)

- Get some cleaner stuff from IPP modifs from CASD.

### 0.11.1 [172](https://github.com/openfisca/openfisca-france-data/pull/172
### 0.11.1 [172](https://github.com/openfisca/openfisca-france-data/pull/172)

- Fix numpy dependency to deal with openfisca-survey-manager deps (see https://github.com/openfisca/openfisca-survey-manager/pull/79).

Expand Down
9 changes: 6 additions & 3 deletions openfisca_france_data/erfs_fpr/input_data_builder/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@


#@dispatch(int)
def build(year: int) -> None:
def build(year: int, export_flattened_df_filepath: str = None) -> None:
"""
Ici on va nettoyer et formatter les donnés ERFS-FPR, pour les rendre OpenFisca-like
"""
Expand Down Expand Up @@ -54,16 +54,19 @@ def build(year: int) -> None:
#
# On crée une df par entité par période.
# Elles sont stockées dans un fichier h5
final.create_input_data_frame(year = year)
final.create_input_data_frame(year = year, export_flattened_df_filepath = export_flattened_df_filepath)


if __name__ == '__main__':
import sys
import time
start = time.time()
logging.basicConfig(level = logging.INFO, stream = sys.stdout)

year = 2014
build(year = year)
export_flattened_df_filepath = "./dummy_data.h5" # Could be disabled with None
build(year = year, export_flattened_df_filepath = export_flattened_df_filepath)
# TODO: create_enfants_a_naitre(year = year)

log.info("Script finished after {}".format(time.time() - start))
print(time.time() - start)
14 changes: 11 additions & 3 deletions openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@


@temporary_store_decorator(file_name = 'erfs_fpr')
def create_input_data_frame(temporary_store = None, year = None):
def create_input_data_frame(temporary_store = None, year = None, export_flattened_df_filepath = None):
assert temporary_store is not None
assert year is not None

Expand Down Expand Up @@ -95,7 +95,14 @@ def create_input_data_frame(temporary_store = None, year = None):
)

individus = format_ids_and_roles(individus)

if export_flattened_df_filepath:
supermerge = individus.merge(
menages,
right_index = True,
left_on = "idmen",
suffixes = ("", "_x"))
supermerge.to_hdf(export_flattened_df_filepath, key = "input")
# Enters the individual table into the openfisca_erfs_fpr collection
set_table_in_survey(
individus,
entity = "individu",
Expand All @@ -104,6 +111,7 @@ def create_input_data_frame(temporary_store = None, year = None):
survey_name = 'input',
)


# assert 'f4ba' in data_frame.columns


Expand Down Expand Up @@ -206,7 +214,7 @@ def extract_menages_variables(menages):
logging.basicConfig(level = logging.INFO, stream = sys.stdout)
year = 2014
data_frame = create_input_data_frame(year = year)
print('ok')
log.info('Ok')

# TODO
# Variables revenus collectifs
Expand Down
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

setup(
name = "OpenFisca-France-Data",
version = "0.12.0",
version = "0.13.0",
description = "OpenFisca-France module to work with French survey data",
long_description = long_description,
author = "OpenFisca Team",
Expand All @@ -33,8 +33,8 @@
"openfisca-core >= 34.2.2, < 35.0.0",
"openFisca-france >= 42.1.0, < 43.0.0",
"openFisca-survey-manager >= 0.24, < 1.0.0",
"numpy >= 1.11,<1.16", # openfisca-survey-manager deps and https://github.com/openfisca/openfisca-survey-manager/pull/79
"numexpr == 2.6.8",
"numpy >= 1.11, < 1.16", # openfisca-survey-manager deps and https://github.com/openfisca/openfisca-survey-manager/pull/79
"numexpr >= 2.6.8, < 2.8.0",
"pandas >= 0.20.3, < 1.0.0",
"tables >= 3.0.0, < 4.0.0", # Needed by pandas.HDFStore
"wquantiles >= 0.3.0, < 1.0.0" # To compute weighted quantiles
Expand Down