Skip to content

Commit

Permalink
fixed feature extraction
Browse files Browse the repository at this point in the history
  • Loading branch information
wiebket committed May 5, 2019
1 parent 7576ffd commit a43c51b
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 12 deletions.
12 changes: 6 additions & 6 deletions delprocess/data/specs/dist_base_00.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"year_range" : ["2000","2014"],
- "features" : ["monthly_income", "water_access", "roof_material", "wall_material", "cb_size", "floor_area", "years_electrified", "adults","children","part_time","unemployed","pension"],
+ "features" : ["monthly_income", "water_access", "roof_material", "wall_material", "cb_size", "floor_area", "years_electrified", "total_adults","total_children","total_part_time","total_unemployed","total_pensioners"],
"searchlist" : ["earn per month","external","money from small business","watersource", "roof", "wall", "main switch", "floor area","electricity", "males","part time","unemployed","pension"],
"transform": {
"monthly_income" : "x['earn per month'] + x.fillna(0)['money from small business'] + x.fillna(0)['external']",
Expand All @@ -10,11 +10,11 @@
"cb_size" : "x['main switch']",
"floor_area" : "x['floor area']",
"years_electrified" : "x['electricity']",
- "adults" : "x.fillna(0)['number of males: 16-24'] + x.fillna(0)['number of females: 16-24'] + x.fillna(0)['number of males: 25 - 34'] + x.fillna(0)['number of females: 25-34'] + x.fillna(0)['number of males: 35-49'] + x.fillna(0)['number of females: 35 - 49'] + x.fillna(0)['number of males: 50+']",
- "children" : "x.fillna(0)['number of males: <16'] + x.fillna(0)['number of females: <16']",
- "part_time" : "x.fillna(0)['head emploed part time'] + x.fillna(0)['spouse employed part time'] + x.fillna(0)['persons older than 16 years: employed parttime']",
- "unemployed" : "x.fillna(0)['head unemployed'] + x.fillna(0)['spouse unemployed'] + x.fillna(0)['persons older than 16 years: employed unemployed']",
- "pension" : "x.fillna(0)['head on pension'] + x.fillna(0)['spouse on pension'] + x.fillna(0)['persons older than 16 years: pension']"
+ "total_adults" : "x.fillna(0)['number of males: 16-24'] + x.fillna(0)['number of females: 16-24'] + x.fillna(0)['number of males: 25 - 34'] + x.fillna(0)['number of females: 25-34'] + x.fillna(0)['number of males: 35-49'] + x.fillna(0)['number of females: 35 - 49'] + x.fillna(0)['number of males: 50+']",
+ "total_children" : "x.fillna(0)['number of males: <16'] + x.fillna(0)['number of females: <16']",
+ "total_part_time" : "x.fillna(0)['head emploed part time'] + x.fillna(0)['spouse employed part time'] + x.fillna(0)['persons older than 16 years: employed parttime']",
+ "total_unemployed" : "x.fillna(0)['head unemployed'] + x.fillna(0)['spouse unemployed'] + x.fillna(0)['persons older than 16 years: employed unemployed']",
+ "total_pensioners" : "x.fillna(0)['head on pension'] + x.fillna(0)['spouse on pension'] + x.fillna(0)['persons older than 16 years: pension']"
},
"bins" : {},
"labels" : {},
Expand Down
10 changes: 5 additions & 5 deletions delprocess/data/specs/dist_base_94 .txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"year_range" : ["1994","1999"],
- "features" : ["monthly_income", "water_access", "roof_material", "wall_material", "cb_size", "floor_area", "years_electrified","adults","children","part_time","unemployed"],
+ "features" : ["monthly_income", "water_access", "roof_material", "wall_material", "cb_size", "floor_area", "years_electrified","total_adults","total_children","total_part_time","total_unemployed"],
"searchlist" : ["income", "watersource", "roof", "wall", "main switch", "floor area", "years","males","part time","unemployed"],
"transform": {
"monthly_income" : "x['income']",
Expand All @@ -10,10 +10,10 @@
"cb_size" : "x['main switch']",
"floor_area" : "x['floor area']",
"years_electrified" : "x['years']",
- "adults" : "x.fillna(0)['males16to24'] + x.fillna(0)['females16to24'] + x.fillna(0)['males25to34'] + x.fillna(0)['females25to34'] + x.fillna(0)['males35to49'] + x.fillna(0)['females35to49'] + x.fillna(0)['malesolder50'] + x.fillna(0)['femalesolder50']",
- "children" : "x.fillna(0)['malesyounger16'] + x.fillna(0)['femalesyounger16']",
- "part_time" : "x.fillna(0)['headparttime'] + x.fillna(0)['spouseparttime'] + x.fillna(0)['older16parttime']",
- "unemployed" : "x.fillna(0)['headunemployed'] + x.fillna(0)['spouseunemployed'] + x.fillna(0)['older16unemployed']"
+ "total_adults" : "x.fillna(0)['males16to24'] + x.fillna(0)['females16to24'] + x.fillna(0)['males25to34'] + x.fillna(0)['females25to34'] + x.fillna(0)['males35to49'] + x.fillna(0)['females35to49'] + x.fillna(0)['malesolder50'] + x.fillna(0)['femalesolder50']",
+ "total_children" : "x.fillna(0)['malesyounger16'] + x.fillna(0)['femalesyounger16']",
+ "total_part_time" : "x.fillna(0)['headparttime'] + x.fillna(0)['spouseparttime'] + x.fillna(0)['older16parttime']",
+ "total_unemployed" : "x.fillna(0)['headunemployed'] + x.fillna(0)['spouseunemployed'] + x.fillna(0)['older16unemployed']"
},
"bins" : {},
"labels" : {},
Expand Down
2 changes: 1 addition & 1 deletion delprocess/surveys.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ def generateSociosSetSingle(year, spec_file):
data[k] = data.apply(lambda x: eval(v), axis=1)

dropcols = [i for i in searchlist if i not in features]
- data.drop(columns = searchlist, inplace=True, axis=1)
+ data.drop(columns = dropcols, inplace=True, axis=1)

# Adjust monthly income for inflation: baselined to
# Stats SA December 2016 values. Important that this happens here,
Expand Down

0 comments on commit a43c51b

Please sign in to comment.