-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate_population.py
36 lines (27 loc) · 1.04 KB
/
generate_population.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# %%
import pandas as pd
import numpy as np
import argparse
# %%
def generate_uf_sample(df, uf, sample_weigth, std_pib=500):
    """Draw a synthetic, non-negative per-person PIB sample for one UF.

    The first row of ``df`` whose ``'uf'`` column equals ``uf`` is read
    positionally and must hold exactly three values: the first is ignored,
    the second is used as the mean of the normal distribution and the third
    as the population size.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a ``'uf'`` column and three columns in total.
    uf : object
        Value to look up in the ``'uf'`` column.
    sample_weigth : float
        Multiplier applied to the population size; the product is truncated
        with ``int`` to get the number of draws.
    std_pib : float, optional
        Standard deviation of the normal distribution (default 500).

    Returns
    -------
    numpy.ndarray
        Absolute values of the normal draws, one entry per sampled person.

    Raises
    ------
    IndexError
        If no row of ``df`` matches ``uf``.
    ValueError
        If the matching row does not contain exactly three values.
    """
    is_requested_uf = df['uf'] == uf
    first_match = df[is_requested_uf].iloc[0]

    # Positional unpacking: (ignored, mean, population size).
    _ignored, mean_pib, population = first_match.values

    n_draws = int(population * sample_weigth)
    # Uses NumPy's global random state, so np.random.seed controls the output.
    draws = np.random.normal(loc=mean_pib, scale=std_pib, size=n_draws)

    # Fold negative draws onto the positive side instead of discarding them.
    return np.abs(draws)
def generate_country_pib(df, sample_weigth=1, std_pib=500):
    """Build a synthetic per-person PIB table covering every UF in ``df``.

    For each distinct value of ``df['uf']`` a sample is drawn with
    ``generate_uf_sample`` and labelled with that UF; the per-UF tables are
    then stacked in order of first appearance.

    Parameters
    ----------
    df : pandas.DataFrame
        Input table with a ``'uf'`` column, passed unchanged to
        ``generate_uf_sample``.
    sample_weigth : float, optional
        Sampling weight forwarded to ``generate_uf_sample`` (default 1).
    std_pib : float, optional
        Standard deviation forwarded to ``generate_uf_sample`` (default 500,
        the same default that function uses).

    Returns
    -------
    pandas.DataFrame
        Columns ``'pib_pessoa'`` (sampled value) and ``'uf'`` (label).
        The index of each per-UF table is kept, so index labels restart
        at 0 for every UF. An empty frame with these two columns is
        returned when ``df`` contains no UF.
    """
    # tolist() yields native Python scalars for the labels.
    uf_labels = df['uf'].unique().tolist()

    frames = [
        pd.DataFrame(
            {'pib_pessoa': generate_uf_sample(df, uf, sample_weigth, std_pib=std_pib)}
        ).assign(uf=uf)
        for uf in uf_labels
    ]

    if not frames:
        # pd.concat raises ValueError on an empty list; return an empty
        # table with the expected columns instead.
        return pd.DataFrame(
            {
                'pib_pessoa': pd.Series(dtype=float),
                'uf': pd.Series(dtype=object),
            }
        )

    return pd.concat(frames)
def main():
    """Command-line entry point: read the UF table, sample it, save the result.

    Accepts an optional ``-w`` / ``--weigth`` float (default 1) that is passed
    to ``generate_country_pib`` as ``sample_weigth``. Reads
    ``data/pib_ufs_2015.csv`` and writes ``data/pop_uf_pib.csv`` without the
    index column; both paths are relative to the current working directory.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "-w",
        "--weigth",
        type=float,
        default=1,
        help="Peso para proporção da amostra em relação à população",
    )
    weight = cli.parse_args().weigth

    uf_table = pd.read_csv("data/pib_ufs_2015.csv")
    population = generate_country_pib(uf_table, sample_weigth=weight)
    population.to_csv("data/pop_uf_pib.csv", index=False)


# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()