-
Notifications
You must be signed in to change notification settings - Fork 119
/
原始数据获取.py
256 lines (224 loc) · 13.4 KB
/
原始数据获取.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
# -*- coding: utf-8 -*-
"""
阿尔法收割者
Project: alphasickle
Author: Moses
E-mail: [email protected]
"""
from joblib import Parallel, delayed
from raw_data_fetch import TushareFetcher, WindFetcher
#---------------------------------------------------------------
# Tushare数据源
#---------------------------------------------------------------
def TushareFetch():
    """Run the Tushare pipeline: download raw tables, then build indicators.

    A fresh ``TushareFetcher`` is created and its methods are invoked
    strictly in the order written below.  Nothing is returned.
    """
    fetcher = TushareFetcher()

    # ---------------------------------------------------------------
    # Stage 1: download the data to local storage
    # ---------------------------------------------------------------
    fetcher.fetch_meta_data()
    fetcher.fetch_trade_day()
    fetcher.fetch_month_map()

    # (fetcher attribute, local table name) pairs handed to ensure_data().
    # The attribute is looked up lazily inside the loop so that the lookup
    # and the call happen in the same sequence as hand-written calls would.
    ensure_data_jobs = (
        ("daily", "__temp_daily__"),              # daily quotes table
        ("suspend_d", "__temp_suspend_d__"),      # suspension table
        ("limit_list", "__temp_limit_list__"),    # limit-up / limit-down table
        ("adj_factor", "__temp_adj_factor__"),    # adjustment factor table
        ("daily_basic", "__temp_daily_basic__"),  # daily metrics table
        ("moneyflow", "__temp_moneyflow__"),      # money flow table
    )
    for attr_name, table_name in ensure_data_jobs:
        fetcher.ensure_data(getattr(fetcher, attr_name), table_name)

    # Same idea for the tables handed to ensure_data_by_q().
    ensure_data_by_q_jobs = (
        ("fina_indicator", "__temp_fina_indicator__"),  # financial indicators table
        ("income", "__temp_income__"),                  # income statement
        ("balancesheet", "__temp_balancesheet__"),      # balance sheet
        ("cashflow", "__temp_cashflow__"),              # cash flow statement
    )
    for attr_name, table_name in ensure_data_by_q_jobs:
        fetcher.ensure_data_by_q(getattr(fetcher, attr_name), table_name)

    fetcher.index_daily()

    # ---------------------------------------------------------------
    # Stage 2: build indicators from the local data
    # ---------------------------------------------------------------
    fetcher.create_listday_matrix()
    fetcher.create_month_tdays_begin_end()
    fetcher.create_turn_d()
    fetcher.create_trade_status()
    fetcher.create_maxupordown()
    fetcher.create_indicator("__temp_adj_factor__", "adj_factor", "adjfactor")
    fetcher.create_mkt_cap_float_m()
    fetcher.create_pe_ttm_m()
    fetcher.create_val_pe_deducted_ttm_m()
    fetcher.create_pb_lf_m()
    fetcher.create_ps_ttm_m()
    fetcher.create_pcf_ncf_ttm_m()
    fetcher.create_pcf_ocf_ttm_m()
    fetcher.create_dividendyield2_m()
    fetcher.create_profit_ttm_G_m()

    fina_table = "__temp_fina_indicator__"
    balance_table = "__temp_balancesheet__"
    daily_basic_table = "__temp_daily_basic__"

    # (second argument, third argument) pairs for create_indicator_m_by_q()
    # on the financial-indicator table.
    fina_pairs = (
        ("q_sales_yoy", "qfa_yoysales_m"),
        ("q_profit_yoy", "qfa_yoyprofit_m"),
        ("ocf_yoy", "qfa_yoyocf_m"),                            # temporary substitute
        ("roe_yoy", "qfa_roe_G_m"),                             # temporary substitute
        ("q_roe", "qfa_roe_m"),
        ("roe_yearly", "roe_ttm2_m"),                           # temporary substitute
        ("roa", "qfa_roa_m"),                                   # temporary substitute
        ("roa_yearly", "roa2_ttm2_m"),                          # temporary substitute
        ("q_gsprofit_margin", "qfa_grossprofitmargin_m"),
        ("grossprofit_margin", "grossprofitmargin_ttm2_m"),     # temporary substitute
        ("assets_turn", "turnover_ttm_m"),                      # temporary substitute
        ("assets_to_eqt", "assetstoequity_m"),
        ("debt_to_eqt", "longdebttoequity_m"),                  # temporary substitute
        ("cash_to_liqdebt", "cashtocurrentdebt_m"),
        ("current_ratio", "current_m"),
    )
    for second_arg, third_arg in fina_pairs:
        fetcher.create_indicator_m_by_q(fina_table, second_arg, third_arg)

    fetcher.create_daily_quote_indicators()

    for second_arg, third_arg in (
        ("circ_mv", "mkt_cap_float"),
        ("total_mv", "mkt_cap_ard"),
    ):
        fetcher.create_indicator(daily_basic_table, second_arg, third_arg)

    fetcher.create_indicator_m_by_q(fina_table, "longdeb_to_debt", "longdebttodebt_lyr_m")

    balance_pairs = (
        ("total_liab", "tot_liab_lyr_m"),
        ("oth_eqt_tools_p_shr", "other_equity_instruments_PRE_lyr_m"),
        ("total_hldr_eqy_inc_min_int", "tot_equity_lyr_m"),
        ("total_assets", "tot_assets_lyr_m"),
    )
    for second_arg, third_arg in balance_pairs:
        fetcher.create_indicator_m_by_q(balance_table, second_arg, third_arg)
#---------------------------------------------------------------
# Wind data source
#---------------------------------------------------------------
# Module-level WindFetcher instance, created when this module is loaded.
# The task wrapper functions and WindFetch() below all use this shared name.
fetcher = WindFetcher()
def profit_ttm_G_m():
    """Joblib task: net profit (TTM) year-over-year growth rate.

    Delegates to ``create_profit_ttm_G_m`` on the module-level ``fetcher``.
    """
    build_indicator = fetcher.create_profit_ttm_G_m
    build_indicator()
def qfa_yoysales_m():
    """Joblib task: operating revenue, single-quarter YoY % (growth).

    Delegates to ``create_indicator_m_by_q_ex`` on the module-level
    ``fetcher`` with the fixed arguments below.
    """
    call_args = ("__temp_fina_indicator__", "S_QFA_YOYSALES", "qfa_yoysales_m")
    fetcher.create_indicator_m_by_q_ex(*call_args)
def qfa_yoyprofit_m():
    """Joblib task: net profit, single-quarter YoY % (growth).

    Delegates to ``create_indicator_m_by_q_ex`` on the module-level
    ``fetcher`` with the fixed arguments below.
    """
    call_args = ("__temp_fina_indicator__", "S_QFA_YOYPROFIT", "qfa_yoyprofit_m")
    fetcher.create_indicator_m_by_q_ex(*call_args)
def qfa_yoyocf_m():
    """Joblib task: operating cash flow, single-quarter YoY % (growth).

    Delegates to ``create_indicator_m_by_q_ex`` on the module-level
    ``fetcher`` with the fixed arguments below.
    """
    call_args = ("__temp_fina_indicator__", "S_FA_YOYOCF", "qfa_yoyocf_m")
    fetcher.create_indicator_m_by_q_ex(*call_args)
def qfa_roe_G_m():
    """Joblib task: single-quarter ROE year-over-year growth rate (growth).

    Delegates to ``create_qfa_roe_G_m`` on the module-level ``fetcher``.
    """
    build_indicator = fetcher.create_qfa_roe_G_m
    build_indicator()
def roe_ttm2_m():
    """Joblib task: ROE (TTM) (financial quality).

    Delegates to ``create_indicator_m_by_q_ex`` on the module-level
    ``fetcher`` with the fixed arguments below.
    """
    call_args = ("__temp_fina_indicator_ttm__", "S_FA_ROE_TTM", "roe_ttm2_m")
    fetcher.create_indicator_m_by_q_ex(*call_args)
def qfa_roa_m():
    """Joblib task: ROA, single quarter (financial quality).

    Delegates to ``create_indicator_m_by_q_ex`` on the module-level
    ``fetcher`` with the fixed arguments below.
    """
    call_args = ("__temp_fina_indicator__", "S_QFA_ROA", "qfa_roa_m")
    fetcher.create_indicator_m_by_q_ex(*call_args)
def roa2_ttm2_m():
    """Joblib task: ROA (TTM) (financial quality).

    Delegates to ``create_indicator_m_by_q_ex`` on the module-level
    ``fetcher`` with the fixed arguments below.
    """
    # NOTE(review): the serial section of WindFetch() passes "S_FA_ROA_TTM"
    # for this same "roa2_ttm2_m" name -- confirm which field is intended.
    call_args = ("__temp_fina_indicator_ttm__", "S_FA_ROA2_TTM", "roa2_ttm2_m")
    fetcher.create_indicator_m_by_q_ex(*call_args)
def qfa_grossprofitmargin_m():
    """Joblib task: gross margin, single quarter (financial quality).

    Delegates to ``create_indicator_m_by_q_ex`` on the module-level
    ``fetcher`` with the fixed arguments below.
    """
    call_args = (
        "__temp_fina_indicator__",
        "S_QFA_GROSSPROFITMARGIN",
        "qfa_grossprofitmargin_m",
    )
    fetcher.create_indicator_m_by_q_ex(*call_args)
def grossprofitmargin_ttm2_m():
    """Joblib task: gross margin (TTM) (financial quality).

    Delegates to ``create_indicator_m_by_q_ex`` on the module-level
    ``fetcher`` with the fixed arguments below.
    """
    call_args = (
        "__temp_fina_indicator_ttm__",
        "S_FA_GROSSPROFITMARGIN_TTM",
        "grossprofitmargin_ttm2_m",
    )
    fetcher.create_indicator_m_by_q_ex(*call_args)
def turnover_ttm_m():
    """Joblib task: total asset turnover (TTM) (financial quality).

    Delegates to ``create_indicator_m_by_q_ex`` on the module-level
    ``fetcher`` with the fixed arguments below.
    """
    # The original author marked this field as a temporary substitute.
    call_args = ("__temp_fina_indicator__", "S_FA_ASSETSTURN", "turnover_ttm_m")
    fetcher.create_indicator_m_by_q_ex(*call_args)
def assetstoequity_m():
    """Joblib task: equity multiplier (leverage).

    Delegates to ``create_indicator_m_by_q_ex`` on the module-level
    ``fetcher`` with the fixed arguments below.
    """
    call_args = ("__temp_fina_indicator__", "S_FA_ASSETSTOEQUITY", "assetstoequity_m")
    fetcher.create_indicator_m_by_q_ex(*call_args)
def longdebttoequity_m():
    """Joblib task: non-current liabilities to equity ratio (leverage).

    Delegates to ``create_indicator_m_by_q_ex`` on the module-level
    ``fetcher`` with the fixed arguments below.
    """
    # The original author marked this field as a temporary substitute.
    call_args = ("__temp_fina_indicator__", "S_FA_DEBTTOEQUITY", "longdebttoequity_m")
    fetcher.create_indicator_m_by_q_ex(*call_args)
def cashtocurrentdebt_m():
    """Joblib task: cash ratio (leverage).

    Delegates to ``create_indicator_m_by_q_ex`` on the module-level
    ``fetcher`` with the fixed arguments below.
    """
    call_args = ("__temp_fina_indicator__", "S_FA_CASHTOLIQDEBT", "cashtocurrentdebt_m")
    fetcher.create_indicator_m_by_q_ex(*call_args)
def current_m():
    """Joblib task: current ratio (leverage).

    Delegates to ``create_indicator_m_by_q_ex`` on the module-level
    ``fetcher`` with the fixed arguments below.
    """
    call_args = ("__temp_fina_indicator__", "S_FA_CURRENT", "current_m")
    fetcher.create_indicator_m_by_q_ex(*call_args)
def longdebttodebt_lyr_m():
    """Joblib task wrapper for the ``longdebttodebt_lyr_m`` call.

    Delegates to ``create_indicator_m_by_q_ex`` on the module-level
    ``fetcher`` with the fixed arguments below.
    """
    call_args = ("__temp_fina_indicator__", "S_FA_LONGDEBTODEBT", "longdebttodebt_lyr_m")
    fetcher.create_indicator_m_by_q_ex(*call_args)
def tot_liab_lyr_m():
    """Joblib task wrapper for the ``tot_liab_lyr_m`` call.

    Delegates to ``create_indicator_m_by_q_ex`` on the module-level
    ``fetcher`` with the fixed arguments below.
    """
    call_args = ("__temp_balancesheet__", "TOT_LIAB", "tot_liab_lyr_m")
    fetcher.create_indicator_m_by_q_ex(*call_args)
def other_equity_instruments_PRE_lyr_m():
    """Joblib task wrapper for the ``other_equity_instruments_PRE_lyr_m`` call.

    Delegates to ``create_indicator_m_by_q_ex`` on the module-level
    ``fetcher`` with the fixed arguments below.
    """
    call_args = (
        "__temp_balancesheet__",
        "OTHER_EQUITY_TOOLS_P_SHR",
        "other_equity_instruments_PRE_lyr_m",
    )
    fetcher.create_indicator_m_by_q_ex(*call_args)
def tot_equity_lyr_m():
    """Joblib task wrapper for the ``tot_equity_lyr_m`` call.

    Delegates to ``create_indicator_m_by_q_ex`` on the module-level
    ``fetcher`` with the fixed arguments below.
    """
    call_args = (
        "__temp_balancesheet__",
        "TOT_SHRHLDR_EQY_INCL_MIN_INT",
        "tot_equity_lyr_m",
    )
    fetcher.create_indicator_m_by_q_ex(*call_args)
def tot_assets_lyr_m():
    """Joblib task wrapper for the ``tot_assets_lyr_m`` call.

    Delegates to ``create_indicator_m_by_q_ex`` on the module-level
    ``fetcher`` with the fixed arguments below.
    """
    call_args = ("__temp_balancesheet__", "TOT_ASSETS", "tot_assets_lyr_m")
    fetcher.create_indicator_m_by_q_ex(*call_args)
def WindFetch():
    """Build the Wind-based indicators using the module-level ``fetcher``.

    The download stage is disabled (its calls are kept below as comments),
    so only indicator builders run, in this order:

    1. the twenty module-level task wrappers are dispatched through joblib
       (``n_jobs=10``, ``multiprocessing`` backend);
    2. ``create_daily_basic_indicators`` and six ``create_indicator_m_by_d_ex``
       calls run serially;
    3. the quarterly-statement builders run serially.

    Nothing is returned.

    NOTE(review): step 3 issues the same builder calls that the task
    wrappers of step 1 issue, so that work appears to be done twice --
    confirm whether both paths are meant to be active.  The two paths also
    disagree on one field: the ``roa2_ttm2_m`` wrapper passes
    ``"S_FA_ROA2_TTM"`` while step 3 passes ``"S_FA_ROA_TTM"``.
    """
    # ---------------------------------------------------------------
    # Stage 1 (disabled): download the data to local storage
    # ---------------------------------------------------------------
    # fetcher.ensure_data(fetcher.daily, "__temp_daily__")  # daily quotes table
    # fetcher.ensure_data(fetcher.daily_basic, "__temp_daily_basic__")  # daily metrics table
    # fetcher.ensure_data_by_q(fetcher.fina_indicator, "__temp_fina_indicator__")  # financial indicators table
    # fetcher.ensure_data_by_q(fetcher.fina_indicator_ttm, "__temp_fina_indicator_ttm__")  # financial indicators (TTM) table
    # fetcher.ensure_data_by_q(fetcher.income, "__temp_income__")  # income statement
    # fetcher.ensure_data_by_q(fetcher.balancesheet, "__temp_balancesheet__")  # balance sheet
    # fetcher.ensure_data_by_q(fetcher.cashflow, "__temp_cashflow__")  # cash flow statement
    # fetcher.ensure_data(fetcher.suspend_d, "__temp_suspend_d__")  # suspension table

    # ---------------------------------------------------------------
    # Stage 2: build indicators from the local data
    # ---------------------------------------------------------------
    # Parallel path: one joblib task per wrapper, submitted in this order.
    parallel_tasks = (
        profit_ttm_G_m,
        qfa_yoysales_m,
        qfa_yoyprofit_m,
        qfa_yoyocf_m,
        qfa_roe_G_m,
        roe_ttm2_m,
        qfa_roa_m,
        roa2_ttm2_m,
        qfa_grossprofitmargin_m,
        grossprofitmargin_ttm2_m,
        turnover_ttm_m,
        assetstoequity_m,
        longdebttoequity_m,
        cashtocurrentdebt_m,
        current_m,
        longdebttodebt_lyr_m,
        tot_liab_lyr_m,
        other_equity_instruments_PRE_lyr_m,
        tot_equity_lyr_m,
        tot_assets_lyr_m,
    )
    function_list = [delayed(task)() for task in parallel_tasks]
    Parallel(n_jobs=10, backend='multiprocessing')(function_list)

    # Serial path.
    # fetcher.create_trade_status()

    # Author's note: daily indicators built from daily quote data;
    # missing values are filled with ffill first, then bfill.
    # fetcher.create_daily_quote_indicators()

    # Author's note: daily indicators built from the daily metrics data;
    # missing values are filled with ffill first, then bfill.
    fetcher.create_daily_basic_indicators()

    # Author's note: monthly indicators built from the daily metrics data;
    # missing values are filled with ffill first, then bfill.  Remark: the
    # most reasonable approach would be to fill the gaps in the base daily
    # data first and resample to monthly afterwards, rather than resampling
    # to monthly first and filling the gaps afterwards.
    daily_basic_table = "__temp_daily_basic__"
    for second_arg, third_arg in (
        ("circ_mv", "mkt_cap_float"),
        ("pe_ttm", "pe_ttm"),
        ("pe", "val_pe_deducted_ttm"),
        ("pb", "pb_lf"),
        ("ps_ttm", "ps_ttm"),
        ("dv_ttm", "dividendyield2"),
    ):
        fetcher.create_indicator_m_by_d_ex(daily_basic_table, second_arg, third_arg)

    # Author's note: monthly indicators built from quarterly financial data;
    # missing values are filled with ffill first, then bfill.
    fina_table = "__temp_fina_indicator__"
    fina_ttm_table = "__temp_fina_indicator_ttm__"
    balance_table = "__temp_balancesheet__"

    fetcher.create_profit_ttm_G_m()
    for call_args in (
        (fina_table, "S_QFA_YOYSALES", "qfa_yoysales_m"),
        (fina_table, "S_QFA_YOYPROFIT", "qfa_yoyprofit_m"),
        (fina_table, "S_FA_YOYOCF", "qfa_yoyocf_m"),
    ):
        fetcher.create_indicator_m_by_q_ex(*call_args)

    fetcher.create_qfa_roe_G_m()
    for call_args in (
        (fina_ttm_table, "S_FA_ROE_TTM", "roe_ttm2_m"),
        (fina_table, "S_QFA_ROA", "qfa_roa_m"),
        (fina_ttm_table, "S_FA_ROA_TTM", "roa2_ttm2_m"),
        (fina_table, "S_QFA_GROSSPROFITMARGIN", "qfa_grossprofitmargin_m"),
        (fina_ttm_table, "S_FA_GROSSPROFITMARGIN_TTM", "grossprofitmargin_ttm2_m"),
        (fina_table, "S_FA_ASSETSTURN", "turnover_ttm_m"),          # temporary substitute
        (fina_table, "S_FA_ASSETSTOEQUITY", "assetstoequity_m"),
        (fina_table, "S_FA_DEBTTOEQUITY", "longdebttoequity_m"),    # temporary substitute
        (fina_table, "S_FA_CASHTOLIQDEBT", "cashtocurrentdebt_m"),
        (fina_table, "S_FA_CURRENT", "current_m"),
        (fina_table, "S_FA_LONGDEBTODEBT", "longdebttodebt_lyr_m"),
        (balance_table, "TOT_LIAB", "tot_liab_lyr_m"),
        (balance_table, "OTHER_EQUITY_TOOLS_P_SHR", "other_equity_instruments_PRE_lyr_m"),
        (balance_table, "TOT_SHRHLDR_EQY_INCL_MIN_INT", "tot_equity_lyr_m"),
        (balance_table, "TOT_ASSETS", "tot_assets_lyr_m"),
    ):
        fetcher.create_indicator_m_by_q_ex(*call_args)
if __name__ == '__main__':
    # Script entry point.  Only the Wind pipeline is active; the Tushare
    # pipeline is kept as a commented-out alternative.
    WindFetch()
    # TushareFetch()
# Disabled legacy snippet, kept as an inert string literal (it is never
# executed).  It reads "industry_zx.csv", re-keys every date column to the
# last calendar day of its month, prints the first ten rows and writes the
# result to "industry_citic.csv".
'''
path = os.path.dirname(os.path.dirname(__file__))
df = pd.read_csv(os.path.join(path, "industry_zx.csv"), index_col=[0], engine='python', encoding='gbk')
df.columns = pd.to_datetime(df.columns)
new_df = pd.DataFrame(index=df.index)
def _get_month_end(date):
import calendar
import pandas.tseries.offsets as toffsets
_, days = calendar.monthrange(date.year, date.month)
if date.day == days:
return date
else:
return date + toffsets.MonthEnd(n=1)
for tday in df.columns.tolist():
cday = _get_month_end(tday)
new_df[cday] = df[tday]
print(new_df.iloc[0:10])
new_df.to_csv(os.path.join(path, 'industry_citic.csv'), encoding='gbk')
'''