forked from Xuxl2020/ASD-XGB
-
Notifications
You must be signed in to change notification settings - Fork 0
/
TS-TD.txt
127 lines (103 loc) · 3.44 KB
/
TS-TD.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
### Stage 1: ASD and GDD (pooled as class 0) vs. DLD (class 1)
import pandas as pd
import numpy as np
from numpy import *
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss, roc_auc_score,f1_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import keras
import math
from sklearn.model_selection import KFold
from keras.wrappers.scikit_learn import KerasClassifier
from xgboost.sklearn import XGBClassifier
import xgboost as xgb
from sklearn.utils.class_weight import compute_sample_weight
from keras.utils import to_categorical # one-hot编码
import os
import warnings
warnings.filterwarnings("ignore")
### Stage 1: separate DLD (class 1) from the pooled ASD + GDD group (class 0)
### using a single shallow XGBoost tree, then evaluate on the held-out set.

### load data
df = pd.read_csv('/home/kesci/input/data7617/dat.1944.csv')
tdf = pd.read_csv('/home/kesci/input/data7617/dat.60.csv')

# Stage-1 label encoding: ASD and GDD share class 0, DLD is class 1.
stage1_labels = {'ASD': 0, 'DLD': 1, 'GDD': 0}
df = df.replace(stage1_labels)
tdf = tdf.replace(stage1_labels)

col = ['diag', 'adaptation', 'social', 'fine', 'gross']
col1 = ['adaptation', 'social', 'fine', 'gross']
dat = df[col]
grp = dat['diag']

# Cast the labels explicitly: depending on the pandas version, replace() may
# leave the column as object dtype, which the sklearn metrics can reject.
x_test = tdf[col1]
y_test = tdf['diag'].astype(int)
x_train = dat[col[1:]]
y_train = dat['diag'].astype(int)

# One tree of depth <= 3, trained on every row and every feature.
# `verbosity=0` replaces the deprecated `silent=True` switch.
xlf = XGBClassifier(
    max_depth=3,
    learning_rate=0.0001,
    n_estimators=1,
    verbosity=0,
    eval_metric='error',
    objective='binary:logistic',
    subsample=1,
    colsample_bytree=1)
xgc = xlf

# "balanced" sample weights compensate for unequal class sizes.
xgc.fit(x_train, y_train,
        sample_weight=compute_sample_weight("balanced", y_train))
pred_test = xgc.predict_proba(x_test)

### predict test
# Hard label = column with the larger predicted probability.
pred_res = np.argmax(pred_test, axis=1)

# Pin labels so the matrix is always 2x2, even if one class is missing from
# both y_test and pred_res (otherwise acc_all_test[1] raises IndexError).
mat_test = confusion_matrix(y_test, pred_res, labels=[0, 1])

# Diagonal / row sums = fraction of each true class predicted correctly.
acc_all_test = np.diag(mat_test) / mat_test.sum(axis=1)
acc_test = accuracy_score(y_test, pred_res)
acc1 = acc_all_test[0]
acc2 = acc_all_test[1]
print('acc:\n', acc_test)
print('r:\n', acc_all_test)
print('matrix:\n', mat_test)

# NOTE(review): the returned graph object is not stored; presumably this is
# meant to be displayed as the last expression of a notebook cell - confirm.
xgb.to_graphviz(xgc, num_trees=0)
### Stage 2: ASD (class 0) vs. GDD (class 1)
### Train a depth-1 tree on the single feature 'mchathigh' and apply it to
### the test samples that stage 1 assigned to class 0.
df = pd.read_csv('/home/kesci/input/data7617/dat.1944.csv')
tdf = pd.read_csv('/home/kesci/input/data7617/dat.60.csv')

# Stage-2 label encoding. DLD rows keep the string 'DLD' and are excluded
# below by selecting only the rows whose label is 0 or 1.
stage2_labels = {'ASD': 0, 'GDD': 1}
df = df.replace(stage2_labels)
tdf = tdf.replace(stage2_labels)

grp_train = df['diag']
grp_test = tdf['diag']

# Training rows: all ASD rows followed by all GDD rows.
grp_train_ind1 = grp_train.index[grp_train == 0]
grp_train_ind2 = grp_train.index[grp_train == 1]
grp_train_ind = np.concatenate((grp_train_ind1, grp_train_ind2))
# These are index *labels*, so select with .loc (not positional .iloc).
train = df.loc[grp_train_ind]

col = ['diag', 'mchathigh']
x_train = train[col[1:]]
# 'diag' is object dtype here because it shared a column with 'DLD';
# cast to int so the classifier receives numeric labels.
y_train = train['diag'].astype(int)

# A single decision stump. `verbosity=0` replaces the deprecated `silent=True`.
xlf = XGBClassifier(
    learning_rate=0.0001,
    max_depth=1,
    n_estimators=1,
    verbosity=0,
    eval_metric='error',
    objective='binary:logistic',
    subsample=1,
    colsample_bytree=1)
xgc2 = xlf
xgc2.fit(x_train, y_train,
         sample_weight=compute_sample_weight("balanced", y_train))

# Select the test samples predicted as class 0 in the first stage whose true
# label is ASD or GDD (true DLD samples are left out of this evaluation).
# `pred_res` comes from stage 1 and is assumed to be row-aligned with `tdf`.
is_asd_or_gdd = grp_test.isin([0, 1]).to_numpy()
a = np.flatnonzero((pred_res == 0) & is_asd_or_gdd)
x_test0 = tdf.iloc[a]
x_test = x_test0[['mchathigh']].reset_index(drop=True)
y_test2 = grp_test.iloc[a].astype(int).reset_index(drop=True)

pred_res2 = xgc2.predict(x_test)
print(pred_res2)
print(y_test2)

# Put feature, true label and stage-2 prediction side by side; all three
# share a fresh 0..n-1 index, so the column-wise concat lines up row by row.
pred_res2 = pd.DataFrame(pred_res2)
y_test2 = pd.DataFrame(y_test2)
res = pd.concat([x_test, y_test2, pred_res2], axis=1)
# NOTE(review): '...' looks like a placeholder directory; writing will likely
# fail unless a directory with that literal name exists - confirm the path.
res.to_csv('.../res.csv', index=False)

# NOTE(review): the returned graph object is not stored; presumably this is
# meant to be displayed as the last expression of a notebook cell - confirm.
xgb.to_graphviz(xgc2, num_trees=0)