-
Notifications
You must be signed in to change notification settings - Fork 2
/
logistic.py
40 lines (30 loc) · 1.15 KB
/
logistic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# tested on Python 3.6.3
# work dir must contain: data-logistic.csv
# performs logistic regressions with different penalties
import pandas # http://pandas.pydata.org/
import sklearn # http://scikit-learn.org/stable/
#import os # https://docs.python.org/3/library/os.html
from sklearn.linear_model import LogisticRegression
# optionally change the working directory (needs the commented-out `import os` above)
#os.chdir('D:\Programming\Python\IntroML\Logistic')
# Imported explicitly: a bare `import sklearn` does not guarantee that the
# `sklearn.metrics` submodule has been loaded.
from sklearn.metrics import roc_auc_score

# load data from csv (no header row): column 0 is used as the target,
# columns 1 and 2 are used as the features
data = pandas.read_csv('data-logistic.csv', header=None)
X = data.iloc[:, 1:3]
y = data.iloc[:, 0]

# fit logistic regressions with different parameters of regularization
# The solver is pinned to 'liblinear': it was the implicit default in older
# scikit-learn releases, and the newer default ('lbfgs') rejects penalty='l1'.
clf1 = LogisticRegression(penalty='l1', solver='liblinear')
clf2 = LogisticRegression(penalty='l2', solver='liblinear')
# A very large C makes the penalty term negligible ("no regularization").
clf_noreg = LogisticRegression(C=10000, solver='liblinear')
clf1.fit(X, y)
clf2.fit(X, y)
clf_noreg.fit(X, y)

# predicted scores of all the models
# ROC-AUC ranks samples by a continuous score, so use the probability of the
# class with the greater label (column 1 of predict_proba) rather than the
# hard labels returned by predict().
# NOTE(review): assumes the target in column 0 is binary - confirm against the data.
prob1 = clf1.predict_proba(X)[:, 1]
prob2 = clf2.predict_proba(X)[:, 1]  # was clf1: the L2 model was never evaluated
prob_noreg = clf_noreg.predict_proba(X)[:, 1]

# AUC-ROC scores for all models (evaluated on the training data itself)
auc1 = roc_auc_score(y, prob1)
auc2 = roc_auc_score(y, prob2)
auc_noreg = roc_auc_score(y, prob_noreg)

# NOTE(review): only the L1 score (auc1) is reported as "with regularization";
# auc2 is computed but not printed - confirm whether that is intended.
print('AUC with regularization =', round(auc1, 4))
print('AUC without regularization =', round(auc_noreg, 4))