-
Notifications
You must be signed in to change notification settings - Fork 0
/
analysis.py
84 lines (67 loc) · 2.67 KB
/
analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# %%
import json
import pandas as pd
from sklearn.metrics import confusion_matrix, precision_score, recall_score, accuracy_score
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay
import seaborn as sns
from sklearn.metrics import confusion_matrix
# %%
# Define Parse Function to streamline output
def parse(x):
    """Normalize a raw label to the string "True" or "False".

    Recognized positive values are the number 1 (including 1.0 and the
    boolean True, which compare equal to 1) and the strings "1", "true"
    and "ja". Strings are matched case-insensitively and with surrounding
    whitespace ignored, so "TRUE", "Ja" or " ja " count as positive.

    Everything else -- explicit negatives such as 0, "false" or "nein",
    as well as unrecognized or missing values such as None or NaN --
    is mapped to "False".

    Args:
        x: Raw cell value (string, number, boolean, or missing value).

    Returns:
        The string "True" or "False".
    """
    if isinstance(x, str):
        # Normalize first so formatting variants of a valid label are not
        # silently counted as negatives.
        return "True" if x.strip().casefold() in ("1", "true", "ja") else "False"
    # Non-string input: 1, 1.0 and True are all equal to 1.
    return "True" if x == 1 else "False"
# %%
# Load the ground-truth labels.
# (For an Excel source use instead: gt_df = pd.read_excel("filegtpath.xlsx"))
gt_df = pd.read_csv("filegtpath.csv")
gt_df.head()
#%%
# Load the model predictions from the results directory.
filename="filewithmodeloutput.csv"
# A forward slash is accepted on Windows as well as POSIX; a hard-coded
# backslash would only resolve on Windows.
pred_df = pd.read_csv(f"results/{filename}")
pred_df.head()
#%%
# Merge ground truth and predictions on the "report" column.
# merge() defaults to an inner join, so only reports present in both
# frames are kept. The " pred" suffix is only applied to column names that
# occur in both frames, so "suizidal pred" exists only if both files
# contain a "suizidal" column.
df = gt_df.merge(pred_df, on="report", suffixes=(None, " pred"))
df.head()
#%%
# Normalize the 'suizidal' and 'suizidal pred' columns to "True"/"False".
df['suizidal'] = df['suizidal'].map(parse)
df['suizidal pred'] = df['suizidal pred'].map(parse)
df.head()
###########################################################################
# Confusion matrix
# %%
symptoms = ["suizidal"]
# Fixed class order. Passing it to confusion_matrix guarantees a 2x2 result
# even when one class never occurs in the data; without it the matrix can
# shrink to 1x1 and no longer fit the 2x2 DataFrame built below.
class_labels = ["False", "True"]
for symptom in symptoms:
    y_true = df[symptom]
    y_pred = df[f"{symptom} pred"]

    # Absolute counts: rows are actual classes, columns are predicted classes.
    cm_absolute = confusion_matrix(y_true, y_pred, labels=class_labels)

    # Row-normalize to fractions of each actual class. A class with no
    # actual samples has an all-zero row; dividing by 1 instead of 0 keeps
    # that row at 0.0 rather than producing NaN and a runtime warning.
    row_totals = cm_absolute.sum(axis=1, keepdims=True)
    cm_normalized = cm_absolute / np.where(row_totals == 0, 1, row_totals)

    # Convert to a DataFrame so the heatmap picks up the class names as ticks.
    cm_df = pd.DataFrame(cm_normalized, index=class_labels, columns=class_labels)

    # Cell text: absolute count on the first line, row fraction below it.
    annotations = [
        [f"{abs_num:d}\n({frac:.2f})" for abs_num, frac in zip(row_abs, row_frac)]
        for row_abs, row_frac in zip(cm_absolute, cm_normalized)
    ]

    # Plot the confusion matrix with seaborn using enlarged fonts.
    plt.figure(figsize=(8,6))
    ax = sns.heatmap(cm_df, annot=annotations, fmt="", cmap='Blues', vmin=0, vmax=1, annot_kws={"size": 28})
    plt.title(f'{filename}{symptom.capitalize()}', fontsize=28)
    plt.ylabel('Actual Values', fontsize=18)
    plt.xlabel('Predicted Values', fontsize=18)

    # Set the font size for the tick labels on both axes.
    ax.set_xticklabels(ax.get_xmajorticklabels(), fontsize = 28)
    ax.set_yticklabels(ax.get_ymajorticklabels(), fontsize = 28)

    # Increase the font size of the colorbar tick labels.
    cbar = ax.collections[0].colorbar
    cbar.ax.tick_params(labelsize=28)

    plt.show()
# %%