Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/cbert_precomputed_10stratified_f…
Browse files Browse the repository at this point in the history
…olds_multitask'
  • Loading branch information
joaorafaelalmeida committed Mar 4, 2022
2 parents a7d2c76 + ec09b60 commit 1c0ec96
Show file tree
Hide file tree
Showing 34 changed files with 10,407 additions and 209 deletions.
4 changes: 0 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,6 @@ model/early_readmission/pytorch_model.bin
model/pretraining/pytorch_model.bin
external_repos/UMLS/2020AA/META/





# C extensions
*.so

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ More documentation about the system and how to run it is available [here](https:

Please cite the following, if you use PatientTM in your work:


```bib
Manuscript currently in development, will be updated as soon as possible.
```
Expand All @@ -41,4 +42,3 @@ PatientTM is under GPL-3.0 license. For more information, click
### Acknowledgements

We thank Kexin Huang for his publicly available repository for clinicalBERT, which was used as a base for exploring textual information from clinical notes. This repository was improved with numerous features, such as more robust data preprocessing and model validation mechanisms, and the resulting augmented version was integrated in PatientTM.

Original file line number Diff line number Diff line change
@@ -1 +1 @@
cat /backup/UMLS/2020AA/META/MRCONSO.RRF | grep -E "\|ICD9CM" | cut -d "|" -f 12,13,14,15 --output-delimiter="|" >> /backup/joaofsilva/PatientTM/data/extended/preprocessing/icd9FromUMLS.txt
cat /backup/UMLS/2020AA/META/MRCONSO.RRF | grep -E "\|ICD9CM" | cut -d "|" -f 12,13,14,15 --output-delimiter="|" >> /backup/joaofsilva/clinicalBERT/data/extended/preprocessing/icd9FromUMLS.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
cat /backup/UMLS/2020AA/META/MRSAT.RRF | grep -E "\|NDC\|RXNORM" | cut -d "|" -f 1,9,10,11 >> /backup/joaofsilva/PatientTM/data/extended/preprocessing/ndcsFromUMLS.txt
cat /backup/UMLS/2020AA/META/MRSAT.RRF | grep -E "\|NDC\|RXNORM" | cut -d "|" -f 1,9,10,11 >> /backup/joaofsilva/clinicalBERT/data/extended/preprocessing/ndcsFromUMLS.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
CUDA_VISIBLE_DEVICES=0 python3 ./cbert_precomputation.py --task_name readmission --readmission_mode discharge --data_dir /PatientTM/data/extended/discharge/ --bert_model /PatientTM/model/discharge_readmission --max_seq_length 512 --output_dir /PatientTM/results/result_discharge
CUDA_VISIBLE_DEVICES=0 python3 ./cbert_precomputation.py --task_name readmission --readmission_mode discharge --data_dir /clinicalBERT/data/extended/discharge/ --bert_model /clinicalBERT/model/discharge_readmission --max_seq_length 512 --output_dir /clinicalBERT/results/result_discharge
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
CUDA_VISIBLE_DEVICES=0 python3 ./cbert_precomputation.py --task_name readmission --readmission_mode discharge --data_dir /PatientTM/data/extended_folds/discharge/ --bert_model /PatientTM/model/pretraining --max_seq_length 512 --output_dir /PatientTM/results/result_discharge
CUDA_VISIBLE_DEVICES=0 python3 ./cbert_precomputation.py --task_name readmission --readmission_mode discharge --data_dir /PatientFM/data/extended_folds/discharge/ --bert_model /PatientFM/model/pretraining --max_seq_length 512 --output_dir /PatientFM/results/result_discharge

CUDA_VISIBLE_DEVICES=0 python3 ./cbert_precomputation.py --task_name readmission --readmission_mode early --data_dir /PatientTM/data/extended_folds/early/ --bert_model /PatientTM/model/pretraining --max_seq_length 512 --output_dir /PatientTM/results/result_early
CUDA_VISIBLE_DEVICES=0 python3 ./cbert_precomputation.py --task_name readmission --readmission_mode early --data_dir /PatientFM/data/extended_folds/early/ --bert_model /PatientFM/model/pretraining --max_seq_length 512 --output_dir /PatientFM/results/result_early



CUDA_VISIBLE_DEVICES=0 python3 ./cbert_precomputation.py --task_name readmission --readmission_mode discharge_subjectsplit --data_dir /PatientTM/data/extended_folds/discharge_subjectsplit/ --bert_model /PatientTM/model/pretraining --max_seq_length 512 --output_dir /PatientTM/results/result_discharge
CUDA_VISIBLE_DEVICES=0 python3 ./cbert_precomputation.py --task_name readmission --readmission_mode discharge_subjectsplit --data_dir /PatientFM/data/extended_folds/discharge_subjectsplit/ --bert_model /PatientFM/model/pretraining --max_seq_length 512 --output_dir /PatientFM/results/result_discharge

CUDA_VISIBLE_DEVICES=0 python3 ./cbert_precomputation.py --task_name readmission --readmission_mode early_subjectsplit --data_dir /PatientTM/data/extended_folds/early_subjectsplit/ --bert_model /PatientTM/model/pretraining --max_seq_length 512 --output_dir /PatientTM/results/result_early
CUDA_VISIBLE_DEVICES=0 python3 ./cbert_precomputation.py --task_name readmission --readmission_mode early_subjectsplit --data_dir /PatientFM/data/extended_folds/early_subjectsplit/ --bert_model /PatientFM/model/pretraining --max_seq_length 512 --output_dir /PatientFM/results/result_early
60 changes: 30 additions & 30 deletions notebook/3_Replication_5fold_dataset_generation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -14,23 +14,23 @@
" # lines=zip(file.HADM_ID,file.Label)\n",
" # return lines\n",
"\n",
"filepath = \"/PatientTM/data/extended/discharge/train.csv\"\n",
"filepath = \"/clinicalBERT/data/extended/discharge/train.csv\"\n",
"df_train = read_file(filepath)\n",
"df_train = df_train[df_train.Label == 0].HADM_ID\n",
"negative_HADMIDs_train = [hadm_id for hadm_id in df_train]\n",
"\n",
"filepath = \"/PatientTM/data/extended/discharge/val.csv\"\n",
"filepath = \"/clinicalBERT/data/extended/discharge/val.csv\"\n",
"df_val = read_file(filepath)\n",
"df_val = df_val[df_val.Label == 0].HADM_ID\n",
"negative_HADMIDs_val = [hadm_id for hadm_id in df_val]\n",
" \n",
"filepath = \"/PatientTM/data/extended/discharge/test.csv\"\n",
"filepath = \"/clinicalBERT/data/extended/discharge/test.csv\"\n",
"df_test = read_file(filepath)\n",
"df_test = df_test[df_test.Label == 0].HADM_ID\n",
"negative_HADMIDs_test = [hadm_id for hadm_id in df_test]\n",
"\n",
"negative_HADMIDs = negative_HADMIDs_train + negative_HADMIDs_val + negative_HADMIDs_test\n",
"with open(\"/PatientTM/data/extended/discharge/negative_hadmids.txt\", \"w\") as file:\n",
"with open(\"/clinicalBERT/data/extended/discharge/negative_hadmids.txt\", \"w\") as file:\n",
" for hadm in negative_HADMIDs:\n",
" file.write(str(hadm)+\"\\n\")\n",
" "
Expand Down Expand Up @@ -63,7 +63,7 @@
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"with open(\"/PatientTM/data/extended/discharge/negative_hadmids.txt\",\"r\") as file:\n",
"with open(\"/clinicalBERT/data/extended/discharge/negative_hadmids.txt\",\"r\") as file:\n",
" lines = file.readlines()\n",
" negative_HADMIDs = [line.strip(\"\\n\") for line in lines]\n",
" \n",
Expand All @@ -81,17 +81,17 @@
" return(entries)\n",
" \n",
" \n",
"# df_train = pd.read_csv(\"/PatientTM/data/extended/discharge/train.csv\")\n",
"# df_val = pd.read_csv(\"/PatientTM/data/extended/discharge/val.csv\")\n",
"# df_test = pd.read_csv(\"/PatientTM/data/extended/discharge/test.csv\")\n",
"# df_train = pd.read_csv(\"/clinicalBERT/data/extended/discharge/train.csv\")\n",
"# df_val = pd.read_csv(\"/clinicalBERT/data/extended/discharge/val.csv\")\n",
"# df_test = pd.read_csv(\"/clinicalBERT/data/extended/discharge/test.csv\")\n",
"# df_total = pd.concat([df_train, df_val, df_test])\n",
"\n",
"df_total = pd.read_csv(\"/PatientTM/data/extended_folds/discharge/fold0_text.csv\")\n",
"array = read_precomputed_npy(\"/PatientTM/data/extended_folds/discharge/fold0_text_precomputed.npy\")\n",
"df_total = pd.read_csv(\"/clinicalBERT/data/extended_folds/discharge/fold0_text.csv\")\n",
"array = read_precomputed_npy(\"/clinicalBERT/data/extended_folds/discharge/fold0_text_precomputed.npy\")\n",
"for i in range(1,10):\n",
" df_fold = pd.read_csv(\"/PatientTM/data/extended_folds/discharge/fold\"+str(i)+\"_text.csv\")\n",
" df_fold = pd.read_csv(\"/clinicalBERT/data/extended_folds/discharge/fold\"+str(i)+\"_text.csv\")\n",
" df_total = pd.concat([df_total, df_fold])\n",
" array_fold = read_precomputed_npy(\"/PatientTM/data/extended_folds/discharge/fold\" + str(i) + \"_text_precomputed.npy\")\n",
" array_fold = read_precomputed_npy(\"/clinicalBERT/data/extended_folds/discharge/fold\" + str(i) + \"_text_precomputed.npy\")\n",
" array = np.concatenate([array, array_fold])\n",
" \n",
"precomputed_df = pd.DataFrame.from_dict(list(array))\n",
Expand All @@ -114,12 +114,12 @@
" print(\"Fold Number {}:\\n\".format(i))\n",
" fold_HADM_IDs = df_total_valid_HADMID[test_index]\n",
" fold_df = df_total_valid[df_total_valid.HADM_ID.isin(fold_HADM_IDs)].reset_index(drop=True)\n",
" filename = '/PatientTM/data/extended/discharge/fold' + str(i) + '_text.csv'\n",
" filename = '/clinicalBERT/data/extended/discharge/fold' + str(i) + '_text.csv'\n",
" fold_df.to_csv(filename, columns=[\"SUBJECT_ID\",\"HADM_ID\",\"ADMITTIME\",\"DAYS_NEXT_ADMIT\",\"DAYS_PREV_ADMIT\",\"DURATION\",\"DIAG_ICD9\",\"SMALL_DIAG_ICD9\",\"DIAG_CCS\",\"PROC_ICD9\",\"SMALL_PROC_ICD9\",\"PROC_CCS\",\"NDC\",\"CUI\",\"Label\",\"TEXT\",\"NEXT_SMALL_DIAG_ICD9\",\"NEXT_DIAG_CCS\",\"NEXT_SMALL_PROC_ICD9\",\"NEXT_PROC_CCS\",\"NEXT_CUI\"])\n",
" fold_precomputed_df = precomputed_df[precomputed_df.HADM_ID.isin(fold_HADM_IDs)].reset_index(drop=True)\n",
" fold_precomputed_entries = df_to_dict_array(fold_precomputed_df)\n",
" fold_array = np.asarray(fold_precomputed_entries, dtype=object) \n",
" array_filepath = \"/PatientTM/data/extended/discharge/fold\" + str(i) + \"_text_precomputed.npy\"\n",
" array_filepath = \"/clinicalBERT/data/extended/discharge/fold\" + str(i) + \"_text_precomputed.npy\"\n",
" np.save(array_filepath, fold_array)\n",
" i+=1\n",
" "
Expand All @@ -144,13 +144,13 @@
],
"source": [
"import pandas as pd\n",
"# df_test = pd.read_csv(\"/PatientTM/data/extended/discharge/test.csv\")\n",
"# df_test = pd.read_csv(\"/clinicalBERT/data/extended/discharge/test.csv\")\n",
"# print(np.unique(df_test.Label, return_counts=True)[1])\n",
"df_0 = pd.read_csv(\"/PatientTM/data/extended/discharge/fold0_text.csv\")\n",
"df_1 = pd.read_csv(\"/PatientTM/data/extended/discharge/fold1_text.csv\")\n",
"df_2 = pd.read_csv(\"/PatientTM/data/extended/discharge/fold2_text.csv\")\n",
"df_3 = pd.read_csv(\"/PatientTM/data/extended/discharge/fold3_text.csv\")\n",
"df_4 = pd.read_csv(\"/PatientTM/data/extended/discharge/fold4_text.csv\")\n",
"df_0 = pd.read_csv(\"/clinicalBERT/data/extended/discharge/fold0_text.csv\")\n",
"df_1 = pd.read_csv(\"/clinicalBERT/data/extended/discharge/fold1_text.csv\")\n",
"df_2 = pd.read_csv(\"/clinicalBERT/data/extended/discharge/fold2_text.csv\")\n",
"df_3 = pd.read_csv(\"/clinicalBERT/data/extended/discharge/fold3_text.csv\")\n",
"df_4 = pd.read_csv(\"/clinicalBERT/data/extended/discharge/fold4_text.csv\")\n",
"# print(np.unique(df_0.Label, return_counts=True)[1])\n",
"# print(np.unique(df_1.Label, return_counts=True)[1])\n",
"# print(np.unique(df_2.Label, return_counts=True)[1])\n",
Expand Down Expand Up @@ -191,31 +191,31 @@
"outputs": [],
"source": [
"import pandas as pd\n",
"df_0 = pd.read_csv(\"/PatientTM/data/extended/discharge/fold0_text.csv\")\n",
"df_1 = pd.read_csv(\"/PatientTM/data/extended/discharge/fold1_text.csv\")\n",
"df_2 = pd.read_csv(\"/PatientTM/data/extended/discharge/fold2_text.csv\")\n",
"df_3 = pd.read_csv(\"/PatientTM/data/extended/discharge/fold3_text.csv\")\n",
"df_4 = pd.read_csv(\"/PatientTM/data/extended/discharge/fold4_text.csv\")\n",
"df_0 = pd.read_csv(\"/clinicalBERT/data/extended/discharge/fold0_text.csv\")\n",
"df_1 = pd.read_csv(\"/clinicalBERT/data/extended/discharge/fold1_text.csv\")\n",
"df_2 = pd.read_csv(\"/clinicalBERT/data/extended/discharge/fold2_text.csv\")\n",
"df_3 = pd.read_csv(\"/clinicalBERT/data/extended/discharge/fold3_text.csv\")\n",
"df_4 = pd.read_csv(\"/clinicalBERT/data/extended/discharge/fold4_text.csv\")\n",
"\n",
"df_0_notext = df_0.drop([\"TEXT\"], axis=1)\n",
"df_0_notext = df_0_notext.drop_duplicates(subset=[\"HADM_ID\"])\n",
"df_0_notext.to_csv(\"/PatientTM/data/extended/discharge/fold0_notext.csv\", columns=[\"SUBJECT_ID\",\"HADM_ID\",\"ADMITTIME\",\"DAYS_NEXT_ADMIT\",\"DAYS_PREV_ADMIT\",\"DURATION\",\"DIAG_ICD9\",\"SMALL_DIAG_ICD9\",\"DIAG_CCS\",\"PROC_ICD9\",\"SMALL_PROC_ICD9\",\"PROC_CCS\",\"NDC\",\"CUI\",\"Label\",\"NEXT_SMALL_DIAG_ICD9\",\"NEXT_DIAG_CCS\",\"NEXT_SMALL_PROC_ICD9\",\"NEXT_PROC_CCS\",\"NEXT_CUI\"])\n",
"df_0_notext.to_csv(\"/clinicalBERT/data/extended/discharge/fold0_notext.csv\", columns=[\"SUBJECT_ID\",\"HADM_ID\",\"ADMITTIME\",\"DAYS_NEXT_ADMIT\",\"DAYS_PREV_ADMIT\",\"DURATION\",\"DIAG_ICD9\",\"SMALL_DIAG_ICD9\",\"DIAG_CCS\",\"PROC_ICD9\",\"SMALL_PROC_ICD9\",\"PROC_CCS\",\"NDC\",\"CUI\",\"Label\",\"NEXT_SMALL_DIAG_ICD9\",\"NEXT_DIAG_CCS\",\"NEXT_SMALL_PROC_ICD9\",\"NEXT_PROC_CCS\",\"NEXT_CUI\"])\n",
"\n",
"df_1_notext = df_1.drop([\"TEXT\"], axis=1)\n",
"df_1_notext = df_1_notext.drop_duplicates(subset=[\"HADM_ID\"])\n",
"df_1_notext.to_csv(\"/PatientTM/data/extended/discharge/fold1_notext.csv\", columns=[\"SUBJECT_ID\",\"HADM_ID\",\"ADMITTIME\",\"DAYS_NEXT_ADMIT\",\"DAYS_PREV_ADMIT\",\"DURATION\",\"DIAG_ICD9\",\"SMALL_DIAG_ICD9\",\"DIAG_CCS\",\"PROC_ICD9\",\"SMALL_PROC_ICD9\",\"PROC_CCS\",\"NDC\",\"CUI\",\"Label\",\"NEXT_SMALL_DIAG_ICD9\",\"NEXT_DIAG_CCS\",\"NEXT_SMALL_PROC_ICD9\",\"NEXT_PROC_CCS\",\"NEXT_CUI\"])\n",
"df_1_notext.to_csv(\"/clinicalBERT/data/extended/discharge/fold1_notext.csv\", columns=[\"SUBJECT_ID\",\"HADM_ID\",\"ADMITTIME\",\"DAYS_NEXT_ADMIT\",\"DAYS_PREV_ADMIT\",\"DURATION\",\"DIAG_ICD9\",\"SMALL_DIAG_ICD9\",\"DIAG_CCS\",\"PROC_ICD9\",\"SMALL_PROC_ICD9\",\"PROC_CCS\",\"NDC\",\"CUI\",\"Label\",\"NEXT_SMALL_DIAG_ICD9\",\"NEXT_DIAG_CCS\",\"NEXT_SMALL_PROC_ICD9\",\"NEXT_PROC_CCS\",\"NEXT_CUI\"])\n",
"\n",
"df_2_notext = df_2.drop([\"TEXT\"], axis=1)\n",
"df_2_notext = df_2_notext.drop_duplicates(subset=[\"HADM_ID\"])\n",
"df_2_notext.to_csv(\"/PatientTM/data/extended/discharge/fold2_notext.csv\", columns=[\"SUBJECT_ID\",\"HADM_ID\",\"ADMITTIME\",\"DAYS_NEXT_ADMIT\",\"DAYS_PREV_ADMIT\",\"DURATION\",\"DIAG_ICD9\",\"SMALL_DIAG_ICD9\",\"DIAG_CCS\",\"PROC_ICD9\",\"SMALL_PROC_ICD9\",\"PROC_CCS\",\"NDC\",\"CUI\",\"Label\",\"NEXT_SMALL_DIAG_ICD9\",\"NEXT_DIAG_CCS\",\"NEXT_SMALL_PROC_ICD9\",\"NEXT_PROC_CCS\",\"NEXT_CUI\"])\n",
"df_2_notext.to_csv(\"/clinicalBERT/data/extended/discharge/fold2_notext.csv\", columns=[\"SUBJECT_ID\",\"HADM_ID\",\"ADMITTIME\",\"DAYS_NEXT_ADMIT\",\"DAYS_PREV_ADMIT\",\"DURATION\",\"DIAG_ICD9\",\"SMALL_DIAG_ICD9\",\"DIAG_CCS\",\"PROC_ICD9\",\"SMALL_PROC_ICD9\",\"PROC_CCS\",\"NDC\",\"CUI\",\"Label\",\"NEXT_SMALL_DIAG_ICD9\",\"NEXT_DIAG_CCS\",\"NEXT_SMALL_PROC_ICD9\",\"NEXT_PROC_CCS\",\"NEXT_CUI\"])\n",
"\n",
"df_3_notext = df_3.drop([\"TEXT\"], axis=1)\n",
"df_3_notext = df_3_notext.drop_duplicates(subset=[\"HADM_ID\"])\n",
"df_3_notext.to_csv(\"/PatientTM/data/extended/discharge/fold3_notext.csv\", columns=[\"SUBJECT_ID\",\"HADM_ID\",\"ADMITTIME\",\"DAYS_NEXT_ADMIT\",\"DAYS_PREV_ADMIT\",\"DURATION\",\"DIAG_ICD9\",\"SMALL_DIAG_ICD9\",\"DIAG_CCS\",\"PROC_ICD9\",\"SMALL_PROC_ICD9\",\"PROC_CCS\",\"NDC\",\"CUI\",\"Label\",\"NEXT_SMALL_DIAG_ICD9\",\"NEXT_DIAG_CCS\",\"NEXT_SMALL_PROC_ICD9\",\"NEXT_PROC_CCS\",\"NEXT_CUI\"])\n",
"df_3_notext.to_csv(\"/clinicalBERT/data/extended/discharge/fold3_notext.csv\", columns=[\"SUBJECT_ID\",\"HADM_ID\",\"ADMITTIME\",\"DAYS_NEXT_ADMIT\",\"DAYS_PREV_ADMIT\",\"DURATION\",\"DIAG_ICD9\",\"SMALL_DIAG_ICD9\",\"DIAG_CCS\",\"PROC_ICD9\",\"SMALL_PROC_ICD9\",\"PROC_CCS\",\"NDC\",\"CUI\",\"Label\",\"NEXT_SMALL_DIAG_ICD9\",\"NEXT_DIAG_CCS\",\"NEXT_SMALL_PROC_ICD9\",\"NEXT_PROC_CCS\",\"NEXT_CUI\"])\n",
"\n",
"df_4_notext = df_4.drop([\"TEXT\"], axis=1)\n",
"df_4_notext = df_4_notext.drop_duplicates(subset=[\"HADM_ID\"])\n",
"df_4_notext.to_csv(\"/PatientTM/data/extended/discharge/fold4_notext.csv\", columns=[\"SUBJECT_ID\",\"HADM_ID\",\"ADMITTIME\",\"DAYS_NEXT_ADMIT\",\"DAYS_PREV_ADMIT\",\"DURATION\",\"DIAG_ICD9\",\"SMALL_DIAG_ICD9\",\"DIAG_CCS\",\"PROC_ICD9\",\"SMALL_PROC_ICD9\",\"PROC_CCS\",\"NDC\",\"CUI\",\"Label\",\"NEXT_SMALL_DIAG_ICD9\",\"NEXT_DIAG_CCS\",\"NEXT_SMALL_PROC_ICD9\",\"NEXT_PROC_CCS\",\"NEXT_CUI\"])\n",
"df_4_notext.to_csv(\"/clinicalBERT/data/extended/discharge/fold4_notext.csv\", columns=[\"SUBJECT_ID\",\"HADM_ID\",\"ADMITTIME\",\"DAYS_NEXT_ADMIT\",\"DAYS_PREV_ADMIT\",\"DURATION\",\"DIAG_ICD9\",\"SMALL_DIAG_ICD9\",\"DIAG_CCS\",\"PROC_ICD9\",\"SMALL_PROC_ICD9\",\"PROC_CCS\",\"NDC\",\"CUI\",\"Label\",\"NEXT_SMALL_DIAG_ICD9\",\"NEXT_DIAG_CCS\",\"NEXT_SMALL_PROC_ICD9\",\"NEXT_PROC_CCS\",\"NEXT_CUI\"])\n",
"\n"
]
},
Expand Down
6 changes: 3 additions & 3 deletions notebook/4_Diagnosis_prediction_data_preparation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@
"\n",
"for i in range(cvFolds):\n",
" print(\"Fold Number {}:\\n\".format(i))\n",
" filename = '/PatientTM/data/extended_folds/discharge_subjectsplit/fold' + str(i)\n",
" filename = '/clinicalBERT/data/extended_folds/discharge_subjectsplit/fold' + str(i)\n",
" taskname = '_codeprediction'\n",
" # extension = '_notext.csv'\n",
" extension = '_text.csv'\n",
Expand Down Expand Up @@ -198,8 +198,8 @@
"\n",
"for i in range(cvFolds):\n",
" print(\"Fold Number {}:\\n\".format(i))\n",
" origin_filename = '/PatientTM/data/extended_folds/fold' + str(i) +'_codeprediction'\n",
" destination_filename = '/PatientTM/data/extended_folds/discharge_subjectsplit/fold' + str(i) + '_notext.csv'\n",
" origin_filename = '/clinicalBERT/data/extended_folds/fold' + str(i) +'_codeprediction'\n",
" destination_filename = '/clinicalBERT/data/extended_folds/discharge_subjectsplit/fold' + str(i) + '_notext.csv'\n",
" # extension = '_text.csv'\n",
" foldDf = pd.read_csv(origin_filename+extension).drop([\"Unnamed: 0\"], axis=1) \n",
" cleanFoldDf = foldDf[foldDf['NEXT_DIAG_CCS'].notna()].reset_index(drop=True)\n",
Expand Down
Loading

0 comments on commit 1c0ec96

Please sign in to comment.