diff --git a/mimic-iii/notebooks/polypharmacy.ipynb b/mimic-iii/notebooks/polypharmacy.ipynb
new file mode 100644
index 000000000..12ae575e4
--- /dev/null
+++ b/mimic-iii/notebooks/polypharmacy.ipynb
@@ -0,0 +1,1526 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "a19f3b6a",
+   "metadata": {
+    "id": "a19f3b6a"
+   },
+   "source": [
+    "# Defining a polypharmacy variable in MIMIC-III"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "28cb57cc",
+   "metadata": {
+    "id": "28cb57cc"
+   },
+   "source": [
+    "Polypharmacy refers to the concurrent use of multiple medications. This notebook aims to define a binary polypharmacy variable for hospital stays using the prescriptions table. Here, polypharmacy is defined for a hospital stay ('hadm_id') as the concurrent prescription of 5 or more drugs for the same day."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "e887f9cd",
+   "metadata": {
+    "id": "e887f9cd"
+   },
+   "outputs": [],
+   "source": [
+    "# Import libraries\n",
+    "import datetime\n",
+    "import os\n",
+    "import re\n",
+    "from itertools import combinations\n",
+    "\n",
+    "import matplotlib.dates as dates\n",
+    "import matplotlib.lines as mlines\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import psycopg2\n",
+    "from IPython.display import display, HTML # used to print out pretty pandas dataframes\n",
+    "\n",
+    "import drugstandards as drugs\n",
+    "\n",
+    "%matplotlib inline\n",
+    "plt.style.use('ggplot')\n",
+    "\n",
+    "# Database connection settings. Each value can be overridden through an environment\n",
+    "# variable (the PG* names are the ones libpq itself recognises), so credentials do not\n",
+    "# have to be edited into the notebook. The defaults are the values that were\n",
+    "# previously hard-coded here.\n",
+    "sqluser = os.environ.get('PGUSER', 'postgres')\n",
+    "sqlpass = os.environ.get('PGPASSWORD', 'postgres')\n",
+    "dbname = os.environ.get('PGDATABASE', 'mimic')\n",
+    "host = os.environ.get('PGHOST', 'localhost')\n",
+    "schema_name = os.environ.get('MIMIC_SCHEMA', 'mimiciii')\n",
+    "\n",
+    "# Statement prepended to each query so unqualified table names resolve in schema_name\n",
+    "query_schema = 'SET search_path to ' + schema_name + ';'\n",
+    "\n",
+    "# connect to the database\n",
+    "con = psycopg2.connect(dbname=dbname, user=sqluser, password=sqlpass, host=host)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "c1d2bc07",
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "c1d2bc07",
+    "outputId": "e4320197-bcc3-4472-f7e1-06ebe301c222",
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\manue\\anaconda3\\lib\\site-packages\\pandas\\io\\sql.py:761: UserWarning: pandas only support SQLAlchemy connectable(engine/connection) ordatabase string URI or sqlite3 DBAPI2 connectionother DBAPI2 objects are not tested, please consider using SQLAlchemy\n",
+      "  warnings.warn(\n"
+     ]
+    }
+   ],
+   "source": [
+    "query = query_schema + \"\"\"\n",
+    "SELECT subject_id, hadm_id, icustay_id, drug_type, drug, formulary_drug_cd, route, startdate, enddate\n",
+    "FROM prescriptions\n",
+    "\"\"\"\n",
+    "prescriptions = pd.read_sql_query(query, con)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "qVlUZ1vUZdnR",
+   "metadata": {
+    "id": "qVlUZ1vUZdnR"
+   },
+   "source": [
+    "The number of unique drugs in the prescriptions table:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "086f11bc",
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "086f11bc",
+    "outputId": "68e54105-4bdc-412c-9f0c-d60668326f33"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "4525"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "prescriptions['drug'].nunique()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c8c225ed",
+   "metadata": {
+    "id": "c8c225ed"
+   },
+   "source": [
+    "### Drug prescriptions of type 'BASE' (shown below) are excluded from the calculation of polypharmacy based on expert advice. These are mostly items prescribed to make up for deficiencies in a hospital setting."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "09c69d9c",
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "09c69d9c",
+    "outputId": "99b5f275-bcee-4827-833f-c5e2ee0b9391",
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array(['D5W', 'Iso-Osmotic Dextrose', 'SW', 'Send 500mg Vial',\n",
+       "       'Syringe (Neonatal) *D5W*', 'NS', 'Soln.', 'NS (Glass Bottle)',\n",
+       "       '1/2 NS', 'D5 1/2NS', 'Syringe (IV Room)', 'Vial', 'Dextrose 5%',\n",
+       "       'Sterile Water', 'D5W (EXCEL BAG)', 'NS (Mini Bag Plus)', 'Soln',\n",
+       "       'LR', 'Syringe', 'Syringe (Neonatal)', 'D10W',\n",
+       "       'Potassium Chl 20 mEq / 1000 mL D5 1/2 NS',\n",
+       "       'Potassium Chl 20 mEq / 1000 mL NS', 'Isotonic Sodium Chloride',\n",
+       "       'Iso-Osmotic Sodium Chloride', '0.9% Sodium Chloride',\n",
+       "       '5% Dextrose', 'Amino Acids 4.25% W/ Dextrose 5%',\n",
+       "       'Syringe (Neonatal) *SW*', '0.45% Sodium Chloride',\n",
+       "       'Potassium Chl 20 mEq / 1000 mL D5NS', 'Solution', 'D5NS',\n",
+       "       'Sodium Chloride 3% (Hypertonic)', 'D5W (Excel)',\n",
+       "       'NS Epidural Bag (0.9% NaCl)', 'Bag', 'AMP', 'Syringe (PCA)',\n",
+       "       'D5W (GLASS BOTTLE)', 'Starter PN D10', 'Prismasate (B32 K2)',\n",
+       "       'D12.5W', 'Potassium Chl 40 mEq / 1000 mL D5 1/2 NS',\n",
+       "       '0.9% Sodium Chloride (Mini Bag Plus)', 'D7.5W', 'Starter PN D5',\n",
+       "       'Syringe (Neonatal) *NS*', 'Sterile Water For Irrigation',\n",
+       "       'Fat Emulsion 20%', 'Potassium Chl 40 mEq / 1000 mL D5NS',\n",
+       "       '1/4 NS', 'D5W (Glass Bottle)', 'NS (Syringe)', 'D5LR',\n",
+       "       'Potassium Chl 20 mEq / 1000 mL D5LR', 'NS        (Glass Bottle)',\n",
+       "       'Trisodium Citrate 0.4% Replacement Soln',\n",
+       "       'Normocarb Dialysis Soln', 'Syringe (Chemo)',\n",
+       "       'Potassium Chl 40 mEq / 1000 mL NS', 'Syringe (Intrapleural)',\n",
+       "       'NS Epidural Bag ', 'Sterile Diluent for Flolan',\n",
+       "       'Prismasate (B22 K4)', 'Intralipid 20%',\n",
+       "       'TRISODIUM CITRATE 4% REPLACEMENT FLUID (Dialysis)',\n",
+       "       'NS (Irrigation Bottle)', 'Syringe (Neonatal) *D10W*',\n",
+       "       'Travasol 4.25% W/ Dextrose 5%', 'Syringe (0.9% Sodium Chloride)',\n",
+       "       'Syringe (Intrathecal)', 'Amino Acids 5%-Dextrose 15%', 'D15W',\n",
+       "       'D25W', 'Potassium Chl 40 mEq / 1000 mL D5W', 'D5 1/4NS', 'D17.5W',\n",
+       "       'Citrate Dextrose 3% (ACD-A) CRRT', '5% Dextrose (EXCEL BAG)',\n",
+       "       'Lactated Ringers', 'D20W', 'Lipid Emulsion 20%', 'ACD-A for CRRT',\n",
+       "       'BTL', 'Bottle', 'Syringe (PD)', 'CVVH Citrated',\n",
+       "       'NS (IRRIGATION BOTTLE)', 'Syringe (NS)',\n",
+       "       'Potassium Chl 20 mEq / 1000 mL D5W', 'NS (Excel)',\n",
+       "       'Dextrose 20% in Water (D20W)', '0.83% Sodium Chloride',\n",
+       "       'Syringe (Intraperitoneal)', '0.45 % Sodium Chloride',\n",
+       "       'PFNS (Syringe)', 'NS  W/  0.1% Albumin', 'D12W',\n",
+       "       'D5W W 0.2% Albumin', 'Syringe (SW)', 'TB Syringe',\n",
+       "       'NS (Intrathecal Bag)', 'PlasmaLyte',\n",
+       "       'Trisodium Citrate 4% Replacement Soln',\n",
+       "       'NS Syringe (Intrapleural)', ' ', 'Fat Emulsion 10%',\n",
+       "       'NS Epidural Bag', 'Syringe (Neonatal) *D7.5W*', 'D30W',\n",
+       "       'Blue CADD Cassette', 'Syringe (Neonatal)  *D12.5W*',\n",
+       "       'Yellow CADD Cassette', 'Epidural Bag', 'Liposyn II  20%',\n",
+       "       'D5W W/ 0.1% Albumin', 'D5W 0.1% Albumin', 'Amino Acids 3.5%',\n",
+       "       'Prismasate (Ca 3.5)', '5 Syringes (NS)',\n",
+       "       'Amino Acids 4.25%-Dextrose 5%', 'PrismaSol  (B32 K0)',\n",
+       "       'Prismasate (K 2)', 'Syringe (Intraventricular)',\n",
+       "       'Sodium CITRATE 4%', 'Bottle, Glass', '300 mcg Vial',\n",
+       "       '0.9% Sodium Chloride (EXCEL BAG)', '250 mL Bottle',\n",
+       "       '6 Syringes (NS)', 'Syringe (Sterile Water)', 'CADD Cassette',\n",
+       "       '0.9% Sodium Chloride P.F. (Syringe)', '5% Dextrose (Non DEPH)',\n",
+       "       'Syringe (LR)'], dtype=object)"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Distinct drug names recorded with drug_type 'BASE'\n",
+    "prescriptions.loc[prescriptions['drug_type'] == 'BASE', 'drug'].unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "f136f1ab",
+   "metadata": {
+    "id": "f136f1ab"
+   },
+   "outputs": [],
+   "source": [
+    "# Keep every prescription whose drug_type is not 'BASE'\n",
+    "not_base = prescriptions['drug_type'] != 'BASE'\n",
+    "prescriptions_to_include = prescriptions.loc[not_base]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "234d79ae",
+   "metadata": {
+    "id": "234d79ae"
+   },
+   "source": [
+    "### Some drugs of drug_type 'BASE' also fall under drug_type 'MAIN'. These drugs are also excluded when creating the polypharmacy variable."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "98e12b44",
+   "metadata": {
+    "id": "98e12b44"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['Prismasate (B32 K2)',\n",
+       " 'Sodium CITRATE 4%',\n",
+       " 'Citrate Dextrose 3% (ACD-A) CRRT',\n",
+       " 'Intralipid 20%',\n",
+       " 'Prismasate (B22 K4)',\n",
+       " 'Syringe (Neonatal)',\n",
+       " 'PrismaSol  (B32 K0)']"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Drug names that occur under both drug_type 'BASE' and drug_type 'MAIN'\n",
+    "is_base = prescriptions['drug_type'] == 'BASE'\n",
+    "is_main = prescriptions['drug_type'] == 'MAIN'\n",
+    "bases = prescriptions.loc[is_base, 'drug'].unique().tolist()\n",
+    "mains = prescriptions.loc[is_main, 'drug'].unique().tolist()\n",
+    "bases_in_mains = list(set(mains) & set(bases))\n",
+    "bases_in_mains"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "6ecfec80",
+   "metadata": {
+    "id": "6ecfec80"
+   },
+   "outputs": [],
+   "source": [
+    "# Remove rows whose drug name is one of the base names that also appear as MAIN\n",
+    "is_shared_base = prescriptions_to_include['drug'].isin(bases_in_mains)\n",
+    "prescriptions_to_include = prescriptions_to_include.loc[~is_shared_base]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b17b8715",
+   "metadata": {
+    "id": "b17b8715"
+   },
+   "source": [
+    "### From the drug prescriptions of type 'ADDITIVE', a set of drugs was excluded from the calculation of polypharmacy based on expert advice."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "d5acf554",
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "d5acf554",
+    "outputId": "880aa481-ad35-4b7c-9183-67306a5574cc"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array(['Calcium Gluconate', 'Potassium Chloride', 'Sodium Chloride ',\n",
+       "       'Sodium Acetate', 'Heparin (Preservative Free)',\n",
+       "       'Potassium Acetate', 'Multivitamin 12', 'Folic Acid',\n",
+       "       'Multivitamin-12', 'Thiamine HCl', 'Sodium Chloride',\n",
+       "       'Hydrocortisone Na Succ.', 'Heparin Sodium', 'Bupivacaine 0.5%',\n",
+       "       'Epoetin Alfa', 'Sodium Chloride 23.4%', 'Sodium Bicarbonate',\n",
+       "       'Multivitamin IV', 'Sodium Chloride 0.9%',\n",
+       "       'Hydrocortisone Na Succinate', 'Heparin (Preserv. Free)',\n",
+       "       'Bupivacaine 0.50%', 'Lidocaine 1% PF', 'Potassium Phosphate',\n",
+       "       'Octreotide Acetate', 'Heparin', 'Mannitol', 'Lidocaine 1% P.F.',\n",
+       "       'Magnesium Sulfate', 'Thiamine', 'Mesna', 'Fentanyl Citrate',\n",
+       "       'Dextrose 50%', 'Albumin 25% (12.5gm)', 'Calcium Chloride',\n",
+       "       'Cytarabine', 'VinCRIStine', 'Humulin-R Insulin', 'Mannitol 20%',\n",
+       "       'Multivitamin IV (Without Vit. K)', 'Bupivacaine 0.05%',\n",
+       "       'Bupivacaine 0.1%', 'Insulin Human Regular', 'Phytonadione',\n",
+       "       'Famotidine', 'Vincristine Sulfate', 'Lidocaine 1%',\n",
+       "       'Normocarb Dialysate', 'CloniDINE', 'Sodium Phosphate',\n",
+       "       'Clonidine HCl', 'Hydromorphone P.F.', 'KCl',\n",
+       "       'Hydrocortisone Sod Succinate', 'FoLIC Acid',\n",
+       "       'VinCRIStine (Oncovin)', 'Hydrocortisone Na Succ', 'Ondansetron',\n",
+       "       'Fosaprepitant', 'Hydrocortisone', 'Bupivacaine 0.2%',\n",
+       "       'HYDROmorphone P.F.', 'Albumin', 'Albumin 25% (12.5g / 50mL)',\n",
+       "       'Methotrexate Sodium P.F.', 'Lidocaine 2%',\n",
+       "       'Hepatitis B Immu Glob (HepaGam B)', 'DOXOrubicin'], dtype=object)"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Drug names recorded with drug_type 'ADDITIVE': the full list is kept for the\n",
+    "# set arithmetic below, and the distinct names are displayed\n",
+    "additive_drugs = prescriptions.loc[prescriptions['drug_type'] == 'ADDITIVE', 'drug']\n",
+    "additives = additive_drugs.tolist()\n",
+    "additive_drugs.unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "00cf308f",
+   "metadata": {
+    "id": "00cf308f"
+   },
+   "outputs": [],
+   "source": [
+    "additives_to_include = ['Calcium Gluconate', 'Potassium Chloride', 'Heparin (Preservative Free)',\n",
+    "       'Multivitamin 12', 'Folic Acid',\n",
+    "       'Multivitamin-12', 'Thiamine HCl',\n",
+    "       'Hydrocortisone Na Succ.', 'Heparin Sodium', 'Bupivacaine 0.5%',\n",
+    "       'Epoetin Alfa', 'Sodium Bicarbonate',\n",
+    "       'Multivitamin IV',\n",
+    "       'Hydrocortisone Na Succinate', 'Heparin (Preserv. Free)',\n",
+    "       'Bupivacaine 0.50%', 'Lidocaine 1% PF', 'Potassium Phosphate',\n",
+    "       'Octreotide Acetate', 'Heparin', 'Mannitol', 'Lidocaine 1% P.F.',\n",
+    "       'Magnesium Sulfate', 'Thiamine', 'Mesna', 'Fentanyl Citrate', 'Calcium Chloride',\n",
+    "       'Cytarabine', 'VinCRIStine', 'Humulin-R Insulin',\n",
+    "       'Multivitamin IV (Without Vit. K)', 'Bupivacaine 0.05%',\n",
+    "       'Bupivacaine 0.1%', 'Insulin Human Regular', 'Phytonadione',\n",
+    "       'Famotidine', 'Vincristine Sulfate', 'Lidocaine 1%', 'CloniDINE', 'Sodium Phosphate',\n",
+    "       'Clonidine HCl', 'Hydromorphone P.F.', 'KCl',\n",
+    "       'Hydrocortisone Sod Succinate', 'FoLIC Acid',\n",
+    "       'VinCRIStine (Oncovin)', 'Hydrocortisone Na Succ', 'Ondansetron',\n",
+    "       'Fosaprepitant', 'Hydrocortisone', 'Bupivacaine 0.2%',\n",
+    "       'HYDROmorphone P.F.', 'Methotrexate Sodium P.F.', 'Lidocaine 2%',\n",
+    "       'Hepatitis B Immu Glob (HepaGam B)', 'DOXOrubicin']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "86d4fa32",
+   "metadata": {
+    "id": "86d4fa32"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['Sodium Chloride',\n",
+       " 'Sodium Acetate',\n",
+       " 'Normocarb Dialysate',\n",
+       " 'Mannitol 20%',\n",
+       " 'Albumin',\n",
+       " 'Sodium Chloride 23.4%',\n",
+       " 'Dextrose 50%',\n",
+       " 'Sodium Chloride 0.9%',\n",
+       " 'Sodium Chloride ',\n",
+       " 'Albumin 25% (12.5gm)',\n",
+       " 'Potassium Acetate',\n",
+       " 'Albumin 25% (12.5g / 50mL)']"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Additive names that are not on the include list\n",
+    "additive_names = set(additives)\n",
+    "kept_names = set(additives_to_include)\n",
+    "additives_to_exclude = list(additive_names - kept_names)\n",
+    "additives_to_exclude"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "3adf6d88",
+   "metadata": {
+    "id": "3adf6d88"
+   },
+   "outputs": [],
+   "source": [
+    "# Remove rows whose drug name is one of the excluded additive names\n",
+    "is_excluded_additive = prescriptions_to_include['drug'].isin(additives_to_exclude)\n",
+    "prescriptions_to_include = prescriptions_to_include.loc[~is_excluded_additive]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "42e10bcb",
+   "metadata": {
+    "id": "42e10bcb"
+   },
+   "source": [
+    "### Drugs taken through certain routes were excluded based on expert advice."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "72e86a94",
+   "metadata": {
+    "id": "72e86a94"
+   },
+   "outputs": [],
+   "source": [
+    "routes_to_include = ['PO', 'IV', 'IV DRIP', 'SC', 'SL', 'IM', 'NG', 'PR', 'OU', 'IVPCA',\n",
+    "       'IH', 'TP', 'DIALYS', 'NU', 'ORAL', 'IV BOLUS', 'PO/NG', 'TD',\n",
+    "       'BOTH EYES', 'PB', 'ED', 'REPLACE', 'G TUBE', 'ET', 'DWELL', 'IR',\n",
+    "       'VG', 'LEFT EYE', 'NEB', 'IN', 'ID', 'AU', 'OS', 'IRR', 'OD',\n",
+    "       'INHALATION', 'IA', 'AS', 'AD', 'J TUBE', 'LOCK', 'IT', 'IJ',\n",
+    "       'IP', 'PL', 'LEFT EAR', 'NAS', 'TT', 'RIGHT EYE', 'EX-VIVO', 'NS',\n",
+    "       'BU', 'BOTH EARS', 'BUCCAL', 'SUBCUT', 'SCPUMP', 'ND',\n",
+    "       'PERIPHNERVE', 'AERO', 'IO', 'VT', 'RIGHT EAR', 'PO/IV', 'OG',\n",
+    "       'PO/PR', 'RECTAL', 'SCPCA', 'IVT', 'IC', 'IVS', 'NG/OG', \n",
+    "       'ENTERAL TUBE ONLY ? NOT ORAL', 'PO OR ENTERAL TUBE', 'PO/OG']\n",
+    "few_routes_only = ['PO/IV', 'PO', 'IV', 'SC', 'SUBCUT', 'ORAL', 'IV DRIP', 'IM', 'NG', 'PO/NG', 'IVPCA', \n",
+    "                   'IV BOLUS', 'IVT', 'IVS']\n",
+    "# Use few_routes_only if analysis becomes too cumbersome if all routes in routes_to_include are used"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "65f6a1a9",
+   "metadata": {
+    "id": "65f6a1a9"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array(['PO', 'IV', 'IV DRIP', 'SC', 'SL', 'IM', 'NG', 'PR', 'OU', 'IVPCA',\n",
+       "       'IH', 'TP', 'DIALYS', 'NU', 'ORAL', 'IV BOLUS', 'PO/NG', 'TD',\n",
+       "       'BOTH EYES', 'PB', 'ED', 'REPLACE', 'G TUBE', 'ET', 'DWELL', 'IR',\n",
+       "       'VG', 'LEFT EYE', 'NEB', 'IN', 'ID', 'AU', 'OS', 'IRR', 'OD',\n",
+       "       'INHALATION', 'IA', 'AS', 'AD', 'J TUBE', 'LOCK', 'IT', None, 'IJ',\n",
+       "       'IP', 'PL', 'LEFT EAR', 'NAS', 'TT', 'RIGHT EYE', 'EX-VIVO', 'NS',\n",
+       "       'BU', 'BOTH EARS', 'BUCCAL', 'SUBCUT', 'SCPUMP', 'ND',\n",
+       "       'PERIPHNERVE', 'AERO', 'IO', 'VT', 'RIGHT EAR', 'PO/IV', 'OG',\n",
+       "       'PO/PR', 'RECTAL', 'SCPCA', 'IVT', 'IC', 'IVS', 'NG/OG',\n",
+       "       'ENTERAL TUBE ONLY ? NOT ORAL', 'PO OR ENTERAL TUBE', 'PO/OG',\n",
+       "       'INTERSPACE', 'INTRAPERICARDIAL', 'LUMBAR PLEXUS', 'AXILLARY'],\n",
+       "      dtype=object)"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Every distinct route value in the raw table (shown as an array, stored as a list)\n",
+    "unique_routes = prescriptions['route'].unique()\n",
+    "routes = unique_routes.tolist()\n",
+    "unique_routes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "371e59b3",
+   "metadata": {
+    "id": "371e59b3"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['PO OR ENTERAL TUBE', 'PB', 'RECTAL', 'ET', 'PL', 'ND', 'LEFT EAR', 'BOTH EYES', 'NU', 'OD', 'TP', 'NAS', 'OU', 'NEB', 'EX-VIVO', 'LEFT EYE', 'NG/OG', 'AU', 'IRR', 'RIGHT EAR', 'OG', 'RIGHT EYE', 'BOTH EARS', 'TT', 'BU', 'LOCK', 'PR', None, 'NS', 'G TUBE', 'ED', 'IT', 'IR', 'AERO', 'ID', 'SCPCA', 'IJ', 'INHALATION', 'BUCCAL', 'SCPUMP', 'DIALYS', 'OS', 'PO/PR', 'INTRAPERICARDIAL', 'PO/OG', 'IP', 'PERIPHNERVE', 'IA', 'VT', 'VG', 'AXILLARY', 'INTERSPACE', 'IC', 'DWELL', 'IH', 'TD', 'J TUBE', 'ENTERAL TUBE ONLY ? NOT ORAL', 'SL', 'AS', 'IO', 'AD', 'LUMBAR PLEXUS', 'REPLACE', 'IN']\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Every route that is not on the chosen allow-list is excluded. few_routes_only is the\n",
+    "# allow-list used here; substitute routes_to_include to keep the broader set.\n",
+    "# Missing routes are left out of the exclusion list so prescriptions without a recorded\n",
+    "# route are kept. pd.notna() handles None as well as any NaN object, which an\n",
+    "# `np.nan in list` membership test does not guarantee.\n",
+    "# The list is sorted and printed after filtering, so the output is stable between runs\n",
+    "# and shows exactly the values that are removed.\n",
+    "routes_to_exclude = sorted(r for r in set(routes) - set(few_routes_only) if pd.notna(r))\n",
+    "print(routes_to_exclude)\n",
+    "prescriptions_to_include = prescriptions_to_include[~prescriptions_to_include['route'].isin(routes_to_exclude)]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "803f227d",
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "803f227d",
+    "outputId": "24b61abf-fb28-4327-b95d-14d9ec67e63d"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "3497"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "prescriptions_to_include['drug'].nunique()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3QA_V_appNES",
+   "metadata": {
+    "id": "3QA_V_appNES"
+   },
+   "source": [
+    "The above steps have reduced the number of different drugs from 4525 to 3497."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3b4c41b7",
+   "metadata": {
+    "id": "3b4c41b7"
+   },
+   "source": [
+    "<font size=\"4\">The entries in the 'drug' column have many values that are duplicates due to either spelling differences, different dosages, different cases, different brand names etc. In calculating polypharmacy, we are only concerned with the drug name; we do not consider different doses or brand names as different drugs. Due to this, we try to reduce the duplicate entries as much as possible before calculating polypharmacy. The first step is to make the case uniform and remove numbers and special characters from the entries in the 'drug' column. In the next step, we use the 'formulary_drug_cd' column and map the entries to the entries in the 'drug' column. This helps in reducing the 'drug' column as there are fewer unique entries in the 'formulary_drug_cd' column than the 'drug' column. We have ignored null values present in the 'formulary_drug_cd' column. Finally, we standardise the entries in the 'drug' column using a package called drugstandards https://doi.org/10.5281/zenodo.571248. Some entries in the 'drug' column may be standardised to null values if no suitable match is found. We ignore these entries in the calculation of polypharmacy.</font>"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "Mh330SJ0hXRf",
+   "metadata": {
+    "id": "Mh330SJ0hXRf"
+   },
+   "source": [
+    "### Simplifying the 'drug' column by making drug names to lower case, keeping only alphabet characters in the drug names and removing any words of length less than 3."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "47742c3a",
+   "metadata": {
+    "id": "47742c3a"
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\manue\\AppData\\Local\\Temp/ipykernel_15608/259907385.py:2: FutureWarning: The default value of regex will change from True to False in a future version.\n",
+      "  prescriptions_to_include['drug'] = prescriptions_to_include['drug'].str.replace(r'[^a-zA-Z]+', r' ')\n",
+      "C:\\Users\\manue\\AppData\\Local\\Temp/ipykernel_15608/259907385.py:3: FutureWarning: The default value of regex will change from True to False in a future version.\n",
+      "  prescriptions_to_include['drug'] = prescriptions_to_include['drug'].str.replace(r'\\b\\w\\b', '').str.replace(r'\\s+', ' ')\n",
+      "C:\\Users\\manue\\AppData\\Local\\Temp/ipykernel_15608/259907385.py:4: FutureWarning: The default value of regex will change from True to False in a future version.\n",
+      "  prescriptions_to_include['drug'] = prescriptions_to_include['drug'].str.replace(r'\\b\\w\\w\\b', '').str.replace(r'\\s+', ' ')\n",
+      "C:\\Users\\manue\\AppData\\Local\\Temp/ipykernel_15608/259907385.py:6: FutureWarning: The default value of regex will change from True to False in a future version.\n",
+      "  prescriptions_to_include['drug'] = prescriptions_to_include['drug'].str.replace(r' +', r' ')\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Normalise the free-text drug names: lower-case, letters only, and no words shorter\n",
+    "# than three letters.\n",
+    "# regex=True is passed explicitly because pandas >= 2.0 treats the pattern as a literal\n",
+    "# string by default, which would silently turn every substitution below into a no-op.\n",
+    "drug_clean = (\n",
+    "    prescriptions_to_include['drug']\n",
+    "    .str.lower()\n",
+    "    .str.replace(r'[^a-zA-Z]+', ' ', regex=True)     # digits and punctuation -> space\n",
+    "    .str.replace(r'\\b\\w{1,2}\\b', '', regex=True)    # drop one- and two-letter words\n",
+    "    .str.replace(r'\\s+', ' ', regex=True)            # collapse runs of whitespace\n",
+    "    .str.strip()                                     # trim both ends\n",
+    ")\n",
+    "# assign() builds a new frame rather than writing into a filtered slice of the raw table\n",
+    "prescriptions_to_include = prescriptions_to_include.assign(drug=drug_clean)\n",
+    "# Names made up only of removed characters are now empty strings; drop those rows\n",
+    "prescriptions_to_include = prescriptions_to_include[prescriptions_to_include['drug'] != '']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "f62e159b",
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "f62e159b",
+    "outputId": "c355c7cf-4485-4c85-b944-3bc0c9c854f3"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2603"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "prescriptions_to_include['drug'].nunique()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "JAVU__7go-lX",
+   "metadata": {
+    "id": "JAVU__7go-lX"
+   },
+   "source": [
+    "This has reduced the number of different drugs from 3497 to 2603."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "144e3afa",
+   "metadata": {
+    "id": "144e3afa"
+   },
+   "source": [
+    "### Reducing the 'formulary_drug_cd' column by making the case uniform, keeping only alphabet characters and removing single character words. This is done so that a mapping from 'formulary_drug_cd' to 'drug' can be done in order to further simplify the 'drug' column."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "afcb90a4",
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "afcb90a4",
+    "outputId": "b5c5d777-58d6-49a5-81ff-50cc2d913242"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2595"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "prescriptions_to_include['formulary_drug_cd'].nunique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "fe41f153",
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "fe41f153",
+    "outputId": "351a22b2-48ec-4547-d47b-0b0cceef68ff"
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\manue\\AppData\\Local\\Temp/ipykernel_15608/196274087.py:1: FutureWarning: The default value of regex will change from True to False in a future version.\n",
+      "  prescriptions_to_include['formulary_drug_cd'] = prescriptions_to_include['formulary_drug_cd'].str.replace(r'[^a-zA-Z]+', r' ')\n",
+      "C:\\Users\\manue\\AppData\\Local\\Temp/ipykernel_15608/196274087.py:3: FutureWarning: The default value of regex will change from True to False in a future version.\n",
+      "  prescriptions_to_include['formulary_drug_cd'] = prescriptions_to_include['formulary_drug_cd'].str.replace(r'\\b\\w\\b', '').str.replace(r'\\s+', ' ')\n",
+      "C:\\Users\\manue\\AppData\\Local\\Temp/ipykernel_15608/196274087.py:5: FutureWarning: The default value of regex will change from True to False in a future version.\n",
+      "  prescriptions_to_include['formulary_drug_cd'] = prescriptions_to_include['formulary_drug_cd'].str.replace(r' +', r' ')\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Normalise the formulary codes: letters only, upper-case, single-character words removed.\n",
+    "# regex=True is passed explicitly because pandas >= 2.0 treats the pattern as a literal\n",
+    "# string by default, which would silently turn every substitution below into a no-op.\n",
+    "code_clean = (\n",
+    "    prescriptions_to_include['formulary_drug_cd']\n",
+    "    .str.replace(r'[^a-zA-Z]+', ' ', regex=True)   # digits and punctuation -> space\n",
+    "    .str.upper()\n",
+    "    .str.replace(r'\\b\\w\\b', '', regex=True)       # drop single-character words\n",
+    "    .str.replace(r'\\s+', ' ', regex=True)          # collapse runs of whitespace\n",
+    "    .str.strip()                                   # trim both ends\n",
+    ")\n",
+    "# assign() builds a new frame rather than writing into a filtered slice\n",
+    "prescriptions_to_include = prescriptions_to_include.assign(formulary_drug_cd=code_clean)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "c07b3428",
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "c07b3428",
+    "outputId": "2ee52b50-b2b3-4a69-e67b-6eb3d9368fb0"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1358"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "prescriptions_to_include['formulary_drug_cd'].nunique()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "QrU6DslNrNh6",
+   "metadata": {
+    "id": "QrU6DslNrNh6"
+   },
+   "source": [
+    "This has reduced the number of unique entries in the 'formulary_drug_cd' column from 2595 to 1358."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "604c9a73",
+   "metadata": {
+    "id": "604c9a73"
+   },
+   "source": [
+    "### The number of unique values in the 'formulary_drug_cd' column is lower than the number in the 'drug' column. Thus, mapping from 'formulary_drug_cd' to 'drug' as shown below helps in further reducing the 'drug' column."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "e118199d",
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "e118199d",
+    "outputId": "352d1e3e-5c8b-4b77-b5cb-c634d50cef20"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2603"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "prescriptions_to_include['drug'].nunique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "530fcd38",
+   "metadata": {
+    "id": "530fcd38"
+   },
+   "outputs": [],
+   "source": [
+    "# For each formulary code, find the drug name most frequently recorded with it and use\n",
+    "# that name for every row carrying the code.\n",
+    "code_col = prescriptions_to_include['formulary_drug_cd']\n",
+    "# notna() is needed to skip missing codes: `x != np.nan` is always True, so a plain\n",
+    "# comparison never filters NaN out.\n",
+    "has_code = code_col.notna() & (code_col != '')\n",
+    "form_drug_codes = code_col[has_code].unique().tolist()\n",
+    "\n",
+    "# One grouped pass replaces a full-table scan per code. Rows without a drug name take no\n",
+    "# part in the count, mirroring value_counts(), which ignores NaN.\n",
+    "named_rows = prescriptions_to_include.loc[has_code & prescriptions_to_include['drug'].notna()]\n",
+    "top_drug_by_code = named_rows.groupby('formulary_drug_cd')['drug'].agg(\n",
+    "    lambda names: names.value_counts().index[0]\n",
+    ")\n",
+    "\n",
+    "# Rows whose code is missing, empty, or never paired with a drug name keep their current name\n",
+    "mapped_drug = code_col.map(top_drug_by_code)\n",
+    "prescriptions_to_include = prescriptions_to_include.assign(\n",
+    "    drug=mapped_drug.fillna(prescriptions_to_include['drug'])\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "32fc31e9",
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "32fc31e9",
+    "outputId": "670b8e3d-af95-4f43-d1c8-cd184a1f88d7"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1551"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "prescriptions_to_include['drug'].nunique()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "SIPSBVjYr4rw",
+   "metadata": {
+    "id": "SIPSBVjYr4rw"
+   },
+   "source": [
+    "The mapping has reduced the number of different drugs from 2603 to 1551."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "tE_kQRLFynDB",
+   "metadata": {
+    "id": "tE_kQRLFynDB"
+   },
+   "source": [
+    "### The drugstandards package is used to standardise the drug names. The package considers synonyms, brand names and also matches misspelled entries based on the Jaro-Winkler similarity. Using this package, a dictionary is created that maps unique entries in the 'drug' column to standardised drug names. This dictionary is then used on the 'drug' column."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "cYq5VLbK03z3",
+   "metadata": {
+    "id": "cYq5VLbK03z3"
+   },
+   "outputs": [],
+   "source": [
+    "# After this cell 'std_drug' holds the standardised names and 'drug' the original ones.\n",
+    "# The guard keeps the cell re-runnable: renaming 'drug' a second time would create\n",
+    "# two columns called 'std_drug'.\n",
+    "if 'std_drug' in prescriptions_to_include.columns:\n",
+    "    # Re-run: start again from the original names kept in 'drug'.\n",
+    "    prescriptions_to_include['std_drug'] = prescriptions_to_include['drug']\n",
+    "else:\n",
+    "    prescriptions_to_include.rename(columns={'drug': 'std_drug'}, inplace=True)\n",
+    "    prescriptions_to_include['drug'] = prescriptions_to_include['std_drug']\n",
+    "\n",
+    "# Standardise each distinct name once, then map the whole column through the dictionary.\n",
+    "drugs_list = prescriptions_to_include['drug'].unique().tolist()\n",
+    "std_drugs_list = drugs.standardize(drugs_list)\n",
+    "drugs_dict = dict(zip(drugs_list, std_drugs_list))\n",
+    "prescriptions_to_include.replace({'std_drug': drugs_dict}, inplace=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "id": "98f60cea",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>subject_id</th>\n",
+       "      <th>hadm_id</th>\n",
+       "      <th>icustay_id</th>\n",
+       "      <th>drug_type</th>\n",
+       "      <th>std_drug</th>\n",
+       "      <th>formulary_drug_cd</th>\n",
+       "      <th>route</th>\n",
+       "      <th>startdate</th>\n",
+       "      <th>enddate</th>\n",
+       "      <th>drug</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>6</td>\n",
+       "      <td>107064</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>MAIN</td>\n",
+       "      <td>TACROLIMUS</td>\n",
+       "      <td>TACR</td>\n",
+       "      <td>PO</td>\n",
+       "      <td>2175-06-11</td>\n",
+       "      <td>2175-06-12</td>\n",
+       "      <td>tacrolimus</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>6</td>\n",
+       "      <td>107064</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>MAIN</td>\n",
+       "      <td>WARFARIN</td>\n",
+       "      <td>WARF</td>\n",
+       "      <td>PO</td>\n",
+       "      <td>2175-06-11</td>\n",
+       "      <td>2175-06-12</td>\n",
+       "      <td>warfarin</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>6</td>\n",
+       "      <td>107064</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>MAIN</td>\n",
+       "      <td>HEPARIN</td>\n",
+       "      <td>HEPAPREMIX</td>\n",
+       "      <td>IV</td>\n",
+       "      <td>2175-06-11</td>\n",
+       "      <td>2175-06-12</td>\n",
+       "      <td>heparin sodium</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>6</td>\n",
+       "      <td>107064</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>MAIN</td>\n",
+       "      <td>FUROSEMIDE</td>\n",
+       "      <td>FURO</td>\n",
+       "      <td>PO</td>\n",
+       "      <td>2175-06-11</td>\n",
+       "      <td>2175-06-12</td>\n",
+       "      <td>furosemide</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>6</td>\n",
+       "      <td>107064</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>MAIN</td>\n",
+       "      <td>WARFARIN</td>\n",
+       "      <td>WARF</td>\n",
+       "      <td>PO</td>\n",
+       "      <td>2175-06-11</td>\n",
+       "      <td>2175-06-15</td>\n",
+       "      <td>warfarin</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   subject_id  hadm_id  icustay_id drug_type    std_drug formulary_drug_cd  \\\n",
+       "0           6   107064         NaN      MAIN  TACROLIMUS              TACR   \n",
+       "1           6   107064         NaN      MAIN    WARFARIN              WARF   \n",
+       "2           6   107064         NaN      MAIN     HEPARIN        HEPAPREMIX   \n",
+       "4           6   107064         NaN      MAIN  FUROSEMIDE              FURO   \n",
+       "5           6   107064         NaN      MAIN    WARFARIN              WARF   \n",
+       "\n",
+       "  route  startdate    enddate            drug  \n",
+       "0    PO 2175-06-11 2175-06-12      tacrolimus  \n",
+       "1    PO 2175-06-11 2175-06-12        warfarin  \n",
+       "2    IV 2175-06-11 2175-06-12  heparin sodium  \n",
+       "4    PO 2175-06-11 2175-06-12      furosemide  \n",
+       "5    PO 2175-06-11 2175-06-15        warfarin  "
+      ]
+     },
+     "execution_count": 34,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "prescriptions_to_include.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bCbcD2CulaRY",
+   "metadata": {
+    "id": "bCbcD2CulaRY"
+   },
+   "source": [
+    "<font size=\"4\">Polypharmacy is calculated for each 'hadm_id' by checking whether at least five different drugs are prescribed for the same day. To achieve this, a list of dates is first generated for each entry in the prescriptions table using its 'startdate' and 'enddate'. Entries with null values are then removed. Finally, for a given 'hadm_id' we check whether at least five different drugs have at least one day in common among their lists of dates. This indicates that at least five different drugs were prescribed for the same day.</font>"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9bcb360c",
+   "metadata": {
+    "id": "9bcb360c"
+   },
+   "source": [
+    "### Adding the list of dates from startdate to enddate for each prescription and removing prescriptions with null values."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "id": "21da572c",
+   "metadata": {
+    "id": "21da572c"
+   },
+   "outputs": [],
+   "source": [
+    "# Build, for every prescription, the list of days from startdate to enddate (inclusive).\n",
+    "# Prescriptions without a usable end date, or whose span is not between 0 and 200 days,\n",
+    "# contribute their start day only. A missing start date yields the placeholder 0.\n",
+    "no_span = datetime.timedelta(days=0)\n",
+    "max_span = datetime.timedelta(days=200)\n",
+    "dates = []\n",
+    "# Only two columns are needed, so iterate over them directly instead of using iterrows().\n",
+    "for start, end in zip(prescriptions_to_include['startdate'], prescriptions_to_include['enddate']):\n",
+    "    # pd.isna() catches None, NaN and NaT; NaT is truthy, so a plain `if start:` lets it through.\n",
+    "    if pd.isna(start):\n",
+    "        dates.append(0)\n",
+    "        continue\n",
+    "    start = pd.to_datetime(start)\n",
+    "    dates_i = [start]\n",
+    "    if pd.notna(end):\n",
+    "        end = pd.to_datetime(end)\n",
+    "        if no_span < end - start < max_span:\n",
+    "            # Counting whole days always terminates, even when the two timestamps are not\n",
+    "            # an exact number of days apart (a `while start != end` loop would never stop).\n",
+    "            n_days = (end - start).days\n",
+    "            dates_i = [start + datetime.timedelta(days=k) for k in range(n_days + 1)]\n",
+    "    dates.append(dates_i)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "id": "3ad3f32c",
+   "metadata": {
+    "id": "3ad3f32c"
+   },
+   "outputs": [],
+   "source": [
+    "# Attach the per-prescription day lists, then keep only the rows that have a\n",
+    "# standardised drug name and both a start date and an end date.\n",
+    "prescriptions_to_include['dates'] = dates\n",
+    "required_columns = ['std_drug', 'startdate', 'enddate']\n",
+    "prescriptions_to_include = prescriptions_to_include.dropna(subset=required_columns)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "id": "2728099e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>subject_id</th>\n",
+       "      <th>hadm_id</th>\n",
+       "      <th>icustay_id</th>\n",
+       "      <th>drug_type</th>\n",
+       "      <th>std_drug</th>\n",
+       "      <th>formulary_drug_cd</th>\n",
+       "      <th>route</th>\n",
+       "      <th>startdate</th>\n",
+       "      <th>enddate</th>\n",
+       "      <th>drug</th>\n",
+       "      <th>dates</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>6</td>\n",
+       "      <td>107064</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>MAIN</td>\n",
+       "      <td>TACROLIMUS</td>\n",
+       "      <td>TACR</td>\n",
+       "      <td>PO</td>\n",
+       "      <td>2175-06-11</td>\n",
+       "      <td>2175-06-12</td>\n",
+       "      <td>tacrolimus</td>\n",
+       "      <td>[2175-06-11 00:00:00, 2175-06-12 00:00:00]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>6</td>\n",
+       "      <td>107064</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>MAIN</td>\n",
+       "      <td>WARFARIN</td>\n",
+       "      <td>WARF</td>\n",
+       "      <td>PO</td>\n",
+       "      <td>2175-06-11</td>\n",
+       "      <td>2175-06-12</td>\n",
+       "      <td>warfarin</td>\n",
+       "      <td>[2175-06-11 00:00:00, 2175-06-12 00:00:00]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>6</td>\n",
+       "      <td>107064</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>MAIN</td>\n",
+       "      <td>HEPARIN</td>\n",
+       "      <td>HEPAPREMIX</td>\n",
+       "      <td>IV</td>\n",
+       "      <td>2175-06-11</td>\n",
+       "      <td>2175-06-12</td>\n",
+       "      <td>heparin sodium</td>\n",
+       "      <td>[2175-06-11 00:00:00, 2175-06-12 00:00:00]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>6</td>\n",
+       "      <td>107064</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>MAIN</td>\n",
+       "      <td>FUROSEMIDE</td>\n",
+       "      <td>FURO</td>\n",
+       "      <td>PO</td>\n",
+       "      <td>2175-06-11</td>\n",
+       "      <td>2175-06-12</td>\n",
+       "      <td>furosemide</td>\n",
+       "      <td>[2175-06-11 00:00:00, 2175-06-12 00:00:00]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>6</td>\n",
+       "      <td>107064</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>MAIN</td>\n",
+       "      <td>WARFARIN</td>\n",
+       "      <td>WARF</td>\n",
+       "      <td>PO</td>\n",
+       "      <td>2175-06-11</td>\n",
+       "      <td>2175-06-15</td>\n",
+       "      <td>warfarin</td>\n",
+       "      <td>[2175-06-11 00:00:00, 2175-06-12 00:00:00, 217...</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   subject_id  hadm_id  icustay_id drug_type    std_drug formulary_drug_cd  \\\n",
+       "0           6   107064         NaN      MAIN  TACROLIMUS              TACR   \n",
+       "1           6   107064         NaN      MAIN    WARFARIN              WARF   \n",
+       "2           6   107064         NaN      MAIN     HEPARIN        HEPAPREMIX   \n",
+       "4           6   107064         NaN      MAIN  FUROSEMIDE              FURO   \n",
+       "5           6   107064         NaN      MAIN    WARFARIN              WARF   \n",
+       "\n",
+       "  route  startdate    enddate            drug  \\\n",
+       "0    PO 2175-06-11 2175-06-12      tacrolimus   \n",
+       "1    PO 2175-06-11 2175-06-12        warfarin   \n",
+       "2    IV 2175-06-11 2175-06-12  heparin sodium   \n",
+       "4    PO 2175-06-11 2175-06-12      furosemide   \n",
+       "5    PO 2175-06-11 2175-06-15        warfarin   \n",
+       "\n",
+       "                                               dates  \n",
+       "0         [2175-06-11 00:00:00, 2175-06-12 00:00:00]  \n",
+       "1         [2175-06-11 00:00:00, 2175-06-12 00:00:00]  \n",
+       "2         [2175-06-11 00:00:00, 2175-06-12 00:00:00]  \n",
+       "4         [2175-06-11 00:00:00, 2175-06-12 00:00:00]  \n",
+       "5  [2175-06-11 00:00:00, 2175-06-12 00:00:00, 217...  "
+      ]
+     },
+     "execution_count": 37,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "prescriptions_to_include.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b1FEXtLVouxv",
+   "metadata": {
+    "id": "b1FEXtLVouxv"
+   },
+   "source": [
+    "### Checking for overlapping dates to create the polypharmacy variable."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d150e3be",
+   "metadata": {
+    "id": "d150e3be"
+   },
+   "outputs": [],
+   "source": [
+    "# For every admission, decide whether at least `limit` different drugs share a day.\n",
+    "# Each result row is [hadm_id, polypharmacy flag, duration]. The duration is the inclusive\n",
+    "# span between the first and last day shared by the first qualifying drug combination,\n",
+    "# or 0 when there is none.\n",
+    "# Expects rows with a missing 'std_drug' or missing dates to have been removed already.\n",
+    "limit = 5\n",
+    "polypharm = []\n",
+    "# groupby visits each admission once, in order of first appearance, instead of\n",
+    "# re-scanning the whole table for every hadm_id.\n",
+    "for case_id, case in prescriptions_to_include.groupby('hadm_id', sort=False):\n",
+    "    # NOTE: do not name this list `drugs`; that name is the drugstandards module alias.\n",
+    "    drug_counts = case['std_drug'].value_counts()\n",
+    "    repeated_drugs = [name for name, count in drug_counts.items() if count > 1]\n",
+    "    single_drugs = [name for name in case['std_drug'] if drug_counts[name] == 1]\n",
+    "\n",
+    "    # Merge all prescriptions of the same drug into one set of days.\n",
+    "    days_by_drug = {}\n",
+    "    for name, day_list in zip(case['std_drug'], case['dates']):\n",
+    "        days_by_drug.setdefault(name, set()).update(day_list)\n",
+    "\n",
+    "    # Order of the drugs: repeated drugs, last merged first, then the drugs prescribed once\n",
+    "    # in table order. The order decides which combination is found first and hence the duration.\n",
+    "    ordered_drugs = list(reversed(repeated_drugs)) + single_drugs\n",
+    "    drug_days = [days_by_drug[name] for name in ordered_drugs]\n",
+    "\n",
+    "    # Cheap pre-check: a qualifying combination exists only if some day carries at least\n",
+    "    # `limit` drugs. This avoids enumerating every combination for admissions without one.\n",
+    "    drugs_per_day = {}\n",
+    "    for days in drug_days:\n",
+    "        for day in days:\n",
+    "            drugs_per_day[day] = drugs_per_day.get(day, 0) + 1\n",
+    "\n",
+    "    poly = [case_id, 0, 0]\n",
+    "    if max(drugs_per_day.values(), default=0) >= limit:\n",
+    "        # Iterate lazily; wrapping combinations() in list() would build every combination\n",
+    "        # in memory before the first one is tested.\n",
+    "        for combo in combinations(drug_days, limit):\n",
+    "            common = set.intersection(*combo)\n",
+    "            if common:\n",
+    "                poly = [case_id, 1, max(common) - min(common) + datetime.timedelta(days=1)]\n",
+    "                break\n",
+    "    polypharm.append(poly)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "z9RM99m0RsXR",
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 206
+    },
+    "id": "z9RM99m0RsXR",
+    "outputId": "07d6dd42-08c5-4a4b-8624-f24a9ec9ceab"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "  <div id=\"df-37e539cc-6d23-48ac-b52d-ef0cfe6cb88d\">\n",
+       "    <div class=\"colab-df-container\">\n",
+       "      <div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>hadm_id</th>\n",
+       "      <th>polypharmacy</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>107064</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>143045</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>150750</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>163353</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>185777</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>\n",
+       "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-37e539cc-6d23-48ac-b52d-ef0cfe6cb88d')\"\n",
+       "              title=\"Convert this dataframe to an interactive table.\"\n",
+       "              style=\"display:none;\">\n",
+       "        \n",
+       "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+       "       width=\"24px\">\n",
+       "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
+       "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
+       "  </svg>\n",
+       "      </button>\n",
+       "      \n",
+       "  <style>\n",
+       "    .colab-df-container {\n",
+       "      display:flex;\n",
+       "      flex-wrap:wrap;\n",
+       "      gap: 12px;\n",
+       "    }\n",
+       "\n",
+       "    .colab-df-convert {\n",
+       "      background-color: #E8F0FE;\n",
+       "      border: none;\n",
+       "      border-radius: 50%;\n",
+       "      cursor: pointer;\n",
+       "      display: none;\n",
+       "      fill: #1967D2;\n",
+       "      height: 32px;\n",
+       "      padding: 0 0 0 0;\n",
+       "      width: 32px;\n",
+       "    }\n",
+       "\n",
+       "    .colab-df-convert:hover {\n",
+       "      background-color: #E2EBFA;\n",
+       "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+       "      fill: #174EA6;\n",
+       "    }\n",
+       "\n",
+       "    [theme=dark] .colab-df-convert {\n",
+       "      background-color: #3B4455;\n",
+       "      fill: #D2E3FC;\n",
+       "    }\n",
+       "\n",
+       "    [theme=dark] .colab-df-convert:hover {\n",
+       "      background-color: #434B5C;\n",
+       "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+       "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+       "      fill: #FFFFFF;\n",
+       "    }\n",
+       "  </style>\n",
+       "\n",
+       "      <script>\n",
+       "        const buttonEl =\n",
+       "          document.querySelector('#df-37e539cc-6d23-48ac-b52d-ef0cfe6cb88d button.colab-df-convert');\n",
+       "        buttonEl.style.display =\n",
+       "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+       "\n",
+       "        async function convertToInteractive(key) {\n",
+       "          const element = document.querySelector('#df-37e539cc-6d23-48ac-b52d-ef0cfe6cb88d');\n",
+       "          const dataTable =\n",
+       "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+       "                                                     [key], {});\n",
+       "          if (!dataTable) return;\n",
+       "\n",
+       "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
+       "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+       "            + ' to learn more about interactive tables.';\n",
+       "          element.innerHTML = '';\n",
+       "          dataTable['output_type'] = 'display_data';\n",
+       "          await google.colab.output.renderOutput(dataTable, element);\n",
+       "          const docLink = document.createElement('div');\n",
+       "          docLink.innerHTML = docLinkHtml;\n",
+       "          element.appendChild(docLink);\n",
+       "        }\n",
+       "      </script>\n",
+       "    </div>\n",
+       "  </div>\n",
+       "  "
+      ],
+      "text/plain": [
+       "   hadm_id  polypharmacy\n",
+       "0   107064             1\n",
+       "1   143045             1\n",
+       "2   150750             1\n",
+       "3   163353             0\n",
+       "4   185777             1"
+      ]
+     },
+     "execution_count": 36,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# One row per admission; 'duration' stays in the frame but is left out of the preview.\n",
+    "result_columns = ['hadm_id', 'polypharmacy', 'duration']\n",
+    "prescription_polypharmacy = pd.DataFrame(polypharm, columns=result_columns)\n",
+    "prescription_polypharmacy[['hadm_id', 'polypharmacy']].head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "-8aOFtgPo4lG",
+   "metadata": {
+    "id": "-8aOFtgPo4lG"
+   },
+   "source": [
+    "As can be seen below, among 49130 'hadm_id's, 46179 were found to have at least 5 drugs prescribed for the same day. We could set a higher threshold for the minimum number of drugs required to be classified as polypharmacy. The polypharmacy variable can be used to explore adverse effects of polypharmacy in a hospital setting in the MIMIC-III dataset."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fGIYQ1sWpeP1",
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "fGIYQ1sWpeP1",
+    "outputId": "be2b26cb-6ade-4ea3-dfdf-63317fa3cee6"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "49130"
+      ]
+     },
+     "execution_count": 38,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "prescription_polypharmacy['hadm_id'].nunique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "hkRA-5JWRdM4",
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "hkRA-5JWRdM4",
+    "outputId": "ad727de9-4e07-491d-ba96-ca6c07297c38"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1    46179\n",
+       "0     2951\n",
+       "Name: polypharmacy, dtype: int64"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "prescription_polypharmacy['polypharmacy'].value_counts()"
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "collapsed_sections": [],
+   "name": "github_1606.ipynb",
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

	subject_id	hadm_id	icustay_id	drug_type	std_drug	formulary_drug_cd	route	startdate	enddate	drug
0	6	107064	NaN	MAIN	TACROLIMUS	TACR	PO	2175-06-11	2175-06-12	tacrolimus
1	6	107064	NaN	MAIN	WARFARIN	WARF	PO	2175-06-11	2175-06-12	warfarin
2	6	107064	NaN	MAIN	HEPARIN	HEPAPREMIX	IV	2175-06-11	2175-06-12	heparin sodium
4	6	107064	NaN	MAIN	FUROSEMIDE	FURO	PO	2175-06-11	2175-06-12	furosemide
5	6	107064	NaN	MAIN	WARFARIN	WARF	PO	2175-06-11	2175-06-15	warfarin