Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Carlos Ensembles #5

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
250 changes: 226 additions & 24 deletions your-code/main.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 16,
"id": "00cf591d-8a5b-499e-8715-1ad140867934",
"metadata": {},
"outputs": [],
Expand All @@ -21,17 +21,166 @@
"import numpy as np\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.metrics import accuracy_score\n",
"\n",
"# Load the dataset (change the path if needed)\n",
"df = pd.read_csv('../data/heart.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 17,
"id": "0bb5ea1c-a4e5-4419-bae8-661fe2d82711",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>sex</th>\n",
" <th>cp</th>\n",
" <th>trestbps</th>\n",
" <th>chol</th>\n",
" <th>fbs</th>\n",
" <th>restecg</th>\n",
" <th>thalach</th>\n",
" <th>exang</th>\n",
" <th>oldpeak</th>\n",
" <th>slope</th>\n",
" <th>ca</th>\n",
" <th>thal</th>\n",
" <th>target</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>63</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>145</td>\n",
" <td>233</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>150</td>\n",
" <td>0</td>\n",
" <td>2.3</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>37</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>130</td>\n",
" <td>250</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>187</td>\n",
" <td>0</td>\n",
" <td>3.5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>41</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>130</td>\n",
" <td>204</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>172</td>\n",
" <td>0</td>\n",
" <td>1.4</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>56</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>120</td>\n",
" <td>236</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>178</td>\n",
" <td>0</td>\n",
" <td>0.8</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>57</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>120</td>\n",
" <td>354</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>163</td>\n",
" <td>1</td>\n",
" <td>0.6</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" age sex cp trestbps chol fbs restecg thalach exang oldpeak slope \\\n",
"0 63 1 3 145 233 1 0 150 0 2.3 0 \n",
"1 37 1 2 130 250 0 1 187 0 3.5 0 \n",
"2 41 0 1 130 204 0 0 172 0 1.4 2 \n",
"3 56 1 1 120 236 0 1 178 0 0.8 2 \n",
"4 57 0 0 120 354 0 1 163 1 0.6 2 \n",
"\n",
" ca thal target \n",
"0 0 1 1 \n",
"1 0 2 1 \n",
"2 0 2 1 \n",
"3 0 2 1 \n",
"4 0 2 1 "
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
Expand All @@ -46,7 +195,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 18,
"id": "23ad7e40-87f3-4b93-bef9-a9ddb5881ddc",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -75,23 +224,38 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 19,
"id": "d39376f1-b4ca-44c0-8364-d11b9a7605f9",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Test accuracy: 0.8026315789473685\n",
"Train accuracy: 1.0\n"
]
}
],
"source": [
"#Create and Train a Decision Tree Classifier and print the train and test accuracy\n",
"\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.metrics import accuracy_score, mean_squared_error\n",
"\n",
"# Train Decision Tree\n",
"\n",
"# Seed the tree so the train/test accuracies printed below are repeatable across reruns\n",
"dt_model = DecisionTreeClassifier(random_state=42)\n",
"dt_model.fit(X_train_scaled, y_train)\n",
"\n",
"# Predictions and evaluation\n",
"y_pred_test = dt_model.predict(X_test_scaled)\n",
"y_pred_train = dt_model.predict(X_train_scaled)\n",
"\n",
"\n",
"# Evaluate performance\n"
"# Evaluate performance\n",
"accuracy_test = accuracy_score(y_test, y_pred_test)\n",
"accuracy_train = accuracy_score(y_train, y_pred_train)\n",
"print(f\"Test accuracy: {accuracy_test}\")\n",
"print(f\"Train accuracy: {accuracy_train}\")"
]
},
{
Expand All @@ -104,7 +268,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 20,
"id": "9c60160a-b179-4896-a026-4beab803bb4e",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -135,10 +299,19 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 32,
"id": "8fc76766-a90c-47ed-bd02-66827a1dc115",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Test accuracy: 0.8026315789473685\n",
"Train accuracy: 0.986784140969163\n"
]
}
],
"source": [
"# Create and Train a BaggingClassifier. \n",
"# Use as base estimator a weak decision tree (max_depth=1) and 100 estimators to really average over a lot of different data samples\n",
Expand All @@ -147,12 +320,18 @@
"from sklearn.ensemble import BaggingClassifier\n",
"\n",
"# Train BaggingClassifier\n",
"\n",
"# Bag 100 depth-1 trees (weak learners), each fit on its own bootstrap sample.\n",
"# The seed is fixed so the reported accuracies are repeatable.\n",
"bag_model = BaggingClassifier(\n",
"    estimator=DecisionTreeClassifier(max_depth=1),\n",
"    n_estimators=100,\n",
"    random_state=42,\n",
")\n",
"bag_model.fit(X_train_scaled, y_train)\n",
"\n",
"# Predictions and evaluation\n",
"y_pred_test = bag_model.predict(X_test_scaled)\n",
"y_pred_train = bag_model.predict(X_train_scaled)\n",
"\n",
"\n",
"# Evaluate performance\n"
"# Evaluate performance\n",
"accuracy_test = accuracy_score(y_test, y_pred_test)\n",
"accuracy_train = accuracy_score(y_train, y_pred_train)\n",
"print(f\"Test accuracy: {accuracy_test}\")\n",
"print(f\"Train accuracy: {accuracy_train}\")"
]
},
{
Expand All @@ -165,7 +344,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 22,
"id": "9f892484-618a-46fe-8e56-0a18fa652ed8",
"metadata": {},
"outputs": [],
Expand All @@ -192,10 +371,27 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 38,
"id": "4bba1773-b0b0-44ba-a838-58b8c466ff88",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Test accuracy: 0.8421052631578947\n",
"Train accuracy: 0.9383259911894273\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\crvid\\anaconda3\\Lib\\site-packages\\sklearn\\ensemble\\_weight_boosting.py:519: FutureWarning: The SAMME.R algorithm (the default) is deprecated and will be removed in 1.6. Use the SAMME algorithm to circumvent this warning.\n",
" warnings.warn(\n"
]
}
],
"source": [
"# Create and Train an AdaBoostClassifier. \n",
"# Use as base estimator a weak decision tree (max_depth=1) and 100 estimators to really target the specific behaviors of this phenomenon\n",
Expand All @@ -204,12 +400,18 @@
"from sklearn.ensemble import AdaBoostClassifier\n",
"\n",
"# Train AdaBoost\n",
"\n",
"# Boost 100 depth-1 trees (weak learners) sequentially.\n",
"# The seed is fixed so the reported accuracies are repeatable.\n",
"ada_model = AdaBoostClassifier(\n",
"    estimator=DecisionTreeClassifier(max_depth=1),\n",
"    n_estimators=100,\n",
"    random_state=42,\n",
")\n",
"ada_model.fit(X_train_scaled, y_train)\n",
"\n",
"# Predictions and evaluation\n",
"y_pred_test = ada_model.predict(X_test_scaled)\n",
"y_pred_train = ada_model.predict(X_train_scaled)\n",
"\n",
"\n",
"# Evaluate performance\n"
"# Evaluate performance\n",
"accuracy_test = accuracy_score(y_test, y_pred_test)\n",
"accuracy_train = accuracy_score(y_train, y_pred_train)\n",
"print(f\"Test accuracy: {accuracy_test}\")\n",
"print(f\"Train accuracy: {accuracy_train}\")"
]
},
{
Expand All @@ -222,7 +424,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 24,
"id": "4b5e21fe-0a8f-45f6-a2d3-74261941f9c1",
"metadata": {},
"outputs": [],
Expand All @@ -235,7 +437,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "base",
"language": "python",
"name": "python3"
},
Expand All @@ -249,7 +451,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
"version": "3.12.4"
}
},
"nbformat": 4,
Expand Down