diff --git a/.gitignore b/.gitignore index b1c381e..88782f3 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,9 @@ *.DS_Store *.idea/ *.vs/ -*__pycache__/ \ No newline at end of file +*__pycache__/ +*venv/ +*.html +*.log +*.png +*.pdf \ No newline at end of file diff --git a/README.md b/README.md index adeaf4f..24e4441 100644 --- a/README.md +++ b/README.md @@ -9,11 +9,17 @@ A python package without useful utility tools was developed to aid extraction and visualization. The package is compatible with `Python >= 3.8` and has the following dependencies: -* pandas - * numpy -* matplotlib +* folium + +* pycountry + +* geopandas + +* Pillow + +Note that these dependencies will be installed automatically when running the Jupyter Notebooks. ## Installation @@ -31,8 +37,14 @@ Example application notebooks are available in the [apps/](https://github.com/an ## Troubleshoot +1) Virtual environment activation + To activate the virtual environment on Windows, instead of `source venv/bin/activate` run `./venv/Scripts/activate`. +2) `ImportError: No module named selenium` + +To export the generated map as a PNG image, the `selenium` package (`pip install selenium`) and the [Firefox](https://www.mozilla.org/en-US/firefox/new/) browser are required. + ## License This repository has [MIT license](https://github.com/andreped/breast-cancer-stats/blob/main/LICENSE).
diff --git a/apps/breast_cancer_mortality_rate.ipynb b/apps/breast_cancer_mortality_rate.ipynb new file mode 100644 index 0000000..c930ab7 --- /dev/null +++ b/apps/breast_cancer_mortality_rate.ipynb @@ -0,0 +1,94 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.1.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + ] + } + ], + "source": [ + "# Install dependencies\n", + "!pip install -q geopandas folium pycountry Pillow selenium" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "import folium\n", + "import pandas as pd\n", + "import pycountry\n", + "import io\n", + "from PIL import Image\n", + "\n", + "cancer_data = pd.read_csv(\"../data/SA_0000001439.csv\")\n", + "political_countries_url = (\n", + " \"http://geojson.xyz/naturalearth-3.3.0/ne_50m_admin_0_countries.geojson\"\n", + ")\n", + "\n", + "# convert country code to full name\n", + "tmp = []\n", + "for x in cancer_data[\"COUNTRY\"]:\n", + " try:\n", + " tmp.append(pycountry.countries.get(alpha_3=x).name)\n", + " except AttributeError:\n", + " tmp.append(x)\n", + "cancer_data[\"COUNTRY\"] = tmp\n", + "\n", + "m = folium.Map(location=(30, 10), zoom_start=2, tiles=\"cartodb positron\")\n", + "folium.Choropleth(\n", + " geo_data=political_countries_url,\n", + " data=cancer_data,\n", + " columns=[\"COUNTRY\", \"Numeric\"],\n", + " key_on=\"feature.properties.name\",\n", + " fill_color=\"Reds\",\n", + " nan_fill_color=\"Grey\",\n", + " fill_opacity=0.7,\n", + " line_opacity=0.2,\n", + " 
legend_name=\"Breast cancer mortality rate.\"\n", + ").add_to(m)\n", + "\n", + "# to export snapshot of map as PNG\n", + "img_data = m._to_png(5)\n", + "img = Image.open(io.BytesIO(img_data))\n", + "img.save('mortality_rate_breast_cancer.png')\n", + "\n", + "# to export as interactive viewer in HTML\n", + "m.save(\"mortality_rate_breast_cancer.html\")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/assets/requirements.txt b/assets/requirements.txt index 65fec04..3516284 100644 --- a/assets/requirements.txt +++ b/assets/requirements.txt @@ -1,4 +1,5 @@ -pandas -numpy -matplotlib -setuptools \ No newline at end of file +geopandas +folium +pycountry +Pillow +selenium