From fdc01de3b0b2673b3f0b01ab7365100c1c75a229 Mon Sep 17 00:00:00 2001 From: grgmiller Date: Sat, 6 May 2023 12:23:25 -0700 Subject: [PATCH 1/3] export generator data including wind --- .../explore_data/export_generator_data.ipynb | 110 ++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 notebooks/explore_data/export_generator_data.ipynb diff --git a/notebooks/explore_data/export_generator_data.ipynb b/notebooks/explore_data/export_generator_data.ipynb new file mode 100644 index 00000000..d8ab7b20 --- /dev/null +++ b/notebooks/explore_data/export_generator_data.ipynb @@ -0,0 +1,110 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# import packages\n", + "import pandas as pd\n", + "import numpy as np\n", + "import os\n", + "import plotly.express as px\n", + "\n", + "%reload_ext autoreload\n", + "%autoreload 2\n", + "\n", + "# # Tell python where to look for modules.\n", + "import sys\n", + "sys.path.append('../../../open-grid-emissions/src/')\n", + "\n", + "import download_data\n", + "import load_data\n", + "from column_checks import get_dtypes\n", + "from filepaths import *\n", + "import impute_hourly_profiles\n", + "import data_cleaning\n", + "import output_data\n", + "import emissions\n", + "import validation\n", + "import gross_to_net_generation\n", + "import eia930\n", + "\n", + "year = 2021\n", + "path_prefix = f\"{year}/\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# load data for each generator\n", + "pudl_out = load_data.initialize_pudl_out(year)\n", + "gens = pudl_out.gens_eia860()\n", + "\n", + "# load data about the balancing authority of each plant\n", + "plant_ba = pudl_out.plants_eia860()[\n", + " [\"plant_id_eia\", \"balancing_authority_code_eia\"]\n", + "].rename(columns={\"balancing_authority_code_eia\": \"ba_code\"})\n", + "\n", + "# merge the ba code into the gens data\n", + "gens = gens.merge(plant_ba, how=\"left\", on=\"plant_id_eia\", validate=\"m:1\")\n", + "\n", + "# load wind data\n", + "wind = pd.read_excel(\n", + " downloads_folder(f\"eia860/eia860{year}/3_2_Wind_Y{year}.xlsx\"),\n", + " sheet_name=\"Operable\",\n", + " header=1,\n", + ").rename(columns=\n", + " {\n", + " \"Plant Code\": \"plant_id_eia\",\n", + " \"Generator ID\": \"generator_id\",\n", + " \"Design Wind Speed (mph)\": \"rated_speed_mph\",\n", + " \"Turbine Hub Height (Feet)\": \"hub_height_ft\",\n", + " }\n", + ")\n", + "\n", + "# convert to metric units\n", + "wind[\"rated_speed_m_per_s\"] = (wind[\"rated_speed_mph\"] * 0.44704).round(1)\n", + "wind[\"hub_height_m\"] = (wind[\"hub_height_ft\"] * 0.3048).round(0)\n", + "\n", + "wind = wind[[\"plant_id_eia\", \"generator_id\", \"rated_speed_m_per_s\", \"hub_height_m\"]]\n", + "\n", + "# merge the wind data into the gens data\n", + "gens = gens.merge(wind, how=\"left\", on=[\"plant_id_eia\",\"generator_id\"])\n", + "\n", + "gens.to_csv(outputs_folder(f\"gens_{year}.csv\"), index=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "open_grid_emissions", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "b893a8d8fbb165be288531947168b3b06bdb1508177327a21c265e0400df3100" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 869688142a88c06946e0ecc28a4d460a527b30dd Mon Sep 17 00:00:00 2001 From: grgmiller Date: Fri, 8 Dec 2023 09:58:07 -0800 Subject: [PATCH 2/3] update to 2022 --- .../explore_data/export_generator_data.ipynb | 75 +++++++++++++++---- 1 file changed, 61 insertions(+), 14 deletions(-) diff --git a/notebooks/explore_data/export_generator_data.ipynb b/notebooks/explore_data/export_generator_data.ipynb index d8ab7b20..f56bbfa5 100644 --- a/notebooks/explore_data/export_generator_data.ipynb +++ b/notebooks/explore_data/export_generator_data.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ @@ -31,28 +31,68 @@ "import gross_to_net_generation\n", "import eia930\n", "\n", - "year = 2021\n", + "year = 2022\n", "path_prefix = f\"{year}/\"" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "# load data for each generator\n", - "pudl_out = load_data.initialize_pudl_out(year)\n", - "gens = pudl_out.gens_eia860()\n", + "gens = load_data.load_pudl_table(\n", + " \"generators_eia860\",\n", + " year,\n", + " columns=[\n", + " \"report_date\",\n", + " \"plant_id_eia\",\n", + " \"generator_id\",\n", + " \"prime_mover_code\",\n", + " \"capacity_mw\",\n", + " \"generator_retirement_date\",\n", + " \"operational_status\",\n", + " \"operational_status_code\",\n", + " \"current_planned_generator_operating_date\",\n", + " \"planned_generator_retirement_date\",\n", + " ],\n", + ")\n", "\n", - "# load data about the balancing authority of each plant\n", - "plant_ba = pudl_out.plants_eia860()[\n", - " [\"plant_id_eia\", \"balancing_authority_code_eia\"]\n", - "].rename(columns={\"balancing_authority_code_eia\": \"ba_code\"})\n", + "gens_entity = load_data.load_pudl_table(\n", + " \"generators_entity_eia\",\n", + " columns=[\n", + " \"plant_id_eia\",\n", + " \"generator_id\",\n", + " \"generator_operating_date\",\n", + " \"original_planned_generator_operating_date\",\n", + " ],\n", + ")\n", "\n", - "# merge the ba code into the gens data\n", - "gens = gens.merge(plant_ba, how=\"left\", on=\"plant_id_eia\", validate=\"m:1\")\n", + "plants_entity = load_data.load_pudl_table(\n", + " \"plants_entity_eia\", columns=[\"plant_id_eia\", \"latitude\", \"longitude\"]\n", + ")\n", + "\n", + "plant_ba = load_data.load_pudl_table(\n", + " \"plants_eia860\", year, columns=[\"plant_id_eia\", \"balancing_authority_code_eia\"]\n", + ").rename(columns={\"balancing_authority_code_eia\": \"ba_code\"})\n", "\n", + "# merge the ba code into the gens data\n", + "gens = (\n", + " gens.merge(\n", + " gens_entity, how=\"left\", on=[\"plant_id_eia\", \"generator_id\"], validate=\"m:1\"\n", + " )\n", + " .merge(plant_ba, how=\"left\", on=\"plant_id_eia\", validate=\"m:1\")\n", + " .merge(plants_entity, how=\"left\", on=\"plant_id_eia\", validate=\"m:1\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ "# load wind data\n", "wind = pd.read_excel(\n", " downloads_folder(f\"eia860/eia860{year}/3_2_Wind_Y{year}.xlsx\"),\n", @@ -74,8 +114,15 @@ "wind = wind[[\"plant_id_eia\", \"generator_id\", \"rated_speed_m_per_s\", \"hub_height_m\"]]\n", "\n", "# merge the wind data into the gens data\n", - "gens = gens.merge(wind, how=\"left\", on=[\"plant_id_eia\",\"generator_id\"])\n", - "\n", + "gens = gens.merge(wind, how=\"left\", on=[\"plant_id_eia\",\"generator_id\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ "gens.to_csv(outputs_folder(f\"gens_{year}.csv\"), index=False)" ] } @@ -96,7 +143,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.11.4" }, "orig_nbformat": 4, "vscode": { From 0578ff47fa76e6182aeb066a83d1911acd4cc191 Mon Sep 17 00:00:00 2001 From: grgmiller Date: Fri, 4 Oct 2024 09:36:19 -0700 Subject: [PATCH 3/3] update export notebook --- .../explore_data/export_generator_data.ipynb | 110 +++++++++++------- src/oge/data_pipeline.py | 2 +- src/oge/helpers.py | 10 +- src/oge/visualization.py | 4 +- 4 files changed, 81 insertions(+), 45 deletions(-) diff --git a/notebooks/explore_data/export_generator_data.ipynb b/notebooks/explore_data/export_generator_data.ipynb index f56bbfa5..7459a584 100644 --- a/notebooks/explore_data/export_generator_data.ipynb +++ b/notebooks/explore_data/export_generator_data.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 26, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -10,40 +10,35 @@ "import pandas as pd\n", "import numpy as np\n", "import os\n", - "import plotly.express as px\n", "\n", "%reload_ext autoreload\n", "%autoreload 2\n", "\n", "# # Tell python where to look for modules.\n", "import sys\n", - "sys.path.append('../../../open-grid-emissions/src/')\n", "\n", - "import download_data\n", + "sys.path.append(\"../../../open-grid-emissions/src/oge\")\n", + "\n", + "\n", "import load_data\n", - "from column_checks import get_dtypes\n", + "import helpers\n", "from filepaths import *\n", - "import impute_hourly_profiles\n", - "import data_cleaning\n", - "import output_data\n", - "import emissions\n", - "import validation\n", - "import gross_to_net_generation\n", - "import eia930\n", - "\n", - "year = 2022\n", - "path_prefix = f\"{year}/\"" + "\n", + "year = 2023\n", + "path_prefix = f\"{year}/\"\n", + "\n", + "os.environ[\"PUDL_BUILD\"] = \"nightly\"" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# load data for each generator\n", "gens = load_data.load_pudl_table(\n", - " \"generators_eia860\",\n", + " \"core_eia860__scd_generators\",\n", " year,\n", " columns=[\n", " \"report_date\",\n", @@ -60,7 +55,7 @@ ")\n", "\n", "gens_entity = load_data.load_pudl_table(\n", - " \"generators_entity_eia\",\n", + " \"core_eia__entity_generators\",\n", " columns=[\n", " \"plant_id_eia\",\n", " \"generator_id\",\n", @@ -70,11 +65,14 @@ ")\n", "\n", "plants_entity = load_data.load_pudl_table(\n", - " \"plants_entity_eia\", columns=[\"plant_id_eia\", \"latitude\", \"longitude\"]\n", + " \"core_eia__entity_plants\",\n", + " columns=[\"plant_id_eia\", \"latitude\", \"longitude\", \"city\", \"county\", \"state\"],\n", ")\n", "\n", "plant_ba = load_data.load_pudl_table(\n", - " \"plants_eia860\", year, columns=[\"plant_id_eia\", \"balancing_authority_code_eia\"]\n", + " \"core_eia860__scd_plants\",\n", + " year,\n", + " columns=[\"plant_id_eia\", \"balancing_authority_code_eia\"],\n", ").rename(columns={\"balancing_authority_code_eia\": \"ba_code\"})\n", "\n", "# merge the ba code into the gens data\n", @@ -84,22 +82,59 @@ " )\n", " .merge(plant_ba, how=\"left\", on=\"plant_id_eia\", validate=\"m:1\")\n", " .merge(plants_entity, how=\"left\", on=\"plant_id_eia\", validate=\"m:1\")\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [], - "source": [ + ")\n", + "\n", + "# drop data for generators outside of continental US\n", + "gens = gens[~gens[\"state\"].isin([\"AK\", \"HI\"])]\n", + "\n", + "# if there are longitudes in the wrong hemisphere, try reversing the sign\n", + "gens.loc[gens[\"longitude\"] > 0, \"longitude\"] = (\n", + " -1 * gens.loc[gens[\"longitude\"] > 0, \"longitude\"]\n", + ")\n", + "\n", + "# fix coordinates\n", + "# NOTE: This is the bounding box for the continental US and the southern canadian provinces\n", + "LONGITUDE_MIN = -125.25\n", + "LONGITUDE_MAX = -67\n", + "LATITUDE_MIN = 24.5\n", + "LATITUDE_MAX = 49.25\n", + "\n", + "# are there any old station locations that are outside of North America?\n", + "gens.loc[\n", + " (gens[\"longitude\"] < LONGITUDE_MIN)\n", + " | (gens[\"longitude\"] > LONGITUDE_MAX)\n", + " | (gens[\"latitude\"] < LATITUDE_MIN)\n", + " | (gens[\"latitude\"] > LATITUDE_MAX),\n", + " [\"latitude\", \"longitude\"],\n", + "] = np.NaN\n", + "\n", + "# fill in missing locations\n", + "# get lat/lon\n", + "# loop this process twice to try and address any geocoder errors\n", + "loop_count = 1\n", + "missing_coord = gens[gens[\"longitude\"].isna() | gens[\"latitude\"].isna()]\n", + "while loop_count <= 2 and len(missing_coord) > 0:\n", + " if len(missing_coord) > 0:\n", + " print(f\"Finding coordinates for {len(missing_coord)} missing locations\")\n", + " # only get coordinates when state, county and city are available\n", + " for i in missing_coord.index:\n", + " state = gens.loc[i, \"state\"]\n", + " county = gens.loc[i, \"county\"]\n", + " city = gens.loc[i, \"city\"]\n", + "\n", + " lat, lon = helpers.get_coordinates_of_location(state, county, city)\n", + " gens.loc[i, \"latitude\"] = lat\n", + " gens.loc[i, \"longitude\"] = lon\n", + " missing_coord = gens[gens[\"longitude\"].isna() | gens[\"latitude\"].isna()]\n", + " loop_count += 1\n", + "\n", "# load wind data\n", "wind = pd.read_excel(\n", " downloads_folder(f\"eia860/eia860{year}/3_2_Wind_Y{year}.xlsx\"),\n", " sheet_name=\"Operable\",\n", " header=1,\n", - ").rename(columns=\n", - " {\n", + ").rename(\n", + " columns={\n", " \"Plant Code\": \"plant_id_eia\",\n", " \"Generator ID\": \"generator_id\",\n", " \"Design Wind Speed (mph)\": \"rated_speed_mph\",\n", @@ -114,12 +149,12 @@ "wind = wind[[\"plant_id_eia\", \"generator_id\", \"rated_speed_m_per_s\", \"hub_height_m\"]]\n", "\n", "# merge the wind data into the gens data\n", - "gens = gens.merge(wind, how=\"left\", on=[\"plant_id_eia\",\"generator_id\"])" + "gens = gens.merge(wind, how=\"left\", on=[\"plant_id_eia\", \"generator_id\"])" ] }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -129,7 +164,7 @@ ], "metadata": { "kernelspec": { - "display_name": "open_grid_emissions", + "display_name": "open-grid-emissions-QkuIZ37I", "language": "python", "name": "python3" }, @@ -145,12 +180,7 @@ "pygments_lexer": "ipython3", "version": "3.11.4" }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "b893a8d8fbb165be288531947168b3b06bdb1508177327a21c265e0400df3100" - } - } + "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 diff --git a/src/oge/data_pipeline.py b/src/oge/data_pipeline.py index 4be11e14..48f064a5 100644 --- a/src/oge/data_pipeline.py +++ b/src/oge/data_pipeline.py @@ -142,7 +142,7 @@ def main(args): logger.info("1. Downloading data") # PUDL download_data.download_pudl_data(source="aws") - logger.info(f"Using {os.getenv('PUDL_BUILD', default="stable")} PUDL build") + logger.info(f"Using {os.getenv('PUDL_BUILD', default='stable')} PUDL build") # eGRID download_data.download_egrid_files() # EIA-930 diff --git a/src/oge/helpers.py b/src/oge/helpers.py index 0437fdcd..c4a844aa 100644 --- a/src/oge/helpers.py +++ b/src/oge/helpers.py @@ -2,6 +2,7 @@ import pandas as pd from geopy.geocoders import Nominatim +from geopy.exc import GeocoderUnavailable from timezonefinder import TimezoneFinder from urllib3.exceptions import ReadTimeoutError @@ -849,8 +850,8 @@ def get_coordinates_of_location(state: str, county: str, city: str) -> tuple[flo """ if pd.isna(state): return np.NaN, np.NaN - if pd.isna(city): - if not pd.isna(county): + if pd.isna(city) | (city == "unsited"): + if not (pd.isna(county) | (county == "NOT IN FILE")): query = f"{county} county, {state}, USA" else: query = f"{state}, USA" @@ -860,10 +861,15 @@ def get_coordinates_of_location(state: str, county: str, city: str) -> tuple[flo try: location = geolocator.geocode(query, country_codes="us") if location is None: + logger.warning(f"No location returned for {query}") return np.NaN, np.NaN else: return float(location.raw["lat"]), float(location.raw["lon"]) except ReadTimeoutError: + logger.warning(f"ReadTimeoutError for {query}") + return np.NaN, np.NaN + except GeocoderUnavailable: + logger.warning(f"GeocoderUnavailable for {query}") return np.NaN, np.NaN diff --git a/src/oge/visualization.py b/src/oge/visualization.py index 18595166..95911491 100644 --- a/src/oge/visualization.py +++ b/src/oge/visualization.py @@ -60,9 +60,9 @@ def graph_hourly_data_by_fuel_category( "biomass", "petroleum", "waste", - "solar", - "wind", "natural_gas", + "wind", + "solar", ] hourly_data = hourly_data[hourly_data[fuel_category_name] != "total"]