Skip to content

Commit

Permalink
Reimplement validation for subannual/datetime data (#129)
Browse files Browse the repository at this point in the history
  • Loading branch information
danielhuppmann authored Nov 17, 2021
1 parent 3c7dae9 commit d2760ec
Show file tree
Hide file tree
Showing 6 changed files with 213 additions and 9 deletions.
33 changes: 33 additions & 0 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# This workflow will install Python dependencies and run the tests
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Pytest

on:
push:
branches: [ '**' ]
pull_request:
branches: [ '**' ]

jobs:
tests:

runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2

- name: Set up Python 3.9
uses: actions/setup-python@v1
with:
python-version: 3.9

- name: Install dependencies
run: |
pip install -r requirements.txt
pip install pytest
- name: Install and test package functions
run: |
pip install --editable .
pytest tests
6 changes: 3 additions & 3 deletions .github/workflows/validation.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# This workflow will install Python dependencies and validate the project
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Validate the project
name: Nomenclature

on:
push:
Expand All @@ -10,7 +10,7 @@ on:
branches: [ '**' ]

jobs:
pytest:
validation:

runs-on: ubuntu-latest

Expand All @@ -22,7 +22,7 @@ jobs:
with:
python-version: 3.9

- name: Install requirements
- name: Install dependencies
run: pip install -r requirements.txt

- name: Run the nomenclature project validation
Expand Down
6 changes: 3 additions & 3 deletions openentrance/tests/test_core.py → tests/test_core.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import openentrance as oe
from openentrance import iso_mapping, nuts_hierarchy


def test_iso_mapping():
    """Check that the ISO mapping is populated and resolves known codes."""
    # all three codes are expected to resolve to the same country name
    for name in ["GR", "GRC", "EL"]:
        assert iso_mapping[name] == "Greece"


def test_nuts_hierarchy():
    """Check that the NUTS hierarchy is populated and has a known leaf entry."""
    # country -> NUTS1 -> NUTS2 -> list of NUTS3 codes
    assert nuts_hierarchy["Belgium"]["BE2"]["BE24"] == ["BE241", "BE242"]
54 changes: 54 additions & 0 deletions tests/test_definitions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import nomenclature

# shared fixture: definitions are loaded once at import time from the relative
# folder "definitions" (NOTE(review): this presumably requires running the
# tests from the repository root - confirm)
definition = nomenclature.DataStructureDefinition("definitions")


def test_variables():
    """Check that the variables dictionary contains a specific element."""
    variables = definition.variable
    assert "Emissions|CO2" in variables


def test_variables_fuel_types():
    """Check that exploding <Fuel> to fuels works (including the CCS subcategory)."""
    expected_descriptions = [
        (
            "Secondary Energy|Electricity|Gas",
            "Net electricity production from natural gas "
            "(including methane from biomass or hydrogenation)",
        ),
        (
            "Secondary Energy|Electricity|Gas|w/ CCS",
            "Net electricity production from natural gas (including methane "
            "from biomass or hydrogenation) with a CO2 capture component",
        ),
    ]
    for variable, description in expected_descriptions:
        observed = definition.variable[variable]
        assert observed["description"] == description


def test_variables_industry_types():
    """Check that exploding <industry> to industries works."""
    description = definition.variable["Capital|iAGRI"]["description"]
    assert description == "Total capital costs spend by agriculture"


def test_variables_transport_types():
    """Check that exploding <transport> to transportation modes works."""
    variable = "Energy Service|Transportation|Freight|Rail"
    expected = (
        "Provision of energy services related to freight "
        "rail-based transportation technologies"
    )
    assert definition.variable[variable]["description"] == expected


def test_variables_product_types():
    """Check that exploding <product> to products works."""
    description = definition.variable["Consumption|Households|pAGRI|Imported"][
        "description"
    ]
    assert description == "Consumption of imported agriculture by households"


def test_regions():
    """Check that the regions dictionary contains a specific element."""
    regions = definition.region
    assert "Europe" in regions
90 changes: 90 additions & 0 deletions tests/test_validate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import pandas as pd
from pyam import IamDataFrame
import pytest

import sys

sys.path.append("..")

from workflow import main as workflow


# minimal wide-format timeseries: a single row with values for 2005 and 2010
TEST_DF = pd.DataFrame(
    [
        ["model_a", "scen_a", "Europe", "Primary Energy", "EJ/yr", 1, 6.0],
    ],
    columns=["model", "scenario", "region", "variable", "unit", 2005, 2010],
)
# baseline IamDataFrame that the tests below rename to build failing variants
df = IamDataFrame(TEST_DF)


def validate(df):
    """Run the workflow on `df` and report whether it completed.

    Returns True if the workflow finishes, and False if it raises a
    ValueError (the message is printed so it shows up in the test output).
    Any other exception propagates to the caller.
    """
    try:
        workflow(df)
    except ValueError as error:
        print(error)
        return False
    return True


def test_validate():
    """Simple validation of the baseline data must pass."""
    is_valid = validate(df)
    assert is_valid


def test_validate_fail():
    """Simple validation must fail on the variable and the region dimension."""
    unknown_variable = df.rename(variable={"Primary Energy": "foo"})
    assert not validate(unknown_variable)

    unknown_region = df.rename(region={"Europe": "foo"})
    assert not validate(unknown_region)


def _test_validate_directional():
    """Validation of directional data (regions written as "origin>destination").

    NOTE: the leading underscore keeps this function from matching pytest's
    default `test*` collection pattern, so it is currently not run.
    """
    # both ends of the direction are known regions
    known_pair = df.rename(region={"Europe": "Austria>Germany"})
    assert validate(known_pair)

    # the destination is not a known region
    unknown_destination = df.rename(region={"Europe": "Austria>foo"})
    assert not validate(unknown_destination)

    # directional data with more than one `>` must fail
    chained = df.rename(region={"Europe": "Austria>Italy>France"})
    assert not validate(chained)


def test_validate_subannual_months():
    """Validation with months (and representative timeslices generally)."""
    known_timeslice = IamDataFrame(TEST_DF, subannual="January")
    assert validate(known_timeslice)

    unknown_timeslice = IamDataFrame(TEST_DF, subannual="foo")
    assert not validate(unknown_timeslice)


@pytest.mark.parametrize(
    "subannual, status",
    [
        ("01-01 00:00+01:00", True),
        ("01-01 00:00", False),
        ("01-01 00:00+02:00", False),
        ("01-32 00:00+01:00", False),
    ],
)
def test_validate_subannual_datetime(subannual, status):
    """Validation with continuous time given in the "subannual" column.

    The cases cover a valid entry, a missing timezone, a timezone other
    than +01:00, and an impossible day of the month.
    """
    data = IamDataFrame(TEST_DF, subannual=subannual)
    outcome = validate(data)
    assert outcome == status


@pytest.mark.parametrize(
    "rename_mapping, status",
    [
        ({2005: "2005-06-17 00:00+01:00", 2010: "2010-06-17 00:00+01:00"}, True),
        ({2005: "2005-06-17 00:00+02:00", 2010: "2010-06-17 00:00+02:00"}, False),
        ({2005: "2005-06-17 00:00", 2010: "2010-06-17 00:00"}, False),
    ],
)
def test_validate_time_entry(rename_mapping, status):
    """Validation with data given in the datetime domain ("time" column)."""
    # start from the long-format data of the year-based test frame
    long_data = IamDataFrame(TEST_DF).data
    # turn the "year" column into "time" and swap the years for datetime strings
    as_time = long_data.rename(columns={"year": "time"})
    as_time = as_time.replace(rename_mapping)

    outcome = validate(IamDataFrame(as_time))
    assert outcome == status


def test_validate_unit_entry():
    """Validation must fail when the unit is changed from "EJ/yr" to "MWh"."""
    wrong_unit = df.rename(unit={"EJ/yr": "MWh"})
    assert not validate(wrong_unit)
33 changes: 30 additions & 3 deletions workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,49 @@

# directory containing this file; the "definitions" folder is resolved against it
here = Path(__file__).absolute().parent
logger = logging.getLogger(__name__)
from datetime import datetime, timedelta


# datetime must be in Central European Time (CET)
# expected timezone name, compared against `datetime.tzname()` in `main`
EXP_TZ = "UTC+01:00"
# expected offset from UTC (one hour), compared against `datetime.utcoffset()`
EXP_TIME_OFFSET = timedelta(seconds=3600)


def _check_subannual_datetime(timeslice) -> None:
    """Raise a ValueError unless `timeslice` is a "MM-DD HH:MM+01:00" string."""
    # the year is only a placeholder for parsing; 2020 is a leap year,
    # so "02-29" is accepted as a valid day
    try:
        parsed = datetime.strptime(f"2020-{timeslice}", "%Y-%m-%d %H:%M%z")
    except ValueError:
        # parse again without timezone to tell a missing timezone
        # apart from a malformed date or time
        try:
            datetime.strptime(f"2020-{timeslice}", "%Y-%m-%d %H:%M")
        except ValueError:
            raise ValueError(f"Invalid subannual timeslice: {timeslice}") from None

        raise ValueError(f"Missing timezone: {timeslice}") from None

    # casting to datetime with timezone was successful
    if not (parsed.tzname() == EXP_TZ or parsed.utcoffset() == EXP_TIME_OFFSET):
        raise ValueError(f"Invalid timezone: {timeslice}")


def main(df: pyam.IamDataFrame) -> pyam.IamDataFrame:
    """Main function for validation and processing

    Validates the "region" and "variable" dimensions of `df` against the
    project definitions, re-casts data given in the datetime domain to the
    year + "subannual" format, and checks that every "subannual" entry is
    either a defined timeslice or a datetime-like string in UTC+01:00.

    Parameters
    ----------
    df : pyam.IamDataFrame
        Timeseries data to be validated.

    Returns
    -------
    pyam.IamDataFrame
        The validated data; if `df` used a "time" column, the returned object
        is the re-cast frame rather than the one passed in.

    Raises
    ------
    ValueError
        If a "subannual" entry that is not a defined timeslice is not a valid
        "MM-DD HH:MM" datetime, has no timezone, or is not in UTC+01:00.
        (`definition.validate` presumably also raises on unknown regions or
        variables - confirm against the nomenclature package.)
    """
    logger.info("Starting openENTRANCE timeseries-upload processing workflow...")

    # load the subannual definitions also for datetime data, because that data
    # is re-cast to the "subannual" format below
    if "subannual" in df.dimensions or df.time_col == "time":
        dimensions = ["region", "variable", "subannual"]
    else:
        dimensions = ["region", "variable"]

    definition = DataStructureDefinition(here / "definitions", dimensions=dimensions)
    # "subannual" is checked separately below, because datetime-like
    # entries are not part of the definitions
    definition.validate(df, dimensions=["region", "variable"])

    # convert to subannual format if data provided in datetime format
    if df.time_col == "time":
        logger.info('Re-casting from "time" column to categorical "subannual" format')
        df = df.swap_time_for_year(subannual=True)

    # check that any datetime-like items in "subannual" are valid datetime and UTC+01:00
    if "subannual" in df.dimensions:
        for timeslice in df.subannual:
            if timeslice not in definition.subannual:
                _check_subannual_datetime(timeslice)

    return df

0 comments on commit d2760ec

Please sign in to comment.