metoppv · SamGriffithsMO · Sep 25, 2024 · Sep 26, 2024 · Sep 27, 2024 · Sep 27, 2024
diff --git a/improver/calibration/__init__.py b/improver/calibration/__init__.py
@@ -7,6 +7,7 @@
 """
 
 from collections import OrderedDict
+from datetime import datetime, timedelta
 from typing import Dict, List, Optional, Tuple
 
 from iris.cube import Cube, CubeList
@@ -15,6 +16,7 @@
     get_diagnostic_cube_name_from_probability_name,
 )
 from improver.utilities.cube_manipulation import MergeCubes
+from improver.utilities.load import load_cubelist
 
 
 def split_forecasts_and_truth(
@@ -266,3 +268,59 @@ def add_warning_comment(forecast: Cube) -> Cube:
             "however, no calibration has been applied."
         )
     return forecast
+
+
+def get_cube_from_directory(
+    directory,
+    cycle_point=None,
+    max_days_offset=None,
+    date_format="%Y%m%dT%H%MZ",
+    verbose=False,
+):
+    """
+    loads and merges all netCDF files in a directory
+
+    To switch on the max offset filter, both cycle_point and max_days_offset
+    need to be provided
+
+    Args:
+        directory (pathlib.Path):
+            The path to the directory.
+        cycle_point (str):
+            The cycle point of the forecast, used to filter files
+        max_days_offset (int):
+            Maximum number of days before cycle_point to consider files,
+            Defined as a postive int that is subtracted from the cycle_point
-            Maximum number of days before cycle_point to consider files,
-            Defined as a postive int that is subtracted from the cycle_point
+            Maximum number of days before cycle_point to consider files.
+            Defined as a positive int that is subtracted from the cycle_point
-            Maximum number of days before cycle_point to consider files,
-            Defined as a postive int that is subtracted from the cycle_point
+            Maximum number of days before cycle_point to consider files.
+            Defined as a positive int that is subtracted from the cycle_point
+        date_format (str):
+            format of the cyclepoint and datetime in the filename, used by
+            datetime.strptime
+        verbose (bool):
+            switch on verbose output
+
+    Returns:
+        Cube
+    """
+    files = [*map(str, directory.glob("*.nc"))]
+
+    if len(files) == 0:
+        if verbose:
+            print(f"No files found in {directory}")
+        return None
+
+    cubes = load_cubelist(files)
+    if max_days_offset and cycle_point:
+        cycle_point = datetime.strptime(cycle_point, date_format)
+        earliest_time = cycle_point - timedelta(days=max_days_offset)
+        for cube in cubes.copy():
+            rt = cube.coord("forecast_reference_time").points[0]
+            period = cube.coord("forecast_period").points[0]
+            dt = datetime.fromtimestamp(rt + period)
+            if dt < earliest_time:
+                cubes.remove(cube)
+
+    if len(cubes) < 2:
+        if verbose:
+            print(f"Not enough files found in {directory}")
+        return None
+
+    return MergeCubes()(cubes)
diff --git a/improver/calibration/ensemble_calibration.py b/improver/calibration/ensemble_calibration.py
@@ -25,6 +25,7 @@
 from scipy.stats import norm
 
 from improver import BasePlugin, PostProcessingPlugin
+from improver.calibration import get_cube_from_directory
 from improver.calibration.utilities import (
     broadcast_data_to_time_coord,
     check_data_sufficiency,
@@ -1366,6 +1367,61 @@ def process(
         return coefficients_cubelist
 
 
+class MetaEstimateCoefficientsForEnsembleCalibration(BasePlugin):
-class MetaEstimateCoefficientsForEnsembleCalibration(BasePlugin):
+class MetaEnsembleCoeffEstimator(BasePlugin):
-class MetaEstimateCoefficientsForEnsembleCalibration(BasePlugin):
+class MetaEnsembleCoeffEstimator(BasePlugin):
+    """
+    Meta plugin for handling directories of netcdfs as inputs, instead of cubes
+    """
+
+    def __init__(
+        self,
+        distribution,
+        truth_attribute,
+        cycle_point=None,
+        max_days_offset=None,
+        point_by_point=False,
+        use_default_initial_guess=False,
+        units=None,
+        predictor="mean",
+        tolerance: float = 0.02,
+        max_iterations: int = 1000,
+    ):
+        self.distribution = distribution
+        self.truth_attribute = truth_attribute
+        self.cycle_point = cycle_point
+        self.max_days_offset = max_days_offset
+        self.point_by_point = point_by_point
+        self.use_default_initial_guess = use_default_initial_guess
+        self.units = units
+        self.predictor = predictor
+        self.tolerance = tolerance
+        self.max_iterations = max_iterations
+
+    def process(self, forecast_directory, truth_directory, land_sea_mask=None):
+        forecast = get_cube_from_directory(
+            forecast_directory,
+            cycle_point=self.cycle_point,
+            max_days_offset=self.max_days_offset,
+        )
+        truth = get_cube_from_directory(
+            truth_directory,
+            cycle_point=self.cycle_point,
+            max_days_offset=self.max_days_offset,
+        )
+
+        if forecast and truth:
+            plugin = EstimateCoefficientsForEnsembleCalibration(
+                self.distribution,
+                point_by_point=self.point_by_point,
+                use_default_initial_guess=self.use_default_initial_guess,
+                desired_units=self.units,
+                predictor=self.predictor,
+                tolerance=self.tolerance,
+                max_iterations=self.max_iterations,
+            )
+            return plugin(forecast, truth, landsea_mask=land_sea_mask)
+        return None
+
+
 class CalibratedForecastDistributionParameters(BasePlugin):
     """
     Class to calculate calibrated forecast distribution parameters given an

diff --git a/improver/calibration/reliability_calibration.py b/improver/calibration/reliability_calibration.py
@@ -18,6 +18,7 @@
 from numpy.ma.core import MaskedArray
 
 from improver import BasePlugin, PostProcessingPlugin
+from improver.calibration import get_cube_from_directory
 from improver.calibration.utilities import (
     check_forecast_consistency,
     create_unified_frt_coord,
@@ -527,6 +528,52 @@ def process(
         return MergeCubes()(reliability_tables, copy=False)
 
 
+class MetaConstructReliabilityCalibrationTables(BasePlugin):
+    """
+    Meta plugin for handling directories of netcdfs as inputs, instead of cubes
+    """
+
+    def __init__(
+        self,
+        truth_attribute,
+        n_probability_bins: int = 5,
+        single_value_lower_limit: bool = False,
+        single_value_upper_limit: bool = False,
+        aggregate_coordinates: list = None,
+        cycle_point: Optional[str] = None,
+        max_days_offset: Optional[int] = None,
+    ):
+        self.truth_attribute = truth_attribute
+        self.n_probability_bins = n_probability_bins
+        self.single_value_lower_limit = single_value_lower_limit
+        self.single_value_upper_limit = single_value_upper_limit
+        self.cycle_point = cycle_point
+        self.max_days_offset = max_days_offset
+        self.aggregate_coordinates = aggregate_coordinates
+
+    def process(self, forecast_directory, truth_directory):
+        forecast = get_cube_from_directory(
+            forecast_directory,
+            cycle_point=self.cycle_point,
+            max_days_offset=self.max_days_offset,
+        )
+        truth = get_cube_from_directory(
+            truth_directory,
+            cycle_point=self.cycle_point,
+            max_days_offset=self.max_days_offset,
+        )
+
+        if forecast and truth:
+            plugin = ConstructReliabilityCalibrationTables(
+                # truth_attribute=self.truth_attribute,
+                n_probability_bins=self.n_probability_bins,
+                single_value_lower_limit=self.single_value_lower_limit,
+                single_value_upper_limit=self.single_value_upper_limit,
+            )
+            return plugin(forecast, truth, aggregate_coords=self.aggregate_coordinates)
+        return None
+
+
 class AggregateReliabilityCalibrationTables(BasePlugin):
 
     """This plugin enables the aggregation of multiple reliability calibration

diff --git a/improver/cli/construct_reliability_tables.py b/improver/cli/construct_reliability_tables.py
@@ -11,12 +11,16 @@
 @cli.clizefy
 @cli.with_output
 def process(
-    *cubes: cli.inputcube,
-    truth_attribute,
+    forecast_directory: cli.inputpath,
+    truth_directory: cli.inputpath,
+    *,
+    truth_attribute: str = "",
     n_probability_bins: int = 5,
     single_value_lower_limit: bool = False,
     single_value_upper_limit: bool = False,
     aggregate_coordinates: cli.comma_separated_list = None,
+    cycle_point: str = None,
+    max_days_offset: int = None,
 ):
     """Populate reliability tables for use in reliability calibration.
 
@@ -26,12 +30,10 @@ def process(
     cubes and the thresholded truth.
 
     Args:
-        cubes (list of iris.cube.Cube):
-            A list of cubes containing the historical probability forecasts and
-            corresponding truths used for calibration. These cubes must include
-            the same diagnostic name in their names, and must both have
-            equivalent threshold coordinates. The cubes will be distinguished
-            using the user provided truth attribute.
+        forecast_directory (posix.Path):
+            The path to a directory containing the historical forecasts
+        truth_directory (posix.Path):
+            The path to a directory containing the truths to be used
         truth_attribute (str):
             An attribute and its value in the format of "attribute=value",
             which must be present on truth cubes.
@@ -51,21 +53,28 @@ def process(
             calibration table using summation. This is equivalent to constructing
             then using aggregate-reliability-tables but with reduced memory
             usage due to avoiding large intermediate data.
+        cycle_point (str):
+            Current cycle point. Used in combination with max_days_offset to identify
+            which historic forecasts and truths to use.
+        max_days_offset (int):
+            Maximum offset in days, used to identify the oldest acceptable inputs
+
 
     Returns:
         iris.cube.Cube:
             Reliability tables for the forecast diagnostic with a leading
             threshold coordinate.
     """
-    from improver.calibration import split_forecasts_and_truth
     from improver.calibration.reliability_calibration import (
-        ConstructReliabilityCalibrationTables,
+        MetaConstructReliabilityCalibrationTables,
     )
 
-    forecast, truth, _ = split_forecasts_and_truth(cubes, truth_attribute)
-
-    return ConstructReliabilityCalibrationTables(
+    return MetaConstructReliabilityCalibrationTables(
+        truth_attribute=truth_attribute,
         n_probability_bins=n_probability_bins,
         single_value_lower_limit=single_value_lower_limit,
         single_value_upper_limit=single_value_upper_limit,
-    )(forecast, truth, aggregate_coordinates)
+        aggregate_coordinates=aggregate_coordinates,
+        cycle_point=cycle_point,
+        max_days_offset=max_days_offset,
+    )(forecast_directory, truth_directory)
diff --git a/improver/cli/estimate_emos_coefficients.py b/improver/cli/estimate_emos_coefficients.py
@@ -13,9 +13,14 @@
 @cli.clizefy
 @cli.with_output
 def process(
-    *cubes: cli.inputcube,
+    forecast_directory: cli.inputpath,
+    truth_directory: cli.inputpath,
+    land_sea_mask: cli.inputcube = None,
-    forecast_directory: cli.inputpath,
-    truth_directory: cli.inputpath,
-    land_sea_mask: cli.inputcube = None,
+    forecast_cubes: cli.calib_input_dir,
+    truth_cubes: cli.calib_input_dir,
+    land_sea_mask: cli.inputcube = None,
-    forecast_directory: cli.inputpath,
-    truth_directory: cli.inputpath,
-    land_sea_mask: cli.inputcube = None,
+    *cubes: Union[cli.calib_input_dir, cli.inputcube],
-    forecast_directory: cli.inputpath,
-    truth_directory: cli.inputpath,
-    land_sea_mask: cli.inputcube = None,
+    forecast_cubes: cli.calib_input_dir,
+    truth_cubes: cli.calib_input_dir,
+    land_sea_mask: cli.inputcube = None,
-    forecast_directory: cli.inputpath,
-    truth_directory: cli.inputpath,
-    land_sea_mask: cli.inputcube = None,
+    *cubes: Union[cli.calib_input_dir, cli.inputcube],
+    *,
     distribution,
     truth_attribute,
+    cycle_point: str = None,
+    max_days_offset: int = None,
     point_by_point=False,
     use_default_initial_guess=False,
     units=None,
@@ -32,13 +37,12 @@ def process(
     The estimated coefficients are output as a cube.
 
     Args:
-        cubes (list of iris.cube.Cube):
-            A list of cubes containing the historical forecasts and
-            corresponding truth used for calibration. They must have the same
-            cube name and will be separated based on the truth attribute.
-            Optionally this may also contain a single land-sea mask cube on the
-            same domain as the historic forecasts and truth (where land points
-            are set to one and sea points are set to zero).
+        forecast_directory (posix.Path):
+            The path to a directory containing the historical forecasts
+        truth_directory (posix.Path):
+            The path to a directory containing the truths to be used
+        land_sea_mask (iris.cube.Cube):
+            Optional land-sea mask cube, used as a static additonal predictor.
-            Optional land-sea mask cube, used as a static additonal predictor.
+            Optional land-sea mask cube, used as a static additional predictor.
-            Optional land-sea mask cube, used as a static additonal predictor.
+            Optional land-sea mask cube, used as a static additional predictor.
         distribution (str):
             The distribution that will be used for minimising the
             Continuous Ranked Probability Score when estimating the EMOS
@@ -81,27 +85,31 @@ def process(
             is raised. If the predictor is "realizations", then the number of
             iterations may require increasing, as there will be more
             coefficients to solve.
-
+        cycle_point (str):
+            Current cycle point. Used in combination with max_days_offset to identify
+            which historic forecasts and truths to use.
+        max_days_offset (int):
+            Maximum offset in days, used to identify the oldest acceptable inputs
     Returns:
         iris.cube.CubeList:
             CubeList containing the coefficients estimated using EMOS. Each
             coefficient is stored in a separate cube.
     """
 
-    from improver.calibration import split_forecasts_and_truth
     from improver.calibration.ensemble_calibration import (
-        EstimateCoefficientsForEnsembleCalibration,
+        MetaEstimateCoefficientsForEnsembleCalibration,
     )
 
-    forecast, truth, land_sea_mask = split_forecasts_and_truth(cubes, truth_attribute)
-
-    plugin = EstimateCoefficientsForEnsembleCalibration(
+    plugin = MetaEstimateCoefficientsForEnsembleCalibration(
         distribution,
+        truth_attribute,
+        cycle_point=cycle_point,
+        max_days_offset=max_days_offset,
         point_by_point=point_by_point,
         use_default_initial_guess=use_default_initial_guess,
-        desired_units=units,
+        units=units,
         predictor=predictor,
         tolerance=tolerance,
         max_iterations=max_iterations,
     )
-    return plugin(forecast, truth, landsea_mask=land_sea_mask)
+    return plugin(forecast_directory, truth_directory, land_sea_mask=land_sea_mask)
-    return plugin(forecast_directory, truth_directory, land_sea_mask=land_sea_mask)
+    return plugin(**cubes)
-    return plugin(forecast_directory, truth_directory, land_sea_mask=land_sea_mask)
+    return plugin(**cubes)
diff --git a/improver_tests/acceptance/SHA256SUMS b/improver_tests/acceptance/SHA256SUMS
@@ -315,13 +315,13 @@ b60f6046c86319f8b7ca3b5d7902dbaf3a52f571f30ba56a1a4bc814c42dd341  ./combine/mini
 cd5fe4e4ef61d890c30cc9cbd236bf0dfdbedd5f12f8a92803aa57be84c0d9ab  ./combine/multiplication_cellmethods/kgo.nc
 f1ae76b9374c5d1076b89a7348fe9bbc393a12ae4ccdc660a170ba5ff0f823ab  ./combine/multiplication_cellmethods/precipitation_accumulation-PT01H.nc
 b8934494b4a24daa2408c4d95a2367e328e25e8323e34c67ef6026d51021be32  ./combine/multiplication_cellmethods/precipitation_is_snow.nc
-0bd96af6cb5c6caa045e397589dd0ce3b498af837d989fe73326f5e9459c6054  ./construct-reliability-tables/basic/forecast_0.nc
-fbc14286b4ce41e2e60df0870ae4911c1b00a38ec96912f43c6187fcaf7d02f6  ./construct-reliability-tables/basic/forecast_1.nc
+0bd96af6cb5c6caa045e397589dd0ce3b498af837d989fe73326f5e9459c6054  ./construct-reliability-tables/basic/forecast/forecast_0.nc
+fbc14286b4ce41e2e60df0870ae4911c1b00a38ec96912f43c6187fcaf7d02f6  ./construct-reliability-tables/basic/forecast/forecast_1.nc
 0d0edf9751a2019db952907700b02499ec9f1c360db4591a8012ca247a841c73  ./construct-reliability-tables/basic/kgo_aggregated.nc
 902e5cb9d3dc5d2b78bb99aff8370f9815adf5064b2caeb7abed73a56a897a43  ./construct-reliability-tables/basic/kgo_single_value_bins.nc
 72d4fd0655d1b7a2bc11d85741ec944f195c59813ae629e6858116c4e09eccb0  ./construct-reliability-tables/basic/kgo_without_single_value_bins.nc
-8ed50464c34b8673d98d1256d1c11b9eeea911dc79f7f75d425a590bf8697301  ./construct-reliability-tables/basic/truth_0.nc
-3999adb3749052d9efdfab863427a20a1fabbca06ff430c6c9cf5f89d1ea4d60  ./construct-reliability-tables/basic/truth_1.nc
+8ed50464c34b8673d98d1256d1c11b9eeea911dc79f7f75d425a590bf8697301  ./construct-reliability-tables/basic/truth/truth_0.nc
+3999adb3749052d9efdfab863427a20a1fabbca06ff430c6c9cf5f89d1ea4d60  ./construct-reliability-tables/basic/truth/truth_1.nc
 9795b9758a88e2c4d4171c8b08304f7f0711e03acda66a7394333f8b919ccf50  ./convection-ratio/basic/kgo.nc
 74f850942572aa99de807396d48bd80dd96088c638a9d5fa379b95f7c5ad8614  ./convection-ratio/basic/lwe_convective_precipitation_rate.nc
 b946c7687cb9ed02a12a934429a31306004ad45214cf4b451468b077018c0911  ./convection-ratio/basic/lwe_stratiform_precipitation_rate.nc