From c3ee496c9545c5122a7628579eaa3d5d7d5a4cc7 Mon Sep 17 00:00:00 2001
From: Sam Griffiths <sam.griffiths@metoffice.gov.uk>
Date: Wed, 25 Sep 2024 16:28:58 +0100
Subject: [PATCH 01/12] input directories for estimate emos

---
 improver/calibration/__init__.py           | 46 +++++++++++++++++++++-
 improver/cli/estimate_emos_coefficients.py | 25 +++++++-----
 2 files changed, 60 insertions(+), 11 deletions(-)

diff --git a/improver/calibration/__init__.py b/improver/calibration/__init__.py
index d85ee1c244..2307af40ef 100644
--- a/improver/calibration/__init__.py
+++ b/improver/calibration/__init__.py
@@ -15,7 +15,7 @@
     get_diagnostic_cube_name_from_probability_name,
 )
 from improver.utilities.cube_manipulation import MergeCubes
-
+from improver.utilities.load import load_cubelist
 
 def split_forecasts_and_truth(
     cubes: List[Cube], truth_attribute: str
@@ -266,3 +266,47 @@ def add_warning_comment(forecast: Cube) -> Cube:
             "however, no calibration has been applied."
         )
     return forecast
+
+from datetime import datetime, timedelta
+
+def get_cube_from_directory(directory, cycle_point=None, max_days_offset=None, date_format='%Y%m%dT%H%MZ'):
+    """
+    loads and merges all netCDF files in a directory
+
+    To switch on the max offset filter, both cycle_point and max_days_offset 
+    need to be provided
+    Args:
+        directory (pathlib.Path):
+            The path to the directory.
+        cycle_point (str):
+            The cycle point of the forecast, used to filter files
+        max_days_offset (int):
+            Maximum number of days before cycle_point to consider files,
+            Defined as a postive int that is subtracted from the cycle_point
+        date_format (str):
+            format of the cyclepoint and datetime in the filename, used by 
+            datetime.strptime
+
+    Returns:
+        Cube
+    """
+    files = [*map(str, directory.glob("*.nc"))]
+    if len(files) == 0:
+        # This is probably too serious - is there a quiet way to handle this?
+        raise ValueError(f"No files found in {directory}")
+    
+    if max_days_offset and cycle_point:
+        # Ignore files if they are older than max_days_offset days from cycle_point
+        cycle_point = datetime.strptime(cycle_point, date_format)
+        earliest_time = cycle_point - timedelta(days=max_days_offset)
+        for filename in files.copy():
+            file_datetime = filename.split('/')[-1].split('-')[0]
+            if datetime.strptime(file_datetime, date_format) < earliest_time:
+                files.remove(filename)
+    
+    if len(files) < 2:
+        raise ValueError(f"Not enough files found in {directory}")
+
+    # Check for a lower limit on number of files? - 2
+    cubes = load_cubelist(files)
+    return MergeCubes()(cubes)
\ No newline at end of file
diff --git a/improver/cli/estimate_emos_coefficients.py b/improver/cli/estimate_emos_coefficients.py
index 88b60986bc..54815db9d2 100755
--- a/improver/cli/estimate_emos_coefficients.py
+++ b/improver/cli/estimate_emos_coefficients.py
@@ -13,9 +13,14 @@
 @cli.clizefy
 @cli.with_output
 def process(
-    *cubes: cli.inputcube,
+    forecast_directory: cli.inputpath,
+    truth_directory: cli.inputpath,
+    land_sea_mask: cli.inputcube = None,
+    *,
     distribution,
     truth_attribute,
+    cycle_point=None,
+    max_days_offset=None,
     point_by_point=False,
     use_default_initial_guess=False,
     units=None,
@@ -32,13 +37,12 @@ def process(
     The estimated coefficients are output as a cube.
 
     Args:
-        cubes (list of iris.cube.Cube):
-            A list of cubes containing the historical forecasts and
-            corresponding truth used for calibration. They must have the same
-            cube name and will be separated based on the truth attribute.
-            Optionally this may also contain a single land-sea mask cube on the
-            same domain as the historic forecasts and truth (where land points
-            are set to one and sea points are set to zero).
+        forecast_directory (posix.Path):
+            The path to a directory containing the historical forecasts
+        truth_directory (posix.Path):
+            The path to a directory containing the truths to be used
+        land_sea_mask (iris.cube.Cube):
+            Optional land-sea mask cube, used as a static additonal predictor.
         distribution (str):
             The distribution that will be used for minimising the
             Continuous Ranked Probability Score when estimating the EMOS
@@ -88,12 +92,13 @@ def process(
             coefficient is stored in a separate cube.
     """
 
-    from improver.calibration import split_forecasts_and_truth
     from improver.calibration.ensemble_calibration import (
         EstimateCoefficientsForEnsembleCalibration,
     )
+    from improver.utilities.load import get_cube_from_directory
 
-    forecast, truth, land_sea_mask = split_forecasts_and_truth(cubes, truth_attribute)
+    forecast = get_cube_from_directory(forecast_directory, cycle_point=cycle_point, max_days_offset=max_days_offset)
+    truth = get_cube_from_directory(truth_directory, cycle_point=cycle_point, max_days_offset=max_days_offset)
 
     plugin = EstimateCoefficientsForEnsembleCalibration(
         distribution,

From 114776cde6433cb48ffa04a6967c191edc9297fc Mon Sep 17 00:00:00 2001
From: Sam Griffiths <sam.griffiths@metoffice.gov.uk>
Date: Thu, 26 Sep 2024 09:56:19 +0100
Subject: [PATCH 02/12] correct import

---
 improver/cli/estimate_emos_coefficients.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/improver/cli/estimate_emos_coefficients.py b/improver/cli/estimate_emos_coefficients.py
index 54815db9d2..c464a795f3 100755
--- a/improver/cli/estimate_emos_coefficients.py
+++ b/improver/cli/estimate_emos_coefficients.py
@@ -19,8 +19,8 @@ def process(
     *,
     distribution,
     truth_attribute,
-    cycle_point=None,
-    max_days_offset=None,
+    cycle_point: str = None,
+    max_days_offset: int = None,
     point_by_point=False,
     use_default_initial_guess=False,
     units=None,
@@ -95,7 +95,7 @@ def process(
     from improver.calibration.ensemble_calibration import (
         EstimateCoefficientsForEnsembleCalibration,
     )
-    from improver.utilities.load import get_cube_from_directory
+    from improver.calibration import get_cube_from_directory
 
     forecast = get_cube_from_directory(forecast_directory, cycle_point=cycle_point, max_days_offset=max_days_offset)
     truth = get_cube_from_directory(truth_directory, cycle_point=cycle_point, max_days_offset=max_days_offset)

From b193539e2c58c2e99b1832b9d7a2a858428a7cc6 Mon Sep 17 00:00:00 2001
From: Sam Griffiths <sam.griffiths@metoffice.gov.uk>
Date: Fri, 27 Sep 2024 08:56:06 +0100
Subject: [PATCH 03/12] meta estimate emos

---
 improver/calibration/ensemble_calibration.py | 43 +++++++++++++++++++-
 improver/cli/estimate_emos_coefficients.py   | 13 +++---
 2 files changed, 48 insertions(+), 8 deletions(-)

diff --git a/improver/calibration/ensemble_calibration.py b/improver/calibration/ensemble_calibration.py
index 200137ecb9..fcf74efa05 100644
--- a/improver/calibration/ensemble_calibration.py
+++ b/improver/calibration/ensemble_calibration.py
@@ -51,7 +51,7 @@
     generate_mandatory_attributes,
 )
 from improver.utilities.cube_manipulation import collapsed, enforce_coordinate_ordering
-
+from improver.calibration import get_cube_from_directory
 
 class ContinuousRankedProbabilityScoreMinimisers(BasePlugin):
     """
@@ -1365,6 +1365,47 @@ def process(
         )
         return coefficients_cubelist
 
+class MetaEstimateCoefficientsForEnsembleCalibration(BasePlugin):
+    """
+    Meta plugin for handling directories of netcdfs as inputs, instead of cubes
+    """
+    def __init__(self,
+        distribution,
+        truth_attribute,
+        cycle_point: str = None,
+        max_days_offset: int = None,
+        point_by_point=False,
+        use_default_initial_guess=False,
+        units=None,
+        predictor="mean",
+        tolerance: float = 0.02,
+        max_iterations: int = 1000,
+    ):
+        self.distribution = distribution
+        self.truth_attribute = truth_attribute
+        self.cycle_point = cycle_point
+        self.max_days_offset = max_days_offset
+        self.point_by_point = point_by_point
+        self.use_default_initial_guess = use_default_initial_guess
+        self.units = units
+        self.predictor = predictor
+        self.tolerance = tolerance
+        self.max_iterations = max_iterations
+
+    def process(self, forecast_directory, truth_directory, land_sea_mask=None):
+        self.forecast = get_cube_from_directory(forecast_directory, cycle_point=self.cycle_point, max_days_offset=self.max_days_offset)
+        self.truth = get_cube_from_directory(truth_directory, cycle_point=self.cycle_point, max_days_offset=self.max_days_offset)
+        plugin = EstimateCoefficientsForEnsembleCalibration(
+            self.distribution,
+            point_by_point=self.point_by_point,
+            use_default_initial_guess=self.use_default_initial_guess,
+            desired_units=self.units,
+            predictor=self.predictor,
+            tolerance=self.tolerance,
+            max_iterations=self.max_iterations,
+        )
+        return plugin(self.forecast, self.truth, landsea_mask=land_sea_mask)
+
 
 class CalibratedForecastDistributionParameters(BasePlugin):
     """
diff --git a/improver/cli/estimate_emos_coefficients.py b/improver/cli/estimate_emos_coefficients.py
index c464a795f3..f40f575750 100755
--- a/improver/cli/estimate_emos_coefficients.py
+++ b/improver/cli/estimate_emos_coefficients.py
@@ -93,15 +93,14 @@ def process(
     """
 
     from improver.calibration.ensemble_calibration import (
-        EstimateCoefficientsForEnsembleCalibration,
+        MetaEstimateCoefficientsForEnsembleCalibration,
     )
-    from improver.calibration import get_cube_from_directory
 
-    forecast = get_cube_from_directory(forecast_directory, cycle_point=cycle_point, max_days_offset=max_days_offset)
-    truth = get_cube_from_directory(truth_directory, cycle_point=cycle_point, max_days_offset=max_days_offset)
-
-    plugin = EstimateCoefficientsForEnsembleCalibration(
+    plugin = MetaEstimateCoefficientsForEnsembleCalibration(
         distribution,
+        truth_attribute,
+        cycle_point = cycle_point,
+        max_days_offset = max_days_offset,
         point_by_point=point_by_point,
         use_default_initial_guess=use_default_initial_guess,
         desired_units=units,
@@ -109,4 +108,4 @@ def process(
         tolerance=tolerance,
         max_iterations=max_iterations,
     )
-    return plugin(forecast, truth, landsea_mask=land_sea_mask)
+    return plugin(forecast_directory,truth_directory,land_sea_mask=land_sea_mask)

From 8187177f6d21cdd85c844cd5a25dd697cc17d506 Mon Sep 17 00:00:00 2001
From: Sam Griffiths <sam.griffiths@metoffice.gov.uk>
Date: Fri, 27 Sep 2024 09:22:01 +0100
Subject: [PATCH 04/12] reliability calibration

---
 improver/calibration/ensemble_calibration.py  | 21 ++++++++--
 .../calibration/reliability_calibration.py    | 42 +++++++++++++++++++
 improver/cli/construct_reliability_tables.py  | 33 +++++++++------
 improver/cli/estimate_emos_coefficients.py    | 12 ++++--
 4 files changed, 87 insertions(+), 21 deletions(-)

diff --git a/improver/calibration/ensemble_calibration.py b/improver/calibration/ensemble_calibration.py
index fcf74efa05..61bf6d269b 100644
--- a/improver/calibration/ensemble_calibration.py
+++ b/improver/calibration/ensemble_calibration.py
@@ -25,6 +25,7 @@
 from scipy.stats import norm
 
 from improver import BasePlugin, PostProcessingPlugin
+from improver.calibration import get_cube_from_directory
 from improver.calibration.utilities import (
     broadcast_data_to_time_coord,
     check_data_sufficiency,
@@ -51,7 +52,7 @@
     generate_mandatory_attributes,
 )
 from improver.utilities.cube_manipulation import collapsed, enforce_coordinate_ordering
-from improver.calibration import get_cube_from_directory
+
 
 class ContinuousRankedProbabilityScoreMinimisers(BasePlugin):
     """
@@ -1365,11 +1366,14 @@ def process(
         )
         return coefficients_cubelist
 
+
 class MetaEstimateCoefficientsForEnsembleCalibration(BasePlugin):
     """
     Meta plugin for handling directories of netcdfs as inputs, instead of cubes
     """
-    def __init__(self,
+
+    def __init__(
+        self,
         distribution,
         truth_attribute,
         cycle_point: str = None,
@@ -1393,8 +1397,17 @@ def __init__(self,
         self.max_iterations = max_iterations
 
     def process(self, forecast_directory, truth_directory, land_sea_mask=None):
-        self.forecast = get_cube_from_directory(forecast_directory, cycle_point=self.cycle_point, max_days_offset=self.max_days_offset)
-        self.truth = get_cube_from_directory(truth_directory, cycle_point=self.cycle_point, max_days_offset=self.max_days_offset)
+        self.forecast = get_cube_from_directory(
+            forecast_directory,
+            cycle_point=self.cycle_point,
+            max_days_offset=self.max_days_offset,
+        )
+        self.truth = get_cube_from_directory(
+            truth_directory,
+            cycle_point=self.cycle_point,
+            max_days_offset=self.max_days_offset,
+        )
+
         plugin = EstimateCoefficientsForEnsembleCalibration(
             self.distribution,
             point_by_point=self.point_by_point,
diff --git a/improver/calibration/reliability_calibration.py b/improver/calibration/reliability_calibration.py
index 7d291f7434..c44ef5eddd 100644
--- a/improver/calibration/reliability_calibration.py
+++ b/improver/calibration/reliability_calibration.py
@@ -18,6 +18,7 @@
 from numpy.ma.core import MaskedArray
 
 from improver import BasePlugin, PostProcessingPlugin
+from improver.calibration import get_cube_from_directory
 from improver.calibration.utilities import (
     check_forecast_consistency,
     create_unified_frt_coord,
@@ -527,6 +528,47 @@ def process(
         return MergeCubes()(reliability_tables, copy=False)
 
 
+class MetaConstructReliabilityCalibrationTables(BasePlugin):
+    def __init__(
+        self,
+        truth_attribute,
+        n_probability_bins: int = 5,
+        single_value_lower_limit: bool = False,
+        single_value_upper_limit: bool = False,
+        aggregate_coordinates: list = None,
+        cycle_point=None,
+        max_days_offset=None,
+    ):
+        self.truth_attribute = truth_attribute
+        self.n_probability_bins = n_probability_bins
+        self.single_value_lower_limit = single_value_lower_limit
+        self.single_value_upper_limit = single_value_upper_limit
+        self.cycle_point = cycle_point
+        self.max_days_offset = max_days_offset
+        self.aggregate_coordinates = aggregate_coordinates
+
+    def process(self, forecast_directory, truth_directory):
+        self.forecast = get_cube_from_directory(
+            forecast_directory,
+            cycle_point=self.cycle_point,
+            max_days_offset=self.max_days_offset,
+        )
+        self.truth = get_cube_from_directory(
+            truth_directory,
+            cycle_point=self.cycle_point,
+            max_days_offset=self.max_days_offset,
+        )
+
+        plugin = ConstructReliabilityCalibrationTables(
+            truth_attribute=self.truth_attribute,
+            n_probability_bins=self.n_probability_bins,
+            single_value_lower_limit=self.single_value_lower_limit,
+            single_value_upper_limit=self.single_value_upper_limit,
+            aggregate_coordinates=self.aggregate_coordinates,
+        )
+        return plugin(self.forecast, self.truth)
+
+
 class AggregateReliabilityCalibrationTables(BasePlugin):
 
     """This plugin enables the aggregation of multiple reliability calibration
diff --git a/improver/cli/construct_reliability_tables.py b/improver/cli/construct_reliability_tables.py
index 3c70e9a721..e49461c73f 100644
--- a/improver/cli/construct_reliability_tables.py
+++ b/improver/cli/construct_reliability_tables.py
@@ -11,12 +11,15 @@
 @cli.clizefy
 @cli.with_output
 def process(
-    *cubes: cli.inputcube,
+    forecast_directory: cli.inputpath,
+    truth_directory: cli.inputpath,
     truth_attribute,
     n_probability_bins: int = 5,
     single_value_lower_limit: bool = False,
     single_value_upper_limit: bool = False,
     aggregate_coordinates: cli.comma_separated_list = None,
+    cycle_point: str = None,
+    max_days_offset: int = None,
 ):
     """Populate reliability tables for use in reliability calibration.
 
@@ -26,12 +29,10 @@ def process(
     cubes and the thresholded truth.
 
     Args:
-        cubes (list of iris.cube.Cube):
-            A list of cubes containing the historical probability forecasts and
-            corresponding truths used for calibration. These cubes must include
-            the same diagnostic name in their names, and must both have
-            equivalent threshold coordinates. The cubes will be distinguished
-            using the user provided truth attribute.
+        forecast_directory (posix.Path):
+            The path to a directory containing the historical forecasts
+        truth_directory (posix.Path):
+            The path to a directory containing the truths to be used
         truth_attribute (str):
             An attribute and its value in the format of "attribute=value",
             which must be present on truth cubes.
@@ -51,21 +52,27 @@ def process(
             calibration table using summation. This is equivalent to constructing
             then using aggregate-reliability-tables but with reduced memory
             usage due to avoiding large intermediate data.
+        cycle_point (str):
+            Current cycle point. Used in combination with max_days_offset to identify
+            which historic forecasts and truths to use.
+        max_days_offset (int):
+            Maximum offset in days, used to identify the oldest acceptable inputs
+
 
     Returns:
         iris.cube.Cube:
             Reliability tables for the forecast diagnostic with a leading
             threshold coordinate.
     """
-    from improver.calibration import split_forecasts_and_truth
     from improver.calibration.reliability_calibration import (
-        ConstructReliabilityCalibrationTables,
+        MetaConstructReliabilityCalibrationTables,
     )
 
-    forecast, truth, _ = split_forecasts_and_truth(cubes, truth_attribute)
-
-    return ConstructReliabilityCalibrationTables(
+    return MetaConstructReliabilityCalibrationTables(
         n_probability_bins=n_probability_bins,
         single_value_lower_limit=single_value_lower_limit,
         single_value_upper_limit=single_value_upper_limit,
-    )(forecast, truth, aggregate_coordinates)
+        aggregate_coordinates=aggregate_coordinates,
+        cycle_point=cycle_point,
+        max_days_offset=max_days_offset,
+    )(forecast_directory, truth_directory)
diff --git a/improver/cli/estimate_emos_coefficients.py b/improver/cli/estimate_emos_coefficients.py
index f40f575750..ab85e193ec 100755
--- a/improver/cli/estimate_emos_coefficients.py
+++ b/improver/cli/estimate_emos_coefficients.py
@@ -85,7 +85,11 @@ def process(
             is raised. If the predictor is "realizations", then the number of
             iterations may require increasing, as there will be more
             coefficients to solve.
-
+        cycle_point (str):
+            Current cycle point. Used in combination with max_days_offset to identify
+            which historic forecasts and truths to use.
+        max_days_offset (int):
+            Maximum offset in days, used to identify the oldest acceptable inputs
     Returns:
         iris.cube.CubeList:
             CubeList containing the coefficients estimated using EMOS. Each
@@ -99,8 +103,8 @@ def process(
     plugin = MetaEstimateCoefficientsForEnsembleCalibration(
         distribution,
         truth_attribute,
-        cycle_point = cycle_point,
-        max_days_offset = max_days_offset,
+        cycle_point=cycle_point,
+        max_days_offset=max_days_offset,
         point_by_point=point_by_point,
         use_default_initial_guess=use_default_initial_guess,
         desired_units=units,
@@ -108,4 +112,4 @@ def process(
         tolerance=tolerance,
         max_iterations=max_iterations,
     )
-    return plugin(forecast_directory,truth_directory,land_sea_mask=land_sea_mask)
+    return plugin(forecast_directory, truth_directory, land_sea_mask=land_sea_mask)

From 66978c015118ac710b75def145e0e325726e7e5a Mon Sep 17 00:00:00 2001
From: Sam Griffiths <sam.griffiths@metoffice.gov.uk>
Date: Fri, 27 Sep 2024 10:57:33 +0100
Subject: [PATCH 05/12] construct reliability tables accepts directories

---
 improver/calibration/__init__.py              | 19 +++++++++++--------
 improver/calibration/ensemble_calibration.py  |  4 ++--
 .../calibration/reliability_calibration.py    |  8 ++++++--
 improver/cli/construct_reliability_tables.py  |  6 ++++--
 improver/cli/estimate_emos_coefficients.py    |  6 ++++--
 5 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/improver/calibration/__init__.py b/improver/calibration/__init__.py
index 2307af40ef..4af1654dca 100644
--- a/improver/calibration/__init__.py
+++ b/improver/calibration/__init__.py
@@ -7,6 +7,7 @@
 """
 
 from collections import OrderedDict
+from datetime import datetime, timedelta
 from typing import Dict, List, Optional, Tuple
 
 from iris.cube import Cube, CubeList
@@ -17,6 +18,7 @@
 from improver.utilities.cube_manipulation import MergeCubes
 from improver.utilities.load import load_cubelist
 
+
 def split_forecasts_and_truth(
     cubes: List[Cube], truth_attribute: str
 ) -> Tuple[Cube, Cube, Optional[Cube]]:
@@ -267,13 +269,14 @@ def add_warning_comment(forecast: Cube) -> Cube:
         )
     return forecast
 
-from datetime import datetime, timedelta
 
-def get_cube_from_directory(directory, cycle_point=None, max_days_offset=None, date_format='%Y%m%dT%H%MZ'):
+def get_cube_from_directory(
+    directory, cycle_point=None, max_days_offset=None, date_format="%Y%m%dT%H%MZ"
+):
     """
     loads and merges all netCDF files in a directory
 
-    To switch on the max offset filter, both cycle_point and max_days_offset 
+    To switch on the max offset filter, both cycle_point and max_days_offset
     need to be provided
     Args:
         directory (pathlib.Path):
@@ -284,7 +287,7 @@ def get_cube_from_directory(directory, cycle_point=None, max_days_offset=None, d
             Maximum number of days before cycle_point to consider files,
             Defined as a postive int that is subtracted from the cycle_point
         date_format (str):
-            format of the cyclepoint and datetime in the filename, used by 
+            format of the cyclepoint and datetime in the filename, used by
             datetime.strptime
 
     Returns:
@@ -294,19 +297,19 @@ def get_cube_from_directory(directory, cycle_point=None, max_days_offset=None, d
     if len(files) == 0:
         # This is probably too serious - is there a quiet way to handle this?
         raise ValueError(f"No files found in {directory}")
-    
+
     if max_days_offset and cycle_point:
         # Ignore files if they are older than max_days_offset days from cycle_point
         cycle_point = datetime.strptime(cycle_point, date_format)
         earliest_time = cycle_point - timedelta(days=max_days_offset)
         for filename in files.copy():
-            file_datetime = filename.split('/')[-1].split('-')[0]
+            file_datetime = filename.split("/")[-1].split("-")[0]
             if datetime.strptime(file_datetime, date_format) < earliest_time:
                 files.remove(filename)
-    
+
     if len(files) < 2:
         raise ValueError(f"Not enough files found in {directory}")
 
     # Check for a lower limit on number of files? - 2
     cubes = load_cubelist(files)
-    return MergeCubes()(cubes)
\ No newline at end of file
+    return MergeCubes()(cubes)
diff --git a/improver/calibration/ensemble_calibration.py b/improver/calibration/ensemble_calibration.py
index 61bf6d269b..2cd3ee049b 100644
--- a/improver/calibration/ensemble_calibration.py
+++ b/improver/calibration/ensemble_calibration.py
@@ -1376,8 +1376,8 @@ def __init__(
         self,
         distribution,
         truth_attribute,
-        cycle_point: str = None,
-        max_days_offset: int = None,
+        cycle_point=None,
+        max_days_offset=None,
         point_by_point=False,
         use_default_initial_guess=False,
         units=None,
diff --git a/improver/calibration/reliability_calibration.py b/improver/calibration/reliability_calibration.py
index c44ef5eddd..af1a7440e9 100644
--- a/improver/calibration/reliability_calibration.py
+++ b/improver/calibration/reliability_calibration.py
@@ -529,6 +529,10 @@ def process(
 
 
 class MetaConstructReliabilityCalibrationTables(BasePlugin):
+    """
+    Meta plugin for handling directories of netcdfs as inputs, instead of cubes
+    """
+
     def __init__(
         self,
         truth_attribute,
@@ -536,8 +540,8 @@ def __init__(
         single_value_lower_limit: bool = False,
         single_value_upper_limit: bool = False,
         aggregate_coordinates: list = None,
-        cycle_point=None,
-        max_days_offset=None,
+        cycle_point: Optional[str] = None,
+        max_days_offset: Optional[int] = None,
     ):
         self.truth_attribute = truth_attribute
         self.n_probability_bins = n_probability_bins
diff --git a/improver/cli/construct_reliability_tables.py b/improver/cli/construct_reliability_tables.py
index e49461c73f..e1b2750ef1 100644
--- a/improver/cli/construct_reliability_tables.py
+++ b/improver/cli/construct_reliability_tables.py
@@ -5,6 +5,8 @@
 # See LICENSE in the root of the repository for full licensing details.
 """CLI to construct reliability tables for use in reliability calibration."""
 
+from typing import Optional
+
 from improver import cli
 
 
@@ -18,8 +20,8 @@ def process(
     single_value_lower_limit: bool = False,
     single_value_upper_limit: bool = False,
     aggregate_coordinates: cli.comma_separated_list = None,
-    cycle_point: str = None,
-    max_days_offset: int = None,
+    cycle_point: Optional[str] = None,
+    max_days_offset: Optional[int] = None,
 ):
     """Populate reliability tables for use in reliability calibration.
 
diff --git a/improver/cli/estimate_emos_coefficients.py b/improver/cli/estimate_emos_coefficients.py
index ab85e193ec..99d1653f59 100755
--- a/improver/cli/estimate_emos_coefficients.py
+++ b/improver/cli/estimate_emos_coefficients.py
@@ -7,6 +7,8 @@
 Statistics (EMOS), otherwise known as Non-homogeneous Gaussian
 Regression (NGR)."""
 
+from typing import Optional
+
 from improver import cli
 
 
@@ -19,8 +21,8 @@ def process(
     *,
     distribution,
     truth_attribute,
-    cycle_point: str = None,
-    max_days_offset: int = None,
+    cycle_point: Optional[str] = None,
+    max_days_offset: Optional[int] = None,
     point_by_point=False,
     use_default_initial_guess=False,
     units=None,

From 99a9c8bbe80fe10d97e8b68e4f618156dd80f8c1 Mon Sep 17 00:00:00 2001
From: Sam Griffiths <sam.griffiths@metoffice.gov.uk>
Date: Fri, 27 Sep 2024 16:15:42 +0100
Subject: [PATCH 06/12] return no result if there is nothing to process

---
 improver/calibration/__init__.py              | 19 +++++++++----
 improver/calibration/ensemble_calibration.py  | 28 ++++++++++---------
 .../calibration/reliability_calibration.py    | 22 ++++++++-------
 improver/cli/construct_reliability_tables.py  |  6 ++--
 improver/cli/estimate_emos_coefficients.py    |  8 ++----
 5 files changed, 46 insertions(+), 37 deletions(-)

diff --git a/improver/calibration/__init__.py b/improver/calibration/__init__.py
index 4af1654dca..23e318fa6d 100644
--- a/improver/calibration/__init__.py
+++ b/improver/calibration/__init__.py
@@ -271,7 +271,11 @@ def add_warning_comment(forecast: Cube) -> Cube:
 
 
 def get_cube_from_directory(
-    directory, cycle_point=None, max_days_offset=None, date_format="%Y%m%dT%H%MZ"
+    directory,
+    cycle_point=None,
+    max_days_offset=None,
+    date_format="%Y%m%dT%H%MZ",
+    verbose=False,
 ):
     """
     loads and merges all netCDF files in a directory
@@ -289,17 +293,21 @@ def get_cube_from_directory(
         date_format (str):
             format of the cyclepoint and datetime in the filename, used by
             datetime.strptime
+        verbose (bool):
+            switch on verbose output
 
     Returns:
         Cube
     """
     files = [*map(str, directory.glob("*.nc"))]
     if len(files) == 0:
-        # This is probably too serious - is there a quiet way to handle this?
-        raise ValueError(f"No files found in {directory}")
+        if verbose:
+            print(f"No files found in {directory}")
+        return None
 
     if max_days_offset and cycle_point:
         # Ignore files if they are older than max_days_offset days from cycle_point
+        # ToDo - test checking by metadata, not file names
         cycle_point = datetime.strptime(cycle_point, date_format)
         earliest_time = cycle_point - timedelta(days=max_days_offset)
         for filename in files.copy():
@@ -308,8 +316,9 @@ def get_cube_from_directory(
                 files.remove(filename)
 
     if len(files) < 2:
-        raise ValueError(f"Not enough files found in {directory}")
+        if verbose:
+            print(f"Not enough files found in {directory}")
+        return None
 
-    # Check for a lower limit on number of files? - 2
     cubes = load_cubelist(files)
     return MergeCubes()(cubes)
diff --git a/improver/calibration/ensemble_calibration.py b/improver/calibration/ensemble_calibration.py
index 2cd3ee049b..e86f5243b9 100644
--- a/improver/calibration/ensemble_calibration.py
+++ b/improver/calibration/ensemble_calibration.py
@@ -1397,27 +1397,29 @@ def __init__(
         self.max_iterations = max_iterations
 
     def process(self, forecast_directory, truth_directory, land_sea_mask=None):
-        self.forecast = get_cube_from_directory(
+        forecast = get_cube_from_directory(
             forecast_directory,
             cycle_point=self.cycle_point,
             max_days_offset=self.max_days_offset,
         )
-        self.truth = get_cube_from_directory(
+        truth = get_cube_from_directory(
             truth_directory,
             cycle_point=self.cycle_point,
             max_days_offset=self.max_days_offset,
         )
-
-        plugin = EstimateCoefficientsForEnsembleCalibration(
-            self.distribution,
-            point_by_point=self.point_by_point,
-            use_default_initial_guess=self.use_default_initial_guess,
-            desired_units=self.units,
-            predictor=self.predictor,
-            tolerance=self.tolerance,
-            max_iterations=self.max_iterations,
-        )
-        return plugin(self.forecast, self.truth, landsea_mask=land_sea_mask)
+        # need any additional metadata checks?
+        if forecast and truth:
+            plugin = EstimateCoefficientsForEnsembleCalibration(
+                self.distribution,
+                point_by_point=self.point_by_point,
+                use_default_initial_guess=self.use_default_initial_guess,
+                desired_units=self.units,
+                predictor=self.predictor,
+                tolerance=self.tolerance,
+                max_iterations=self.max_iterations,
+            )
+            return plugin(forecast, truth, landsea_mask=land_sea_mask)
+        return None
 
 
 class CalibratedForecastDistributionParameters(BasePlugin):
diff --git a/improver/calibration/reliability_calibration.py b/improver/calibration/reliability_calibration.py
index af1a7440e9..daa4b84e24 100644
--- a/improver/calibration/reliability_calibration.py
+++ b/improver/calibration/reliability_calibration.py
@@ -552,25 +552,27 @@ def __init__(
         self.aggregate_coordinates = aggregate_coordinates
 
     def process(self, forecast_directory, truth_directory):
-        self.forecast = get_cube_from_directory(
+        forecast = get_cube_from_directory(
             forecast_directory,
             cycle_point=self.cycle_point,
             max_days_offset=self.max_days_offset,
         )
-        self.truth = get_cube_from_directory(
+        truth = get_cube_from_directory(
             truth_directory,
             cycle_point=self.cycle_point,
             max_days_offset=self.max_days_offset,
         )
 
-        plugin = ConstructReliabilityCalibrationTables(
-            truth_attribute=self.truth_attribute,
-            n_probability_bins=self.n_probability_bins,
-            single_value_lower_limit=self.single_value_lower_limit,
-            single_value_upper_limit=self.single_value_upper_limit,
-            aggregate_coordinates=self.aggregate_coordinates,
-        )
-        return plugin(self.forecast, self.truth)
+        if forecast and truth:
+            plugin = ConstructReliabilityCalibrationTables(
+                truth_attribute=self.truth_attribute,
+                n_probability_bins=self.n_probability_bins,
+                single_value_lower_limit=self.single_value_lower_limit,
+                single_value_upper_limit=self.single_value_upper_limit,
+                aggregate_coordinates=self.aggregate_coordinates,
+            )
+            return plugin(forecast, truth)
+        return None
 
 
 class AggregateReliabilityCalibrationTables(BasePlugin):
diff --git a/improver/cli/construct_reliability_tables.py b/improver/cli/construct_reliability_tables.py
index e1b2750ef1..e49461c73f 100644
--- a/improver/cli/construct_reliability_tables.py
+++ b/improver/cli/construct_reliability_tables.py
@@ -5,8 +5,6 @@
 # See LICENSE in the root of the repository for full licensing details.
 """CLI to construct reliability tables for use in reliability calibration."""
 
-from typing import Optional
-
 from improver import cli
 
 
@@ -20,8 +18,8 @@ def process(
     single_value_lower_limit: bool = False,
     single_value_upper_limit: bool = False,
     aggregate_coordinates: cli.comma_separated_list = None,
-    cycle_point: Optional[str] = None,
-    max_days_offset: Optional[int] = None,
+    cycle_point: str = None,
+    max_days_offset: int = None,
 ):
     """Populate reliability tables for use in reliability calibration.
 
diff --git a/improver/cli/estimate_emos_coefficients.py b/improver/cli/estimate_emos_coefficients.py
index 99d1653f59..58f39f80c1 100755
--- a/improver/cli/estimate_emos_coefficients.py
+++ b/improver/cli/estimate_emos_coefficients.py
@@ -7,8 +7,6 @@
 Statistics (EMOS), otherwise known as Non-homogeneous Gaussian
 Regression (NGR)."""
 
-from typing import Optional
-
 from improver import cli
 
 
@@ -21,8 +19,8 @@ def process(
     *,
     distribution,
     truth_attribute,
-    cycle_point: Optional[str] = None,
-    max_days_offset: Optional[int] = None,
+    cycle_point: str = None,
+    max_days_offset: int = None,
     point_by_point=False,
     use_default_initial_guess=False,
     units=None,
@@ -109,7 +107,7 @@ def process(
         max_days_offset=max_days_offset,
         point_by_point=point_by_point,
         use_default_initial_guess=use_default_initial_guess,
-        desired_units=units,
+        units=units,
         predictor=predictor,
         tolerance=tolerance,
         max_iterations=max_iterations,

From 0d3019c5235e90eeac53bf9b60b29f363f279004 Mon Sep 17 00:00:00 2001
From: Sam Griffiths <sam.griffiths@metoffice.gov.uk>
Date: Tue, 8 Oct 2024 11:17:27 +0100
Subject: [PATCH 07/12] filter cubes by metadata

---
 improver/calibration/__init__.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/improver/calibration/__init__.py b/improver/calibration/__init__.py
index 23e318fa6d..a18544eb4b 100644
--- a/improver/calibration/__init__.py
+++ b/improver/calibration/__init__.py
@@ -11,6 +11,7 @@
 from typing import Dict, List, Optional, Tuple
 
 from iris.cube import Cube, CubeList
+from iris import load
 
 from improver.metadata.probabilistic import (
     get_diagnostic_cube_name_from_probability_name,
@@ -305,20 +306,20 @@ def get_cube_from_directory(
             print(f"No files found in {directory}")
         return None
 
+    cubes = load_cubelist(files)
     if max_days_offset and cycle_point:
-        # Ignore files if they are older than max_days_offset days from cycle_point
-        # ToDo - test checking by metadata, not file names
         cycle_point = datetime.strptime(cycle_point, date_format)
         earliest_time = cycle_point - timedelta(days=max_days_offset)
-        for filename in files.copy():
-            file_datetime = filename.split("/")[-1].split("-")[0]
-            if datetime.strptime(file_datetime, date_format) < earliest_time:
-                files.remove(filename)
-
-    if len(files) < 2:
+        epoch = datetime(1970, 1, 1)  # naive UTC, to match the parsed cycle_point
+        for cube in cubes.copy():
+            rt = cube.coord("forecast_reference_time").points[0]
+            period = cube.coord("forecast_period").points[0]
+            if epoch + timedelta(seconds=float(rt + period)) < earliest_time:
+                cubes.remove(cube)
+
+    if len(cubes) < 2:
         if verbose:
             print(f"Not enough files found in {directory}")
         return None
 
-    cubes = load_cubelist(files)
     return MergeCubes()(cubes)

From 9cb95f75b03e5cd8a2e14db03bf341c446b5305f Mon Sep 17 00:00:00 2001
From: Sam Griffiths <sam.griffiths@metoffice.gov.uk>
Date: Tue, 8 Oct 2024 13:02:47 +0100
Subject: [PATCH 08/12] update acceptance tests

---
 .../test_estimate_emos_coefficients.py        | 36 +++++++++----------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/improver_tests/acceptance/test_estimate_emos_coefficients.py b/improver_tests/acceptance/test_estimate_emos_coefficients.py
index fd0f7be794..84ffc65ad1 100644
--- a/improver_tests/acceptance/test_estimate_emos_coefficients.py
+++ b/improver_tests/acceptance/test_estimate_emos_coefficients.py
@@ -43,8 +43,8 @@ def test_normal(tmp_path):
     """
     kgo_dir = acc.kgo_root() / "estimate-emos-coefficients/normal"
     kgo_path = kgo_dir / "kgo.nc"
-    history_path = kgo_dir / "history/*.nc"
-    truth_path = kgo_dir / "truth/*.nc"
+    history_path = kgo_dir / "history/"
+    truth_path = kgo_dir / "truth/"
     output_path = tmp_path / "output.nc"
     args = [
         history_path,
@@ -72,8 +72,8 @@ def test_truncated_normal(tmp_path):
     """
     kgo_dir = acc.kgo_root() / "estimate-emos-coefficients/truncated_normal"
     kgo_path = kgo_dir / "kgo.nc"
-    history_path = kgo_dir / "history/*.nc"
-    truth_path = kgo_dir / "truth/*.nc"
+    history_path = kgo_dir / "history/"
+    truth_path = kgo_dir / "truth/"
     output_path = tmp_path / "output.nc"
     args = [
         history_path,
@@ -101,8 +101,8 @@ def test_normal_default_initial_guess(tmp_path):
     """
     kgo_dir = acc.kgo_root() / "estimate-emos-coefficients/normal"
     kgo_path = kgo_dir / "default_initial_guess_kgo.nc"
-    history_path = kgo_dir / "history/*.nc"
-    truth_path = kgo_dir / "truth/*.nc"
+    history_path = kgo_dir / "history/"
+    truth_path = kgo_dir / "truth/"
     output_path = tmp_path / "output.nc"
     args = [
         history_path,
@@ -128,8 +128,8 @@ def test_units(tmp_path):
     """Test prescribed units that may not match inputs"""
     kgo_dir = acc.kgo_root() / "estimate-emos-coefficients/normal"
     kgo_path = kgo_dir / "kgo.nc"
-    history_path = kgo_dir / "history/*.nc"
-    truth_path = kgo_dir / "truth/*.nc"
+    history_path = kgo_dir / "history/"
+    truth_path = kgo_dir / "truth/"
     output_path = tmp_path / "output.nc"
     args = [
         history_path,
@@ -155,8 +155,8 @@ def test_using_realizations_as_predictor(tmp_path):
     """Test using non-default predictor realizations"""
     kgo_dir = acc.kgo_root() / "estimate-emos-coefficients"
     kgo_path = kgo_dir / "normal/realizations/kgo.nc"
-    history_path = kgo_dir / "normal/history/*.nc"
-    truth_path = kgo_dir / "normal/truth/*.nc"
+    history_path = kgo_dir / "normal/history/"
+    truth_path = kgo_dir / "normal/truth/"
     output_path = tmp_path / "output.nc"
     args = [
         history_path,
@@ -186,8 +186,8 @@ def test_land_points_only(tmp_path):
     kgo_dir = acc.kgo_root() / "estimate-emos-coefficients"
     kgo_path = kgo_dir / "normal/land_only_kgo.nc"
     lsmask_path = kgo_dir / "landmask.nc"
-    history_path = kgo_dir / "normal/history/*.nc"
-    truth_path = kgo_dir / "normal/truth/*.nc"
+    history_path = kgo_dir / "normal/history/"
+    truth_path = kgo_dir / "normal/truth/"
     output_path = tmp_path / "output.nc"
     args = [
         history_path,
@@ -217,8 +217,8 @@ def test_normal_point_by_point_sites(tmp_path):
     """
     kgo_dir = acc.kgo_root() / "estimate-emos-coefficients/normal/sites"
     kgo_path = kgo_dir / "point_by_point" / "kgo.nc"
-    history_path = kgo_dir / "history/*.nc"
-    truth_path = kgo_dir / "truth/*.nc"
+    history_path = kgo_dir / "history/"
+    truth_path = kgo_dir / "truth/"
     output_path = tmp_path / "output.nc"
     est_emos_tol = str(0.01)
     compare_emos_tolerance = 0.1
@@ -250,8 +250,8 @@ def test_normal_realizations_point_by_point_sites(tmp_path):
     """
     kgo_dir = acc.kgo_root() / "estimate-emos-coefficients/normal/sites"
     kgo_path = kgo_dir / "point_by_point" / "realizations_kgo.nc"
-    history_path = kgo_dir / "history/*.nc"
-    truth_path = kgo_dir / "truth/*.nc"
+    history_path = kgo_dir / "history/"
+    truth_path = kgo_dir / "truth/"
     output_path = tmp_path / "output.nc"
     est_emos_tol = str(0.01)
     compare_emos_tolerance = 0.1
@@ -285,8 +285,8 @@ def test_normal_point_by_point_default_initial_guess_sites(tmp_path):
     """
     kgo_dir = acc.kgo_root() / "estimate-emos-coefficients/normal/sites"
     kgo_path = kgo_dir / "point_by_point_default_initial_guess" / "kgo.nc"
-    history_path = kgo_dir / "history/*.nc"
-    truth_path = kgo_dir / "truth/*.nc"
+    history_path = kgo_dir / "history/"
+    truth_path = kgo_dir / "truth/"
     output_path = tmp_path / "output.nc"
     est_emos_tol = str(0.01)
     compare_emos_tolerance = 0.1

From 31a840dbbb95ad93e641097eb4e20991a4b94d0a Mon Sep 17 00:00:00 2001
From: Sam Griffiths <sam.griffiths@metoffice.gov.uk>
Date: Tue, 8 Oct 2024 14:50:33 +0100
Subject: [PATCH 09/12] reliability table acceptance tests - moved data

---
 improver/calibration/__init__.py                     |  4 +++-
 improver/calibration/ensemble_calibration.py         |  2 +-
 improver/calibration/reliability_calibration.py      |  5 ++---
 improver/cli/construct_reliability_tables.py         |  4 +++-
 improver_tests/acceptance/SHA256SUMS                 |  8 ++++----
 .../acceptance/test_construct_reliability_tables.py  | 12 ++++++------
 6 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/improver/calibration/__init__.py b/improver/calibration/__init__.py
index a18544eb4b..b95310d267 100644
--- a/improver/calibration/__init__.py
+++ b/improver/calibration/__init__.py
@@ -11,7 +11,6 @@
 from typing import Dict, List, Optional, Tuple
 
 from iris.cube import Cube, CubeList
-from iris import load
 
 from improver.metadata.probabilistic import (
     get_diagnostic_cube_name_from_probability_name,
@@ -301,6 +300,8 @@ def get_cube_from_directory(
         Cube
     """
     files = [*map(str, directory.glob("*.nc"))]
+    print(f"found {len(files)} files in {directory}")
+    print(files)
     if len(files) == 0:
         if verbose:
             print(f"No files found in {directory}")
@@ -322,4 +323,5 @@ def get_cube_from_directory(
             print(f"Not enough files found in {directory}")
         return None
 
+    print(f"returning {len(cubes)} cubes")
     return MergeCubes()(cubes)
diff --git a/improver/calibration/ensemble_calibration.py b/improver/calibration/ensemble_calibration.py
index e86f5243b9..c05db8d9f8 100644
--- a/improver/calibration/ensemble_calibration.py
+++ b/improver/calibration/ensemble_calibration.py
@@ -1407,7 +1407,7 @@ def process(self, forecast_directory, truth_directory, land_sea_mask=None):
             cycle_point=self.cycle_point,
             max_days_offset=self.max_days_offset,
         )
-        # need any additional metadata checks?
+
         if forecast and truth:
             plugin = EstimateCoefficientsForEnsembleCalibration(
                 self.distribution,
diff --git a/improver/calibration/reliability_calibration.py b/improver/calibration/reliability_calibration.py
index daa4b84e24..80eb29cb6e 100644
--- a/improver/calibration/reliability_calibration.py
+++ b/improver/calibration/reliability_calibration.py
@@ -565,13 +565,12 @@ def process(self, forecast_directory, truth_directory):
 
         if forecast and truth:
             plugin = ConstructReliabilityCalibrationTables(
-                truth_attribute=self.truth_attribute,
+                # truth_attribute=self.truth_attribute,
                 n_probability_bins=self.n_probability_bins,
                 single_value_lower_limit=self.single_value_lower_limit,
                 single_value_upper_limit=self.single_value_upper_limit,
-                aggregate_coordinates=self.aggregate_coordinates,
             )
-            return plugin(forecast, truth)
+            return plugin(forecast, truth, aggregate_coords=self.aggregate_coordinates)
         return None
 
 
diff --git a/improver/cli/construct_reliability_tables.py b/improver/cli/construct_reliability_tables.py
index e49461c73f..0affa5446c 100644
--- a/improver/cli/construct_reliability_tables.py
+++ b/improver/cli/construct_reliability_tables.py
@@ -13,7 +13,8 @@
 def process(
     forecast_directory: cli.inputpath,
     truth_directory: cli.inputpath,
-    truth_attribute,
+    *,
+    truth_attribute: str = "",
     n_probability_bins: int = 5,
     single_value_lower_limit: bool = False,
     single_value_upper_limit: bool = False,
@@ -69,6 +70,7 @@ def process(
     )
 
     return MetaConstructReliabilityCalibrationTables(
+        truth_attribute=truth_attribute,
         n_probability_bins=n_probability_bins,
         single_value_lower_limit=single_value_lower_limit,
         single_value_upper_limit=single_value_upper_limit,
diff --git a/improver_tests/acceptance/SHA256SUMS b/improver_tests/acceptance/SHA256SUMS
index 28f0132b04..27ab8e77f1 100644
--- a/improver_tests/acceptance/SHA256SUMS
+++ b/improver_tests/acceptance/SHA256SUMS
@@ -315,13 +315,13 @@ b60f6046c86319f8b7ca3b5d7902dbaf3a52f571f30ba56a1a4bc814c42dd341  ./combine/mini
 cd5fe4e4ef61d890c30cc9cbd236bf0dfdbedd5f12f8a92803aa57be84c0d9ab  ./combine/multiplication_cellmethods/kgo.nc
 f1ae76b9374c5d1076b89a7348fe9bbc393a12ae4ccdc660a170ba5ff0f823ab  ./combine/multiplication_cellmethods/precipitation_accumulation-PT01H.nc
 b8934494b4a24daa2408c4d95a2367e328e25e8323e34c67ef6026d51021be32  ./combine/multiplication_cellmethods/precipitation_is_snow.nc
-0bd96af6cb5c6caa045e397589dd0ce3b498af837d989fe73326f5e9459c6054  ./construct-reliability-tables/basic/forecast_0.nc
-fbc14286b4ce41e2e60df0870ae4911c1b00a38ec96912f43c6187fcaf7d02f6  ./construct-reliability-tables/basic/forecast_1.nc
+0bd96af6cb5c6caa045e397589dd0ce3b498af837d989fe73326f5e9459c6054  ./construct-reliability-tables/basic/forecast/forecast_0.nc
+fbc14286b4ce41e2e60df0870ae4911c1b00a38ec96912f43c6187fcaf7d02f6  ./construct-reliability-tables/basic/forecast/forecast_1.nc
 0d0edf9751a2019db952907700b02499ec9f1c360db4591a8012ca247a841c73  ./construct-reliability-tables/basic/kgo_aggregated.nc
 902e5cb9d3dc5d2b78bb99aff8370f9815adf5064b2caeb7abed73a56a897a43  ./construct-reliability-tables/basic/kgo_single_value_bins.nc
 72d4fd0655d1b7a2bc11d85741ec944f195c59813ae629e6858116c4e09eccb0  ./construct-reliability-tables/basic/kgo_without_single_value_bins.nc
-8ed50464c34b8673d98d1256d1c11b9eeea911dc79f7f75d425a590bf8697301  ./construct-reliability-tables/basic/truth_0.nc
-3999adb3749052d9efdfab863427a20a1fabbca06ff430c6c9cf5f89d1ea4d60  ./construct-reliability-tables/basic/truth_1.nc
+8ed50464c34b8673d98d1256d1c11b9eeea911dc79f7f75d425a590bf8697301  ./construct-reliability-tables/basic/truth/truth_0.nc
+3999adb3749052d9efdfab863427a20a1fabbca06ff430c6c9cf5f89d1ea4d60  ./construct-reliability-tables/basic/truth/truth_1.nc
 9795b9758a88e2c4d4171c8b08304f7f0711e03acda66a7394333f8b919ccf50  ./convection-ratio/basic/kgo.nc
 74f850942572aa99de807396d48bd80dd96088c638a9d5fa379b95f7c5ad8614  ./convection-ratio/basic/lwe_convective_precipitation_rate.nc
 b946c7687cb9ed02a12a934429a31306004ad45214cf4b451468b077018c0911  ./convection-ratio/basic/lwe_stratiform_precipitation_rate.nc
diff --git a/improver_tests/acceptance/test_construct_reliability_tables.py b/improver_tests/acceptance/test_construct_reliability_tables.py
index 4b577a8489..5f9da5150c 100644
--- a/improver_tests/acceptance/test_construct_reliability_tables.py
+++ b/improver_tests/acceptance/test_construct_reliability_tables.py
@@ -20,8 +20,8 @@ def test_no_single_value_bins(tmp_path):
     """
     kgo_dir = acc.kgo_root() / "construct-reliability-tables/basic"
     kgo_path = kgo_dir / "kgo_without_single_value_bins.nc"
-    history_path = kgo_dir / "forecast*.nc"
-    truth_path = kgo_dir / "truth*.nc"
+    history_path = kgo_dir / "forecast/"
+    truth_path = kgo_dir / "truth/"
     output_path = tmp_path / "output.nc"
     args = [
         history_path,
@@ -42,8 +42,8 @@ def test_aggregate(tmp_path):
     """
     kgo_dir = acc.kgo_root() / "construct-reliability-tables/basic"
     kgo_path = kgo_dir / "kgo_aggregated.nc"
-    history_path = kgo_dir / "forecast*.nc"
-    truth_path = kgo_dir / "truth*.nc"
+    history_path = kgo_dir / "forecast/"
+    truth_path = kgo_dir / "truth/"
     output_path = tmp_path / "output.nc"
     args = [
         history_path,
@@ -66,8 +66,8 @@ def test_single_value_bins(tmp_path):
     """
     kgo_dir = acc.kgo_root() / "construct-reliability-tables/basic"
     kgo_path = kgo_dir / "kgo_single_value_bins.nc"
-    history_path = kgo_dir / "forecast*.nc"
-    truth_path = kgo_dir / "truth*.nc"
+    history_path = kgo_dir / "forecast/"
+    truth_path = kgo_dir / "truth/"
     output_path = tmp_path / "output.nc"
     args = [
         history_path,

From a59946062a915cae44f81e34b87f39521945d2d6 Mon Sep 17 00:00:00 2001
From: Sam Griffiths <sam.griffiths@metoffice.gov.uk>
Date: Tue, 8 Oct 2024 15:36:07 +0100
Subject: [PATCH 10/12] tests for get_cube_from_directory

---
 improver/calibration/__init__.py        |  4 +-
 improver_tests/calibration/test_init.py | 72 +++++++++++++++++++++++++
 2 files changed, 73 insertions(+), 3 deletions(-)

diff --git a/improver/calibration/__init__.py b/improver/calibration/__init__.py
index b95310d267..f95299752b 100644
--- a/improver/calibration/__init__.py
+++ b/improver/calibration/__init__.py
@@ -300,8 +300,7 @@ def get_cube_from_directory(
         Cube
     """
     files = [*map(str, directory.glob("*.nc"))]
-    print(f"found {len(files)} files in {directory}")
-    print(files)
+
     if len(files) == 0:
         if verbose:
             print(f"No files found in {directory}")
@@ -323,5 +322,4 @@ def get_cube_from_directory(
             print(f"Not enough files found in {directory}")
         return None
 
-    print(f"returning {len(cubes)} cubes")
     return MergeCubes()(cubes)
diff --git a/improver_tests/calibration/test_init.py b/improver_tests/calibration/test_init.py
index 0b306b229e..ed96a66279 100644
--- a/improver_tests/calibration/test_init.py
+++ b/improver_tests/calibration/test_init.py
@@ -4,16 +4,21 @@
 # See LICENSE in the root of the repository for full licensing details.
 """Unit tests for calibration.__init__"""
 
+import os
 import unittest
 from datetime import datetime, timedelta
+from pathlib import Path
+from tempfile import mkdtemp
 
 import iris
 import numpy as np
 import pytest
 from iris.cube import CubeList
+from iris.tests import IrisTest
 
 from improver.calibration import (
     add_warning_comment,
+    get_cube_from_directory,
     split_forecasts_and_bias_files,
     split_forecasts_and_coeffs,
     split_forecasts_and_truth,
@@ -24,6 +29,7 @@
     set_up_probability_cube,
     set_up_variable_cube,
 )
+from improver.utilities.save import save_netcdf
 from improver_tests import ImproverTest
 
 
@@ -629,5 +635,71 @@ def test_add_warning_to_comment(comment):
     assert result.attributes["comment"] == expected
 
 
+class test_get_cube_from_directory(IrisTest):
+    """Test that the get_cube_from_directory function returns a cube."""
+
+    def setUp(self):
+        """Create a directory holding two cube files and an empty directory."""
+        self.directory = Path(mkdtemp())
+        self.empty_directory = Path(mkdtemp())
+        self.filepath = os.path.join(self.directory, "temp.nc")
+        self.filepath2 = os.path.join(self.directory, "temp2.nc")
+        time = datetime(2017, 11, 10, 4, 0)
+        time2 = datetime(2017, 11, 10, 5, 0)
+        frt = datetime(2017, 11, 10, 0, 0)
+        self.cube = set_up_variable_cube(
+            np.ones((3, 3, 3), dtype=np.float32), time=time, frt=frt
+        )
+        self.cube2 = set_up_variable_cube(
+            np.zeros((3, 3, 3), dtype=np.float32), time=time2, frt=frt
+        )
+        save_netcdf(self.cube, self.filepath)
+        save_netcdf(self.cube2, self.filepath2)
+        self.realization_points = self.cube.coord("realization").points
+        self.time_points = self.cube.coord("time").points
+        self.time_points2 = self.cube2.coord("time").points
+        self.latitude_points = self.cube.coord("latitude").points
+        self.longitude_points = self.cube.coord("longitude").points
+
+    def tearDown(self):
+        """Remove the temporary files and directories created for testing."""
+        os.remove(self.filepath)
+        os.remove(self.filepath2)
+        os.rmdir(self.directory)
+        os.rmdir(self.empty_directory)
+
+    def test_no_files(self):
+        """Test that nothing is returned for no inputs"""
+        result = get_cube_from_directory(self.empty_directory)
+        self.assertIsNone(result)
+
+    def test_old_files(self):
+        """Test that nothing is returned for old files"""
+        # Both cubes are dated 2017-11-10, outside the 30 day window.
+        result = get_cube_from_directory(
+            self.directory, cycle_point="20180101T0000Z", max_days_offset=30
+        )
+        self.assertIsNone(result)
+
+    def test_relevant_files(self):
+        """Test that in-window files are merged into a single cube"""
+        result = get_cube_from_directory(
+            self.directory, cycle_point="20171112T0000Z", max_days_offset=30
+        )
+
+        self.assertArrayAlmostEqual(
+            result.coord("realization").points, self.realization_points
+        )
+        self.assertArrayAlmostEqual(
+            result.coord("time").points, [self.time_points[0], self.time_points2[0]]
+        )
+        self.assertArrayAlmostEqual(
+            result.coord("latitude").points, self.latitude_points
+        )
+        self.assertArrayAlmostEqual(
+            result.coord("longitude").points, self.longitude_points
+        )
+
+
 if __name__ == "__main__":
     unittest.main()

From 098fa5892d5f57949e4c1ac50f23778cc3ca713b Mon Sep 17 00:00:00 2001
From: Sam Griffiths <sam.griffiths@metoffice.gov.uk>
Date: Tue, 8 Oct 2024 15:39:22 +0100
Subject: [PATCH 11/12] doc building

---
 improver/calibration/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/improver/calibration/__init__.py b/improver/calibration/__init__.py
index f95299752b..e72771e0a4 100644
--- a/improver/calibration/__init__.py
+++ b/improver/calibration/__init__.py
@@ -282,6 +282,7 @@ def get_cube_from_directory(
 
     To switch on the max offset filter, both cycle_point and max_days_offset
     need to be provided
+    
     Args:
         directory (pathlib.Path):
             The path to the directory.

From 95865ce360aba7f7ea6efa91215bf81e649a3454 Mon Sep 17 00:00:00 2001
From: Sam Griffiths <sam.griffiths@metoffice.gov.uk>
Date: Tue, 8 Oct 2024 15:46:32 +0100
Subject: [PATCH 12/12] sort whitespace

---
 improver/calibration/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/improver/calibration/__init__.py b/improver/calibration/__init__.py
index e72771e0a4..392e41a5bd 100644
--- a/improver/calibration/__init__.py
+++ b/improver/calibration/__init__.py
@@ -282,7 +282,7 @@ def get_cube_from_directory(
 
     To switch on the max offset filter, both cycle_point and max_days_offset
     need to be provided
-    
+
     Args:
         directory (pathlib.Path):
             The path to the directory.