metoppv · mspelman07 · Sep 30, 2024 · Sep 30, 2024 · Sep 30, 2024 · Sep 30, 2024
diff --git a/doc/source/extended_documentation/categorical/build_a_decision_tree.rst b/doc/source/extended_documentation/categorical/build_a_decision_tree.rst
@@ -74,9 +74,12 @@ accessed with this key contains the essentials that make the node function.
     must be exceeded or not exceeded (see threshold_condition) for the node to
     progress to the succeed target. Two values required if condition_combination
     is being used.
-  - **threshold_condition** (str): Defines the inequality test to be applied to
+  - **threshold_condition** (str or list(str)): Defines the inequality test to be applied to
     the probability threshold(s). Inequalities that can be used are "<=", "<",
-    ">", ">=".
+    ">", ">=". If multiple tests are being applied in a single node, this can be
+    a list of the same length as the probability_thresholds list with each element
+    defining the inequality for the corresponding threshold. Alternatively a single
+    inequality can be applied to all thresholds.
   - **condition_combination** (str): If multiple tests are being applied in a
     single node, this value determines the logic with which they are combined.
     The values can be "AND", "OR".
@@ -128,6 +131,14 @@ following additional keys:
   - **deterministic** (boolean): Determines whether the node is expecting a deterministic
     input.
 
+Additionally a node can be set up to handle masked points in the diagnostic_fields. By default if
+an input cube has masked points the decision tree will return a masked point.
+However an additional key can be added to the node to specify what path to take if the input cube
+has masked points. This key is:
+
+  - **if_masked** (str,optional): The next node if the input cube has masked points. This can
+    be the same as the "if_true" or "if_false" keys or can be a different node.
+
 The first leaf node above is encoded as follows::
 
   {

diff --git a/improver/categorical/decision_tree.py b/improver/categorical/decision_tree.py
@@ -378,9 +378,17 @@ def create_condition_chain(self, test_conditions: Dict) -> List:
             coord = "thresholds"
         else:
             coord = "probability_thresholds"
+        if isinstance(test_conditions["threshold_condition"], str):
+            comparator = [test_conditions["threshold_condition"]] * len(
+                test_conditions[coord]
+            )
+        else:
+            comparator = test_conditions["threshold_condition"]
 
-        for index, (diagnostic, p_threshold) in enumerate(
-            zip(test_conditions["diagnostic_fields"], test_conditions[coord])
+        for index, (diagnostic, comp, p_threshold) in enumerate(
+            zip(
+                test_conditions["diagnostic_fields"], comparator, test_conditions[coord]
+            )
         ):
 
             d_threshold = test_conditions.get("diagnostic_thresholds")
@@ -416,13 +424,7 @@ def create_condition_chain(self, test_conditions: Dict) -> List:
                     )
                 else:
                     extract_constraint = iris.Constraint(diagnostic)
-            conditions.append(
-                [
-                    extract_constraint,
-                    test_conditions["threshold_condition"],
-                    p_threshold,
-                ]
-            )
+            conditions.append([extract_constraint, comp, p_threshold])
         condition_chain = [conditions, test_conditions["condition_combination"]]
         return condition_chain
 
@@ -639,28 +641,36 @@ def compare_array_to_threshold(
         Args:
             arr
             comparator:
-                One of  '<', '>', '<=', '>='.
+                One of  '<', '>', '<=', '>=','is_masked'.
             threshold
 
         Returns:
             Array of booleans.
 
         Raises:
-            ValueError: If comparator is not one of '<', '>', '<=', '>='.
+            ValueError: If comparator is not one of '<', '>', '<=', '>=','is_masked'.
         """
         if comparator == "<":
-            return arr < threshold
+            result = arr < threshold
         elif comparator == ">":
-            return arr > threshold
+            result = arr > threshold
         elif comparator == "<=":
-            return arr <= threshold
+            result = arr <= threshold
         elif comparator == ">=":
-            return arr >= threshold
+            result = arr >= threshold
+        elif comparator == "is_masked":
+            if np.ma.is_masked(arr):
+                result = arr.mask
+            else:
+                result = arr != arr
         else:
             raise ValueError(
                 f"Invalid comparator: {comparator}. "
-                "Comparator must be one of '<', '>', '<=', '>='."
+                "Comparator must be one of '<', '>', '<=', '>=','is_masked'."
             )
+        if np.ma.is_masked(result):
+            result[result.mask] = False
+        return result
 
     def evaluate_extract_expression(
         self, cubes: CubeList, expression: Union[Constraint, List]
@@ -818,7 +828,11 @@ def process(self, *cubes: Union[Cube, CubeList]) -> Cube:
         graph = {
             key: [self.queries[key]["leaf"]]
             if "leaf" in self.queries[key].keys()
-            else [self.queries[key]["if_true"], self.queries[key]["if_false"]]
+            else [
+                self.queries[key]["if_true"],
+                self.queries[key]["if_false"],
+                self.queries[key].get("if_masked"),
+            ]
             for key in self.queries
         }
         # Search through tree for all leaves (category end points)
@@ -847,6 +861,25 @@ def process(self, *cubes: Union[Cube, CubeList]) -> Cube:
                             current["threshold_condition"],
                             current["condition_combination"],
                         ) = self.invert_condition(current)
+                    if current.get("if_masked") == next_node and current.get(
+                        "if_masked"
+                    ) not in [current.get("if_false"), current.get("if_true")]:
+                        # if if_masked is not the same as if_false or if_true, then
+                        # it is a separate branch of the tree and we need to replace
+                        # the condition.
+                        current["diagnostic_fields"] = current["diagnostic_fields"]
+                        current["threshold_condition"] = "is_masked"
+                        current["condition_combination"] = ""
+                    elif current.get("if_masked") == next_node:
+                        # if masked is the same as if_false or if_true, then we need
+                        # to add the masked condition to the existing condition.
+                        current["diagnostic_fields"] = current["diagnostic_fields"] * 2
+                        current["threshold_condition"] = [
+                            current["threshold_condition"],
+                            "is_masked",
+                        ]
+                        current["condition_combination"] = "OR"
+                        current["thresholds"] = current["thresholds"] * 2
                     if "leaf" not in current.keys():
                         conditions.append(self.create_condition_chain(current))
                 test_chain = [conditions, "AND"]

diff --git a/improver/categorical/utilities.py b/improver/categorical/utilities.py
@@ -27,6 +27,7 @@
 LEAF_OPTIONAL_KEY_WORDS = ["if_night", "is_unreachable", "group"]
 
 OPTIONAL_KEY_WORDS = [
+    "if_masked",
     "if_diagnostic_missing",
     "deterministic",
     "diagnostic_thresholds",
@@ -493,9 +494,13 @@ def check_tree(
                     f"Node {node} uses invalid threshold condition {threshold}"
                 )
 
-            # Check the succeed and fail destinations are valid; that is a valid
+            # Check the succeed fail and masked destinations are valid; that is a valid
             # category for leaf nodes, and other tree nodes otherwise
-            for result in "if_true", "if_false":
+            if items.get("if_masked"):
+                nodes = ["if_true", "if_false", "if_masked"]
+            else:
+                nodes = ["if_true", "if_false"]
+            for result in nodes:
                 value = items[result]
                 if isinstance(value, str):
                     if value not in decision_tree.keys():

diff --git a/improver_tests/categorical/decision_tree/__init__.py b/improver_tests/categorical/decision_tree/__init__.py
@@ -569,14 +569,34 @@ def deterministic_diagnostic_tree() -> Dict[str, Dict[str, Any]]:
     queries = {
         "meta": {"name": "precipitation_type"},
         "precip_rate": {
-            "if_true": "hail_rate",
+            "if_true": "cloud_top_temp",
             "if_false": "dry",
             "thresholds": [0],
             "threshold_condition": ">",
             "condition_combination": "",
             "diagnostic_fields": ["precipitation_rate"],
             "deterministic": True,
         },
+        "cloud_top_temp": {
+            "if_true": "cloud_base_temp",
+            "if_false": "snow",
+            "if_masked": "dry",
+            "thresholds": [258.15],
+            "threshold_condition": "<=",
+            "condition_combination": "",
+            "diagnostic_fields": ["cloud_top_temperature"],
+            "deterministic": True,
+        },
+        "cloud_base_temp": {
+            "if_true": "hail_rate",
+            "if_false": "snow",
+            "if_masked": "hail_rate",
+            "thresholds": [268.15],
+            "threshold_condition": ">=",
+            "condition_combination": "",
+            "diagnostic_fields": ["cloud_base_temperature"],
+            "deterministic": True,
+        },
         "hail_rate": {
             "if_true": "hail",
             "if_false": "rain",
@@ -589,6 +609,7 @@ def deterministic_diagnostic_tree() -> Dict[str, Dict[str, Any]]:
         "dry": {"leaf": 0},
         "rain": {"leaf": 1},
         "hail": {"leaf": 2},
+        "snow": {"leaf": 3},
     }
 
     return queries
diff --git a/improver_tests/categorical/decision_tree/test_ApplyDecisionTree.py b/improver_tests/categorical/decision_tree/test_ApplyDecisionTree.py
@@ -81,20 +81,90 @@ def hail_cube() -> Cube:
     return cube
 
 
+@pytest.fixture()
+def cloud_top_temp() -> Cube:
+    """
+    Set up a cloud top temperature, deterministic cube.
+    """
+    data = np.full((7, 3), dtype=np.float32, fill_value=250)
+    cube = set_up_variable_cube(
+        data,
+        name="cloud_top_temperature",
+        units="K",
+        time=dt(2017, 10, 10, 12, 0),
+        frt=dt(2017, 10, 10, 11, 0),
+    )
+    return cube
+
+
+@pytest.fixture()
+def cloud_base_temp() -> Cube:
+    """
+    Set up a cloud base temperature, deterministic cube.
+    """
+    data = np.full((7, 3), dtype=np.float32, fill_value=270)
+    cube = set_up_variable_cube(
+        data,
+        name="cloud_base_temperature",
+        units="K",
+        time=dt(2017, 10, 10, 12, 0),
+        frt=dt(2017, 10, 10, 11, 0),
+    )
+    return cube
+
+
+@pytest.mark.parametrize(
+    "masked_data", ("no_mask", "mask_precip", "mask_cloud_top", "mask_cloud_base")
+)
 @pytest.mark.parametrize(
     "precip_fill,hail_fill,expected", ((1, 1, 2), (1, 0, 1), (0, 0, 0), (0, 1, 0))
 )
 def test_non_probablistic_tree(
-    precip_cube, hail_cube, precip_fill, hail_fill, expected
+    precip_cube,
+    hail_cube,
+    cloud_top_temp,
+    cloud_base_temp,
+    precip_fill,
+    hail_fill,
+    expected,
+    masked_data,
 ):
     """Test that ApplyDecisionTree correctly manages a decision tree with deterministic
-    inputs"""
+    inputs. Also check that if data is masked the output will be masked unless the
+    node includes an if_masked setting."""
+
+    expected_array = np.full_like(precip_cube.data, expected)
+
+    if masked_data == "mask_precip":
+        # Test that if there is no if_masked setting in the decision tree, the output
+        # will be masked
+        precip_mask = np.full_like(precip_cube.data, False)
+        precip_mask[0, 0] = True
+        precip_cube.data = np.ma.masked_array(precip_cube.data, mask=precip_mask)
+
+        expected_mask = np.full_like(precip_cube.data, False)
+        expected_mask[0, 0] = True
+        expected = np.ma.masked_array(expected_array, mask=expected_mask)
+    elif masked_data == "mask_cloud_top":
+        # Testing if there is an if_masked setting in the decision tree going to a
+        # different node than if_true or if_false
+        cloud_mask = np.full_like(cloud_top_temp.data, False)
+        cloud_mask[0, 0] = True
+        cloud_top_temp.data = np.ma.masked_array(cloud_top_temp.data, mask=cloud_mask)
+        expected_array[0, 0] = 0
+    elif masked_data == "mask_cloud_base":
+        # Testing if there is an if_masked setting in the decision tree going to
+        # the same node as if_true
+        cloud_mask = np.full_like(cloud_base_temp.data, False)
+        cloud_mask[0, 0] = True
+        cloud_base_temp.data = np.ma.masked_array(cloud_base_temp.data, mask=cloud_mask)
+
     precip_cube.data.fill(precip_fill)
     hail_cube.data.fill(hail_fill)
     result = ApplyDecisionTree(decision_tree=deterministic_diagnostic_tree())(
-        iris.cube.CubeList([precip_cube, hail_cube])
+        iris.cube.CubeList([precip_cube, hail_cube, cloud_top_temp, cloud_base_temp])
     )
-    assert np.all(result.data == expected)
+    assert np.all(result.data == expected_array)
 
 
 def test_non_probablistic_tree_missing_data(hail_cube):
@@ -554,6 +624,7 @@ def test_basic(self):
         self.assertIsInstance(result, list)
         self.assertEqual(len(result), 2)
         self.assertIsInstance(result[0], list)
+
         for i in range(2):
             constraint_exp = expected[0][i][0]
             constraint_res = result[0][i][0]

diff --git a/improver_tests/categorical/test_utilities.py b/improver_tests/categorical/test_utilities.py
@@ -425,6 +425,8 @@ def test_interrogate_decision_tree_accumulation_3h():
 def test_interrogate_decision_tree_deterministic():
     """Test that the function returns the right strings."""
     expected = (
+        "\u26C5 cloud_base_temperature (deterministic)\n"
+        "\u26C5 cloud_top_temperature (deterministic)\n"
         "\u26C5 hail_rate (deterministic)\n"
         "\u26C5 precipitation_rate (deterministic)\n"
     )