mdtanker · mdtanker · Aug 10, 2024 · Aug 10, 2024 · Aug 10, 2024 · Aug 10, 2024
diff --git a/docs/user_guide/estimating_regional_field.ipynb b/docs/user_guide/estimating_regional_field.ipynb
diff --git a/pyproject.toml b/pyproject.toml
@@ -45,7 +45,7 @@ dependencies = [
   "harmonica>=0.6.0",
   "polartoolkit",
   "numba",
-  "scipy",
+  "scipy<1.14", # issue with UQpy import
   "numba_progress",
   "tqdm",
   "pygmt",
@@ -56,7 +56,7 @@ dependencies = [
 ###
   "optuna>=3.1.0", # need JournalStorage
   "optuna-integration",
-  "botorch>=0.4.0",
+  "botorch>=0.8.1", # need logei_candidates_func
   "joblib",
   "psutil",
 ###

diff --git a/src/invert4geom/cross_validation.py b/src/invert4geom/cross_validation.py
@@ -1,5 +1,6 @@
 from __future__ import annotations  # pylint: disable=too-many-lines
 
+import copy
 import itertools
 import logging
 import pathlib
@@ -639,11 +640,11 @@ def zref_density_optimal_parameter(
         )
         # pylint: enable=duplicate-code
         # calculate regional field
-        reg_kwargs = regional_grav_kwargs.copy()  # type: ignore[union-attr]
+        reg_kwargs = copy.deepcopy(regional_grav_kwargs)
 
         grav_df = regional.regional_separation(
             grav_df=grav_df,
-            **reg_kwargs,
+            **reg_kwargs,  # type: ignore[arg-type]
         )
 
         # update starting model in kwargs
@@ -1162,7 +1163,7 @@ def regional_separation_score(
     """
 
     # pull out kwargs
-    kwargs = kwargs.copy()
+    kwargs = copy.deepcopy(kwargs)
     method = kwargs.pop("method")
     grav_df = kwargs.pop("grav_df")
     true_regional = kwargs.pop("true_regional", None)

diff --git a/src/invert4geom/inversion.py b/src/invert4geom/inversion.py
@@ -34,9 +34,9 @@ def grav_column_der(
     prism_density: NDArray,
 ) -> NDArray:
     """
-        Function to calculate the vertical derivate of the gravitational acceleration at
-        an observation point caused by a right, rectangular prism. Approximated with
-        Hammer's annulus approximation :footcite:p:`mccubbineairborne2016`.
+    Function to calculate the vertical derivative of the gravitational acceleration at
+    an observation point caused by a right, rectangular prism. Approximated with
+    Hammer's annulus approximation :footcite:p:`mccubbineairborne2016`.
 
     Parameters
     ----------
@@ -842,6 +842,11 @@ def run_inversion(
 
     utils._check_gravity_inside_topography_region(grav_df, prism_layer)  # pylint: disable=protected-access
 
+    # check no nans in gravity df
+    if grav_df.res.isnull().values.any():
+        msg = "gravity dataframe contains NaN values in the 'res' column"
+        raise ValueError(msg)
+
     log.info("starting inversion")
 
     time_start = time.perf_counter()
@@ -1180,7 +1185,7 @@ def run_inversion_workflow(  # equivalent to monte_carlo_full_workflow
         time in seconds for the inversion to run
     """
 
-    kwargs = kwargs.copy()
+    kwargs = copy.deepcopy(kwargs)
     grav_df = grav_df.copy()
 
     # get kwargs

diff --git a/src/invert4geom/optimization.py b/src/invert4geom/optimization.py
@@ -1,5 +1,6 @@
 from __future__ import annotations  # pylint: disable=too-many-lines
 
+import copy
 import itertools
 import logging
 import math
@@ -19,6 +20,7 @@
 import optuna
 import pandas as pd
 import psutil
+import verde as vd
 import xarray as xr
 from numpy.typing import NDArray
 from optuna.storages import JournalFileStorage, JournalStorage
@@ -845,7 +847,7 @@ def __init__(
         self.fname = fname
         self.grav_df = grav_df
         self.constraints_df = constraints_df
-        self.regional_grav_kwargs = regional_grav_kwargs
+        self.regional_grav_kwargs = copy.deepcopy(regional_grav_kwargs)
         self.zref_limits = zref_limits
         self.density_contrast_limits = density_contrast_limits
         self.zref = zref
@@ -880,7 +882,7 @@ def __call__(self, trial: optuna.trial) -> float:
             msg = f"`grav_df` needs all the following columns: {cols}"
             raise ValueError(msg)
 
-        kwargs = self.kwargs.copy()
+        kwargs = copy.deepcopy(self.kwargs)
 
         if kwargs.get("apply_weighting_grid", None) is True:
             msg = (
@@ -977,7 +979,7 @@ def __call__(self, trial: optuna.trial) -> float:
         )
         # pylint: enable=duplicate-code
         # calculate regional field
-        reg_kwargs = self.regional_grav_kwargs.copy()
+        reg_kwargs = copy.deepcopy(self.regional_grav_kwargs)
 
         constraints_warning = (
             "Using constraint point minimization technique for regional field "
@@ -1268,9 +1270,8 @@ def optimize_inversion_zref_density_contrast(
         ), "test column contains True value, not needed except for during damping CV"
 
     optuna.logging.set_verbosity(optuna.logging.WARN)
-
     if regional_grav_kwargs is not None:
-        regional_grav_kwargs = regional_grav_kwargs.copy()
+        regional_grav_kwargs = copy.deepcopy(regional_grav_kwargs)
 
     # if sampler not provided, use BoTorch as default unless grid_search is True
     if sampler is None:
@@ -1641,7 +1642,6 @@ def optimize_inversion_zref_density_contrast(
                         "Density contrast (kg/m$^3$)",
                     ),
                 )
-
     return study, final_inversion_results
 
 
@@ -1700,7 +1700,7 @@ def optimize_inversion_zref_density_contrast_kfolds(
     df = constraints_df.copy()
     df = df[df.columns.drop(list(df.filter(regex="fold_")))]
 
-    kwargs = kwargs.copy()
+    kwargs = copy.deepcopy(kwargs)
 
     # split into test and training sets
     testing_training_df = cross_validation.split_test_train(
@@ -1763,9 +1763,17 @@ def __call__(self, trial: optuna.trial) -> float:
         float
             the score of the eq_sources fit
         """
-        kwargs = self.kwargs.copy()
+        kwargs = copy.deepcopy(self.kwargs)
         # get parameters provided not as limits
         depth = kwargs.pop("depth", "default")
+        # mean nearest-neighbor distance; NOTE(review): the 4.5 factor is not applied
+        if depth == "default":
+            depth = np.mean(
+                vd.median_distance(
+                    (kwargs.get("coordinates")[0], kwargs.get("coordinates")[1]),  # type: ignore[unused-ignore, index]
+                    k_nearest=1,
+                )
+            )
         block_size = kwargs.pop("block_size", None)
         damping = kwargs.pop("damping", None)
 
@@ -1861,7 +1869,7 @@ def optimize_eq_source_params(
     """
     optuna.logging.set_verbosity(optuna.logging.WARN)
 
-    kwargs = kwargs.copy()
+    kwargs = copy.deepcopy(kwargs)
     # if sampler not provided, used TPE as default
     if sampler is None:
         sampler = optuna.samplers.TPESampler(
@@ -1964,10 +1972,14 @@ def optimize_eq_source_params(
         except KeyError:
             msg = (
                 "No depth parameter value found in best params or kwargs, setting to "
-                "'default'"
+                "'default' (4.5 times mean distance between points)"
             )
             log.warning(msg)
             best_depth = "default"
+    if best_depth == "default":
+        best_depth = np.mean(
+            vd.median_distance((coordinates[0], coordinates[1]), k_nearest=1)
+        )
     if best_block_size is None:
         try:
             best_block_size = kwargs["block_size"]
@@ -2301,7 +2313,7 @@ def __call__(self, trial: optuna.trial) -> float:
             the scores
         """
 
-        new_kwargs = self.kwargs.copy()
+        new_kwargs = copy.deepcopy(self.kwargs)
 
         if self.grid_method == "pygmt":
             new_kwargs["tension_factor"] = trial.suggest_float(
@@ -2321,15 +2333,6 @@ def __call__(self, trial: optuna.trial) -> float:
             )
 
         elif self.grid_method == "eq_sources":
-            if self.depth_limits is not None:
-                new_kwargs["depth"] = trial.suggest_float(
-                    "depth",
-                    self.depth_limits[0],
-                    self.depth_limits[1],
-                )
-            else:
-                new_kwargs["depth"] = self.kwargs.get("depth", "default")
-
             if self.block_size_limits is not None:
                 new_kwargs["block_size"] = trial.suggest_float(
                     "block_size",
@@ -2363,6 +2366,24 @@ def __call__(self, trial: optuna.trial) -> float:
             raise ValueError(msg)
 
         if isinstance(self.training_df, pd.DataFrame):
+            if self.depth_limits is not None:
+                new_kwargs["depth"] = trial.suggest_float(
+                    "depth",
+                    self.depth_limits[0],
+                    self.depth_limits[1],
+                )
+            else:
+                eq_depth = self.kwargs.get("depth", "default")
+                if eq_depth == "default":
+                    # mean nearest-neighbor distance; NOTE(review): 4.5x not applied
+                    eq_depth = np.mean(
+                        vd.median_distance(
+                            (self.training_df.easting, self.training_df.northing),
+                            k_nearest=1,
+                        )
+                    )
+                new_kwargs["depth"] = eq_depth
+
             with utils._log_level(logging.WARN):  # pylint: disable=protected-access
                 (
                     residual_constraint_score,
@@ -2398,6 +2419,27 @@ def __call__(self, trial: optuna.trial) -> float:
                 # for each fold, run CV
                 results = []
                 for i, _ in enumerate(pbar):
+                    if self.depth_limits is not None:
+                        new_kwargs["depth"] = trial.suggest_float(
+                            "depth",
+                            self.depth_limits[0],
+                            self.depth_limits[1],
+                        )
+                    else:
+                        eq_depth = self.kwargs.get("depth", "default")
+                        if eq_depth == "default":
+                            # mean neighbor distance; NOTE(review): 4.5x not applied
+                            eq_depth = np.mean(
+                                vd.median_distance(
+                                    (
+                                        self.training_df[i].easting,
+                                        self.training_df[i].northing,
+                                    ),
+                                    k_nearest=1,
+                                )
+                            )
+                        new_kwargs["depth"] = eq_depth
+
                     fold_results = cross_validation.regional_separation_score(
                         constraints_df=self.training_df[i],
                         testing_df=self.testing_df[i],
@@ -2841,7 +2883,7 @@ def optimize_regional_eq_sources(
 
     optuna.logging.set_verbosity(optuna.logging.WARN)
 
-    kwargs = kwargs.copy()
+    kwargs = copy.deepcopy(kwargs)
 
     # if sampler not provided, use TPE as default
     if sampler is None:
@@ -2897,6 +2939,11 @@ def optimize_regional_eq_sources(
 
     # get optimal hyperparameter values
     depth = best_trial.params.get("depth", kwargs.pop("depth", "default"))
+    if depth == "default":
+        # mean nearest-neighbor distance; NOTE(review): the 4.5 factor is not applied
+        depth = np.mean(
+            vd.median_distance((grav_df.easting, grav_df.northing), k_nearest=1)
+        )
     damping = best_trial.params.get("damping", kwargs.pop("damping", None))
     block_size = best_trial.params.get("block_size", kwargs.pop("block_size", None))
     grav_obs_height = best_trial.params.get(
@@ -3067,7 +3114,7 @@ def optimize_regional_constraint_point_minimization(
 
     optuna.logging.set_verbosity(optuna.logging.WARN)
 
-    kwargs = kwargs.copy()
+    kwargs = copy.deepcopy(kwargs)
 
     # if sampler not provided, use TPE as default
     if sampler is None:
@@ -3198,6 +3245,13 @@ def optimize_regional_constraint_point_minimization(
     tension_factor = best_trial.params.get("tension_factor", None)
     spline_dampings = best_trial.params.get("spline_dampings", None)
     depth = best_trial.params.get("depth", kwargs.pop("depth", "default"))
+    if depth == "default":
+        # mean nearest-neighbor distance; NOTE(review): the 4.5 factor is not applied
+        depth = np.mean(
+            vd.median_distance(
+                (constraints_df.easting, constraints_df.northing), k_nearest=1
+            )
+        )
     damping = best_trial.params.get("damping", kwargs.pop("damping", None))
     block_size = best_trial.params.get("block_size", kwargs.pop("block_size", None))
     grav_obs_height = best_trial.params.get(

diff --git a/src/invert4geom/plotting.py b/src/invert4geom/plotting.py
@@ -1,5 +1,6 @@
 from __future__ import annotations  # pylint: disable=too-many-lines
 
+import copy
 import typing
 
 import matplotlib as mpl
@@ -613,6 +614,7 @@ def plot_inversion_grav_results(
     iterations: list[int],
     constraints_df: pd.DataFrame | None = None,
     fig_height: float = 12,
+    constraint_style: str = "x.3c",
 ) -> None:
     """
     plot the initial and final misfit grids from the inversion and their difference
@@ -629,6 +631,8 @@ def plot_inversion_grav_results(
         constraint points to include in the plots
     fig_height : float, optional
         height of the figure, by default 12
+    constraint_style : str, optional
+        pygmt style string for constraint points, by default 'x.3c'
     """
 
     grid = grav_results.set_index(["northing", "easting"]).to_xarray()
@@ -663,6 +667,7 @@ def plot_inversion_grav_results(
         cbar_label="mGal",
         title=f"Initial misfit: RMSE:{round(initial_rmse, 2)} mGal",
         points=points,
+        points_style=constraint_style,
     )
     fig = maps.plot_grd(
         dif,
@@ -676,6 +681,7 @@ def plot_inversion_grav_results(
         cbar_label="mGal",
         title=f"difference: RMSE:{round(utils.rmse(dif), 2)} mGal",
         points=points,
+        points_style=constraint_style,
     )
     fig = maps.plot_grd(
         final,
@@ -690,6 +696,7 @@ def plot_inversion_grav_results(
         cbar_label="mGal",
         title=f"Final misfit: RMSE:{round(final_rmse, 2)} mGal",
         points=points,
+        points_style=constraint_style,
     )
     fig.show()
 
@@ -737,7 +744,7 @@ def plot_inversion_iteration_results(
 
     misfit_grids, topo_grids, corrections_grids = grids
 
-    params = parameters.copy()
+    params = copy.deepcopy(parameters)
 
     # set figure parameters
     sub_width = 5
@@ -1037,6 +1044,7 @@ def plot_inversion_results(
             iterations,
             constraints_df=constraints_df,
             fig_height=kwargs.get("fig_height", 12),
+            constraint_style=kwargs.get("constraint_style", "x.3c"),
         )