💥 Uniform matrix filling interfaces.

CNES · Oct 10, 2023 · 6d098b5 · 6d098b5
1 parent dd54ed6
commit 6d098b5
Show file tree

Hide file tree

Showing 5 changed files with 111 additions and 118 deletions.
diff --git a/src/pyinterp/core/fill.pyi b/src/pyinterp/core/fill.pyi
@@ -16,7 +16,6 @@ from . import (
 )
 
 class FirstGuess:
-    __doc__: ClassVar[str] = ...  # read-only
     __members__: ClassVar[dict] = ...  # read-only
     Zero: ClassVar[FirstGuess] = ...
     ZonalAverage: ClassVar[FirstGuess] = ...
@@ -56,7 +55,6 @@ class FirstGuess:
 
 
 class ValueType:
-    __doc__: ClassVar[str] = ...  # read-only
     __members__: ClassVar[dict] = ...  # read-only
     All: ClassVar[ValueType] = ...
     Defined: ClassVar[ValueType] = ...
@@ -96,11 +94,6 @@ class ValueType:
         ...
 
 
-def fill_time_series(x: numpy.ndarray[numpy.int64],
-                     fill_value: int) -> numpy.ndarray[numpy.int64]:
-    ...
-
-
 def gauss_seidel_float32(grid: numpy.ndarray[numpy.float32],
                          first_guess: FirstGuess = ...,
                          is_circle: bool = ...,
@@ -212,10 +205,24 @@ def loess_float64(grid: TemporalGrid4DFloat64,
 
 
 def matrix_float32(x: numpy.ndarray[numpy.float32],
-                   y: numpy.ndarray[numpy.float32]) -> None:
+                   fill_value: float = ...) -> None:
     ...
 
 
 def matrix_float64(x: numpy.ndarray[numpy.float64],
-                   y: numpy.ndarray[numpy.float64]) -> None:
+                   fill_value: float = ...) -> None:
+    ...
+
+
+def vector_float32(x: numpy.ndarray[numpy.float32],
+                   fill_value: float = ...) -> None:
+    ...
+
+
+def vector_float64(x: numpy.ndarray[numpy.float64],
+                   fill_value: float = ...) -> None:
+    ...
+
+
+def vector_int64(x: numpy.ndarray[numpy.int64], fill_value: int) -> None:
     ...
diff --git a/src/pyinterp/core/include/pyinterp/fill.hpp b/src/pyinterp/core/include/pyinterp/fill.hpp
@@ -198,22 +198,17 @@ auto gauss_seidel(pybind11::EigenDRef<pyinterp::Matrix<Type>> &grid,
   return std::max(calculate(0), calculate(1));
 }
 
-/// Fills in the gaps between defined points in a line with interpolated values.
+/// Fills in the gaps between defined values in a line with interpolated
+/// values.
 ///
 /// @tparam T The type of the coordinates.
-/// @param x The x-coordinates of the points defining the line.
-/// @param y The y-coordinates of the points defining the line.
+/// @param x The values of the points defining the line.
 /// @param is_undefined A boolean vector indicating which points are undefined.
-/// If is_undefined[i] is true, then the point (x[i], y[i]) is undefined.
 template <typename T>
-void fill_line(EigenRefBlock<T> x, EigenRefBlock<T> y,
-               EigenRefBlock<bool> is_undefined) {
+void fill_line(EigenRefBlock<T> x, EigenRefBlock<bool> is_undefined) {
   T x0;
   T x1;
   T dx;
-  T y0;
-  T y1;
-  T dy;
   Eigen::Index di;
   Eigen::Index last_valid = -1;
   Eigen::Index first_valid = -1;
@@ -227,15 +222,11 @@ void fill_line(EigenRefBlock<T> x, EigenRefBlock<T> y,
       if (last_valid != -1 && (ix - last_valid) > 1) {
         x0 = x[last_valid];
         x1 = x[ix];
-        y0 = y[last_valid];
-        y1 = y[ix];
         di = ix - last_valid;
         dx = (x1 - x0) / di;
-        dy = (y1 - y0) / di;
         for (Eigen::Index jx = last_valid + 1; jx < ix; ++jx) {
           di = jx - last_valid;
           x[jx] = dx * di + x0;
-          y[jx] = dy * di + y0;
         }
       } else if (first_valid == -1) {
         // If this is the first valid point, then we can't interpolate the
@@ -257,17 +248,13 @@ void fill_line(EigenRefBlock<T> x, EigenRefBlock<T> y,
   x0 = x[first_valid];
   x1 = x[last_valid];
   dx = (x1 - x0) / (last_valid - first_valid);
-  y0 = y[first_valid];
-  y1 = y[last_valid];
-  dy = (y1 - y0) / (last_valid - first_valid);
 
   // If there is a gap between the last valid point and the end of the line,
   // then interpolate the gap.
   if (last_valid < (size - 1)) {
     for (Eigen::Index jx = last_valid + 1; jx < size; ++jx) {
       di = jx - last_valid;
       x[jx] = dx * di + x1;
-      y[jx] = dy * di + y1;
     }
   }
   // If there is a gap between the first valid point and the beginning of the
@@ -276,7 +263,6 @@ void fill_line(EigenRefBlock<T> x, EigenRefBlock<T> y,
     for (Eigen::Index jx = 0; jx < first_valid; ++jx) {
       di = first_valid - jx;
       x[jx] = x0 - dx * di;
-      y[jx] = y0 - dy * di;
     }
   }
   // Mark all points as defined.
@@ -692,79 +678,49 @@ auto loess(const Grid4D<Type, AxisType> &grid, const uint32_t nx,
   return result;
 }
 
-/// Fills in the gaps between defined points in a matrix with interpolated
+/// Fills in the gaps between defined values in a matrix with interpolated
 /// values.
 ///
-/// @param x The x-coordinates of the points defining the matrix.
-/// @param y The y-coordinates of the points defining the matrix.
+/// @param x The data to be processed.
 template <typename T>
-void fill_matrix(pybind11::EigenDRef<Matrix<T>> x,
-                 pybind11::EigenDRef<Matrix<T>> y) {
-  auto mask = Matrix<bool>(Eigen::isnan(x.array()) || Eigen::isnan(y.array()));
+void matrix(pybind11::EigenDRef<Matrix<T>> x, const T &fill_value) {
+  Matrix<bool> mask;
+  if (std::isnan(fill_value)) {
+    mask = Eigen::isnan(x.array());
+  } else {
+    mask = x.array() == fill_value;
+  }
   auto num_rows = x.rows();
-  auto num_cols = y.cols();
+  auto num_cols = x.cols();
   // Fill in the rows.
   for (int ix = 0; ix < num_rows; ix++) {
     auto m = mask.row(ix);
     if (m.all()) {
       continue;
     }
-    detail::fill_line<T>(x.row(ix), y.row(ix), m);
+    detail::fill_line<T>(x.row(ix), m);
   }
   // Fill in the columns.
   for (int ix = 0; ix < num_cols; ix++) {
-    detail::fill_line<T>(x.col(ix), y.col(ix), mask.col(ix));
+    detail::fill_line<T>(x.col(ix), mask.col(ix));
   }
 }
 
-/// Fill gaps in a time series using linear interpolation.
+/// Fill gaps between defined values in a vector with interpolated values.
 ///
-/// The time series is assumed to be monotonically increasing or decreasing.
+/// The data is assumed to be monotonically increasing or decreasing.
 ///
 /// @param array Array of dates.
 /// @param fill_value Value to use for missing data.
 template <typename T>
-auto fill_time_series(const Eigen::Ref<const Vector<T>> &array,
-                      const T fill_value) -> Vector<T> {
-  auto result = Vector<int64_t>(array);
-  auto size = array.size();
-  Eigen::Index last_valid = -1;
-  Eigen::Index first_valid = -1;
-
-  for (Eigen::Index ix = 0; ix < size; ++ix) {
-    auto item = array[ix];
-    if (item != fill_value) {
-      if (last_valid != -1 && (ix - last_valid) > 1) {
-        auto x0 = array[last_valid];
-        auto x1 = item;
-        auto dx = (x1 - x0) / static_cast<T>(ix - last_valid);
-        for (Eigen::Index jx = last_valid + 1; jx < ix; ++jx) {
-          result[jx] = dx * static_cast<T>(jx - last_valid) + x0;
-        }
-      } else if (first_valid == -1) {
-        first_valid = ix;
-      }
-      last_valid = ix;
-    }
+auto vector(Eigen::Ref<Vector<T>> array, const T &fill_value) {
+  Vector<bool> mask;
+  if (std::isnan(fill_value)) {
+    mask = Eigen::isnan(array.array());
+  } else {
+    mask = array.array() == fill_value;
   }
-
-  if (last_valid != first_valid) {
-    auto x0 = array[first_valid];
-    auto x1 = array[last_valid];
-    auto dx = (x1 - x0) / static_cast<T>(last_valid - first_valid);
-    if (last_valid < (size - 1)) {
-      for (Eigen::Index jx = last_valid + 1; jx < size; ++jx) {
-        result[jx] = dx * static_cast<T>(jx - last_valid) + x1;
-      }
-    }
-
-    if (first_valid > 0) {
-      for (Eigen::Index jx = 0; jx < first_valid; ++jx) {
-        result[jx] = x0 - dx * static_cast<T>(first_valid - jx);
-      }
-    }
-  }
-  return result;
+  detail::fill_line<T>(array, mask);
 }
 
 }  // namespace fill

diff --git a/src/pyinterp/core/module/fill.cpp b/src/pyinterp/core/module/fill.cpp
@@ -78,15 +78,31 @@ method by relaxation.
 )__doc__",
         py::call_guard<py::gil_scoped_release>());
 
-  m.def(("matrix_" + function_suffix).c_str(),
-        &pyinterp::fill::fill_matrix<Type>, py::arg("x"), py::arg("y"),
+  m.def(("matrix_" + function_suffix).c_str(), &pyinterp::fill::matrix<Type>,
+        py::arg("x"),
+        py::arg("fill_value") = std::numeric_limits<Type>::quiet_NaN(),
         R"__doc__(
 Fills in the gaps between defined points in a matrix with interpolated
 values.
 
 Args:
-    x: X coordinates of the points to be interpolated.
-    y: Y coordinates of the points to be interpolated.
+    x: data to be interpolated.
+    fill_value: Value used to detect gaps in the matrix. Defaults to
+        ``NaN``.
+)__doc__",
+        py::call_guard<py::gil_scoped_release>());
+
+  m.def(("vector_" + function_suffix).c_str(), &pyinterp::fill::vector<Type>,
+        py::arg("x"),
+        py::arg("fill_value") = std::numeric_limits<Type>::quiet_NaN(),
+        R"__doc__(
+Fills in the gaps between defined points in a vector with interpolated
+values.
+
+Args:
+    x: data to be interpolated.
+    fill_value: Value used to detect gaps in the matrix. Defaults to
+        ``NaN``.
 )__doc__",
         py::call_guard<py::gil_scoped_release>());
 }
@@ -146,14 +162,14 @@ void init_fill(py::module &m) {
   implement_loess<float, int64_t, TemporalGrid4D<float>>(m, "Temporal",
                                                          "Float32");
 
-  m.def("fill_time_series", &pyinterp::fill::fill_time_series<int64_t>,
-        py::arg("x"), py::arg("fill_value"),
+  m.def("vector_int64", &pyinterp::fill::vector<int64_t>, py::arg("x"),
+        py::arg("fill_value"),
         R"__doc__(
-Fill gaps in a time series using linear interpolation.
+Fill gaps in a vector with interpolated values.
 
 Args:
-    x: Time series to be filled.
-    fill_value: Value used to detect gaps in the time series.
+    x: vector to be filled.
+    fill_value: Value used to detect gaps in the matrix.
 )__doc__",
         py::call_guard<py::gil_scoped_release>());
 }
diff --git a/src/pyinterp/fill.py b/src/pyinterp/fill.py
@@ -2,7 +2,7 @@
 Replace undefined values
 ------------------------
 """
-from typing import Optional, Union
+from typing import Any, Optional, Union
 import concurrent.futures
 
 import numpy
@@ -140,44 +140,57 @@ def gauss_seidel(mesh: Union[grid.Grid2D, grid.Grid3D],
     return residual <= epsilon, filled
 
 
-def matrix(x: NDArray, y: NDArray) -> None:
-    """Fills in the gaps between defined points in a matrix with interpolated
-    values.
+def matrix(x: NDArray,
+           fill_value: Any = numpy.nan,
+           in_place: bool = True) -> None:
+    """Fills in the gaps between defined values in a 2-dimensional array.
 
     Args:
-        x: X-axis coordinates of the grid.
-        y: Y-axis coordinates of the grid.
+        x: data to be filled.
+        fill_value: Value used to fill undefined values.
+        in_place: If true, the data is filled in place. Defaults to ``True``.
     """
     if len(x.shape) != 2:
         raise ValueError('x must be a 2-dimensional array')
-    if len(y.shape) != 2:
-        raise ValueError('y must be a 2-dimensional array')
-    dtype_x = x.dtype
-    dtype_y = y.dtype
-    if (dtype_x != dtype_y):
-        return core.fill.matrix_float64(x, y)
-    if dtype_x == numpy.float32:
-        return core.fill.matrix_float32(x, y)
-    return core.fill.matrix_float64(x, y)
+    dtype = x.dtype
+    if not in_place:
+        x = numpy.copy(x)
+    if dtype == numpy.float32:
+        core.fill.matrix_float32(x, fill_value)
+    core.fill.matrix_float64(x, fill_value)
+    return x
 
 
-def time_series(x: NDArray, fill_value=numpy.datetime64('NaT')) -> NDArray:
-    """Fill undefined values in a time series.
+def vector(x: NDArray,
+           fill_value: Any = numpy.nan,
+           in_place: bool = True) -> NDArray:
+    """Fill in the gaps between defined values in a 1-dimensional array.
 
     Args:
-        x (numpy.ndarray[numpy.datetime64]): Time series to be filled.
-        fill_value (numpy.datetime64): Value used to fill undefined values.
+        x: data to be filled.
+        fill_value: Value used to fill undefined values.
+        in_place: If true, the data is filled in place. Defaults to ``True``.
 
     Returns:
-        numpy.ndarray[numpy.datetime64]: Time series with undefined values
-            filled.
+        The data filled.
     """
     if not isinstance(x, numpy.ndarray):
         raise ValueError('x must be a numpy.ndarray')
-    if not numpy.issubdtype(x.dtype, numpy.datetime64):
-        raise ValueError('x must be a numpy.ndarray[numpy.datetime64]')
-    if not numpy.issubdtype(fill_value.dtype, numpy.datetime64):
-        raise ValueError('fill_value must be a numpy.datetime64')
-    return core.fill.fill_time_series(x.astype(numpy.int64),
-                                      fill_value.astype(numpy.int64)).astype(
-                                          x.dtype)
+    if len(x.shape) != 1:
+        raise ValueError('x must be a 1-dimensional array')
+    dtype = x.dtype
+    if not in_place:
+        x = numpy.copy(x)
+    if dtype == numpy.float32:
+        core.fill.vector_float32(x, fill_value)
+    elif dtype == numpy.float64:
+        core.fill.vector_float64(x, fill_value)
+    elif dtype == numpy.int64:
+        core.fill.vector_int64(x, fill_value)
+    elif numpy.issubdtype(dtype, numpy.datetime64) or numpy.issubdtype(
+            dtype, numpy.timedelta64):
+        core.fill.vector_int64(x.view(numpy.int64),
+                               fill_value.view(numpy.int64))
+    else:
+        raise ValueError(f'unsupported data type {dtype}')
+    return x