diff --git a/.github/workflows/github-actions.yml b/.github/workflows/github-actions.yml
index 69d4e35f7..7939d29e1 100644
--- a/.github/workflows/github-actions.yml
+++ b/.github/workflows/github-actions.yml
@@ -10,7 +10,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: ['3.8']
+        python-version: ['3.9']
     steps:
       - uses: actions/checkout@v4
       - name: Set Up Python
@@ -63,7 +63,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
+        python-version: ['3.9', '3.10', '3.11', '3.12']
     steps:
       - uses: actions/checkout@v4
       - name: Set Up Python
@@ -110,7 +110,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
+        python-version: ['3.9', '3.10', '3.11', '3.12']
     steps:
      - uses: actions/checkout@v4
      - name: Set Up Python
diff --git a/README.rst b/README.rst
index a0ce5a419..16780a3cc 100644
--- a/README.rst
+++ b/README.rst
@@ -325,7 +325,7 @@ Tests are written in the ``tests`` directory and processed using `PyTest
-STUMPY supports `Python 3.8+ `__ and, due to the use of unicode variable names/identifiers, is not compatible with Python 2.x. Given the small dependencies, STUMPY may work on older versions of Python but this is beyond the scope of our support and we strongly recommend that you upgrade to the most recent version of Python.
+STUMPY supports `Python 3.9+ `__ and, due to the use of unicode variable names/identifiers, is not compatible with Python 2.x. Given the small dependencies, STUMPY may work on older versions of Python but this is beyond the scope of our support and we strongly recommend that you upgrade to the most recent version of Python.

 ------------
 Getting Help
diff --git a/docs/Contribute.ipynb b/docs/Contribute.ipynb
index 038c435dd..f5f7e3c2a 100644
--- a/docs/Contribute.ipynb
+++ b/docs/Contribute.ipynb
@@ -170,6 +170,43 @@
     "![Fetch Upstream Fork](images/fetch_upstream_fork.png) "
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    ":::{admonition} Instructions for Pulling Main into a Development Branch\n",
+    ":class: toggle\n",
+    "1. Open a terminal\n",
+    "2. Change the current working directory to your local development repository (e.g., cd Git/stumpy_dev.git)\n",
+    "3. Check out your local development branch (e.g., git switch some_new_feature)\n",
+    "4. Commit all changes in your local development branch (e.g., git add some_file.py and git commit)\n",
+    "5. Fetch the branches and their respective commits from the upstream repository (e.g., git fetch upstream)\n",
+    "6. Check out your fork's local default branch (e.g., git checkout main)\n",
+    "7. Merge the changes from the upstream default branch - in this case, upstream/main - into your local default branch (e.g., git merge upstream/main). This brings your fork's default branch into sync with the upstream repository, without losing your local changes.\n",
+    "8. If your local main branch didn't have any unique commits, Git will perform a fast-forward. Otherwise, if your local main branch had unique commits, you may need to resolve conflicts. Note that this does not affect your development branch!\n",
+    "9. Next, switch over to your development branch (e.g., git switch some_new_feature)\n",
+    "10. Finally, merge the main branch into your development branch (e.g., git merge main)\n",
+    "    - You may see something like the following, if you do, you will need to open up the files tagged with CONFLICT and resolve the merge conflicts. Once that's done, you will need to commit the changes and push the commit (e.g., git push) back to Github.\n",
+    "\n",
+    "```\n",
+    "git merge main\n",
+    "Auto-merging stumpy/aamp_stimp.py\n",
+    "Auto-merging stumpy/core.py\n",
+    "Auto-merging stumpy/mpdist.py\n",
+    "CONFLICT (content): Merge conflict in stumpy/mpdist.py\n",
+    "Auto-merging stumpy/mstumped.py\n",
+    "CONFLICT (content): Merge conflict in stumpy/mstumped.py\n",
+    "Auto-merging stumpy/ostinato.py\n",
+    "Auto-merging stumpy/stimp.py\n",
+    "CONFLICT (content): Merge conflict in stumpy/stimp.py\n",
+    "Auto-merging stumpy/stumped.py\n",
+    "CONFLICT (content): Merge conflict in stumpy/stumped.py\n",
+    "Automatic merge failed; fix conflicts and then commit the result.\n",
+    "You will need to open up the files tagged with CONFLICT and resolve the merge conflicts. Once that's done, you will need to commit the changes and push the commit (e.g., git push) back to Github.\n",
+    "```\n",
+    ":::"
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
diff --git a/docs/conf.py b/docs/conf.py
index 88b69bdae..dcd2da20a 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -51,6 +51,7 @@
     "sphinx.ext.intersphinx",
     "sphinx.ext.mathjax",
     "sphinx.ext.viewcode",
+    "sphinx_togglebutton",
     "numpydoc",
     "myst_nb",
 ]
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 67d3d3727..b1242ff4b 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -7,3 +7,4 @@ scikit-learn
 numpydoc
 myst-nb
 jupyterlab-myst
+sphinx-togglebutton
diff --git a/environment.yml b/environment.yml
index 919cdec19..30648ec44 100644
--- a/environment.yml
+++ b/environment.yml
@@ -2,10 +2,10 @@ channels:
   - numba
   - conda-forge
 dependencies:
-  - python>=3.8
-  - numpy>=1.21
+  - python>=3.9
+  - numpy>=1.22
   - scipy>=1.10
-  - numba>=0.57.1
+  - numba>=0.59.1
   - pandas>=0.20.0
   - flake8>=3.7.7
   - flake8-docstrings>=1.5.0
diff --git a/pyproject.toml b/pyproject.toml
index 1b5731861..1d5c2ed7c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "stumpy"
 version = "1.13.0"
-requires-python = ">=3.8"
+requires-python = ">=3.9"
 authors = [
   {name = "Sean M. Law", email = "seanmylaw@gmail.com"}
 ]
@@ -23,7 +23,7 @@ classifiers = [
   "Operating System :: POSIX",
   "Operating System :: Unix",
   "Operating System :: MacOS",
-  "Programming Language :: Python :: 3.8",
+  "Programming Language :: Python :: 3.9",
 ]
 keywords = ["time series", "matrix profile", "motif", "discord"]
 maintainers = [
@@ -32,9 +32,9 @@ maintainers = [
 ]
 license = {text = "3-clause BSD License"}
 dependencies = [
-  "numpy >= 1.21",
+  "numpy >= 1.22",
   "scipy >= 1.10",
-  "numba >= 0.57.1"
+  "numba >= 0.59.1"
 ]

 [tool.setuptools]
diff --git a/requirements.txt b/requirements.txt
index 61f3bd793..d08f49a4c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
-numpy>=1.21
+numpy>=1.22
 scipy>=1.10
-numba>=0.57.1
+numba>=0.59.1
diff --git a/stumpy/aampi.py b/stumpy/aampi.py
index 1a084f802..e5c2ee8a1 100644
--- a/stumpy/aampi.py
+++ b/stumpy/aampi.py
@@ -247,26 +247,9 @@ def _update_egress(self, t):
         if np.any(~self._T_isfinite[-self._m :]):
             D[:] = np.inf

-        core.apply_exclusion_zone(D, D.shape[0] - 1, self._excl_zone, np.inf)
-
-        update_idx = np.argwhere(D < self._P[:, -1]).flatten()
-        for i in update_idx:
-            idx = np.searchsorted(self._P[i], D[i], side="right")
-            core._shift_insert_at_index(self._P[i], idx, D[i])
-            core._shift_insert_at_index(
-                self._I[i], idx, D.shape[0] + self._n_appended - 1
-            )
-            # D.shape[0] is base-1
-
-        # Calculate the (top-k) matrix profile values/indices for the last subsequence
-        # by using its corresponding distance profile `D`
-        self._P[-1] = np.inf
-        self._I[-1] = -1
-        for i, d in enumerate(D):
-            if d < self._P[-1, -1]:
-                idx = np.searchsorted(self._P[-1], d, side="right")
-                core._shift_insert_at_index(self._P[-1], idx, d)
-                core._shift_insert_at_index(self._I[-1], idx, i + self._n_appended)
+        core._update_incremental_PI(
+            D, self._P, self._I, self._excl_zone, n_appended=self._n_appended
+        )

         # All neighbors of the last subsequence are on its left. So, its (top-1)
         # matrix profile value/index and its left matrix profile value/index must
@@ -322,30 +305,17 @@ def _update(self, t):
         if np.any(~self._T_isfinite[-self._m :]):
             D[:] = np.inf

-        core.apply_exclusion_zone(D, D.shape[0] - 1, self._excl_zone, np.inf)
-
-        update_idx = np.argwhere(D[:l] < self._P[:l, -1]).flatten()
-        for i in update_idx:
-            idx = np.searchsorted(self._P[i], D[i], side="right")
-            core._shift_insert_at_index(self._P[i], idx, D[i])
-            core._shift_insert_at_index(self._I[i], idx, l)
-
-        # Calculating top-k matrix profile and (top-1) left matrix profile (and their
-        # corresponding indices) for new subsequence whose distance profile is `D`
         P_new = np.full(self._k, np.inf, dtype=np.float64)
         I_new = np.full(self._k, -1, dtype=np.int64)
-        for i, d in enumerate(D):
-            if d < P_new[-1]:  # maximum value in sorted array P_new
-                idx = np.searchsorted(P_new, d, side="right")
-                core._shift_insert_at_index(P_new, idx, d)
-                core._shift_insert_at_index(I_new, idx, i)
+        self._P = np.append(self._P, P_new.reshape(1, -1), axis=0)
+        self._I = np.append(self._I, I_new.reshape(1, -1), axis=0)
+
+        core._update_incremental_PI(D, self._P, self._I, self._excl_zone, n_appended=0)

-        left_I_new = I_new[0]
-        left_P_new = P_new[0]
+        left_I_new = self._I[-1, 0]
+        left_P_new = self._P[-1, 0]

         self._T = T_new
-        self._P = np.append(self._P, P_new.reshape(1, -1), axis=0)
-        self._I = np.append(self._I, I_new.reshape(1, -1), axis=0)
         self._left_P = np.append(self._left_P, left_P_new)
         self._left_I = np.append(self._left_I, left_I_new)
         self._p_norm = p_norm_new
diff --git a/stumpy/core.py b/stumpy/core.py
index a237931a7..4cdaea02a 100644
--- a/stumpy/core.py
+++ b/stumpy/core.py
@@ -4368,3 +4368,70 @@ def get_ray_nworkers(ray_client):
         Total number of Ray workers
     """
     return int(ray_client.cluster_resources().get("CPU"))
+
+
+@njit
+def _update_incremental_PI(D, P, I, excl_zone, n_appended=0):
+    """
+    Given the 1D array distance profile, `D`, of the last subsequence of T,
+    update (in-place) the (top-k) matrix profile, `P`, and the matrix profile
+    index, I.
+
+    Parameters
+    ----------
+    D : numpy.ndarray
+        A 1D array (with dtype float) representing the distance profile of
+        the last subsequence of T
+
+    P : numpy.ndarray
+        A 2D array representing the matrix profile of T,
+        with shape (len(T) - m + 1, k), where `m` is the window size.
+        P[-1, :] should be set to np.inf
+
+    I : numpy.ndarray
+        A 2D array representing the matrix profile index of T,
+        with shape (len(T) - m + 1, k), where `m` is the window size.
+        I[-1, :] should be set to -1.
+
+    excl_zone : int
+        Size of the exclusion zone.
+
+    n_appended : int
+        Number of times the timeseries start point is shifted one to the right.
+        See note below for more details.
+
+    Returns
+    -------
+    None
+
+    Note
+    -----
+    The `n_appended` parameter is used to indicate the number of times the timeseries
+    start point is shifted one to the right. When `egress=False` (see stumpy.stumpi),
+    the matrix profile and matrix profile index are updated in an incremental fashion
+    while considering all historical data. `n_appended` must be set to 0 in such
+    cases. However, when `egress=True`, the matrix profile and matrix profile index are
+    updated in an incremental fashion and they represent the matrix profile and matrix
+    profile index for the `l` most recent subsequences (where `l = len(T) - m + 1`).
+    In this case, each subsequence is only compared against up to `l-1` left neighbors
+    and up to `l-1` right neighbors.
+    """
+    _apply_exclusion_zone(D, D.shape[0] - 1, excl_zone, np.inf)
+
+    update_idx = np.argwhere(D < P[:, -1]).flatten()
+    for i in update_idx:
+        idx = np.searchsorted(P[i], D[i], side="right")
+        _shift_insert_at_index(P[i], idx, D[i])
+        _shift_insert_at_index(I[i], idx, D.shape[0] + n_appended - 1)
+
+    # Calculate the (top-k) matrix profile values/indices
+    # for the last subsequence
+    P[-1] = np.inf
+    I[-1] = -1
+    for i, d in enumerate(D):
+        if d < P[-1, -1]:
+            idx = np.searchsorted(P[-1], d, side="right")
+            _shift_insert_at_index(P[-1], idx, d)
+            _shift_insert_at_index(I[-1], idx, i + n_appended)
+
+    return
diff --git a/stumpy/stumpi.py b/stumpy/stumpi.py
index 1731b2808..feb8cb2af 100644
--- a/stumpy/stumpi.py
+++ b/stumpy/stumpi.py
@@ -354,26 +354,9 @@ def _update_egress(self, t):
         if np.any(~self._T_isfinite[-self._m :]):
             D[:] = np.inf

-        core.apply_exclusion_zone(D, D.shape[0] - 1, self._excl_zone, np.inf)
-
-        update_idx = np.argwhere(D < self._P[:, -1]).flatten()
-        for i in update_idx:
-            idx = np.searchsorted(self._P[i], D[i], side="right")
-            core._shift_insert_at_index(self._P[i], idx, D[i])
-            core._shift_insert_at_index(
-                self._I[i], idx, D.shape[0] + self._n_appended - 1
-            )
-            # D.shape[0] is base-1
-
-        # Calculate the (top-k) matrix profile values/indices for the last subsequence
-        # by using its corresponding distance profile `D`
-        self._P[-1] = np.inf
-        self._I[-1] = -1
-        for i, d in enumerate(D):
-            if d < self._P[-1, -1]:
-                idx = np.searchsorted(self._P[-1], d, side="right")
-                core._shift_insert_at_index(self._P[-1], idx, d)
-                core._shift_insert_at_index(self._I[-1], idx, i + self._n_appended)
+        core._update_incremental_PI(
+            D, self._P, self._I, self._excl_zone, n_appended=self._n_appended
+        )

         # All neighbors of the last subsequence are on its left. So, its (top-1)
         # matrix profile value/index and its left matrix profile value/index must
@@ -440,30 +423,18 @@ def _update(self, t):
         if np.any(~self._T_isfinite[-self._m :]):
             D[:] = np.inf

-        core.apply_exclusion_zone(D, D.shape[0] - 1, self._excl_zone, np.inf)
-
-        update_idx = np.argwhere(D[:l] < self._P[:l, -1]).flatten()
-        for i in update_idx:
-            idx = np.searchsorted(self._P[i], D[i], side="right")
-            core._shift_insert_at_index(self._P[i], idx, D[i])
-            core._shift_insert_at_index(self._I[i], idx, l)
-
-        # Calculating top-k matrix profile and (top-1) left matrix profile (and their
-        # corresponding indices) for new subsequence whose distance profile is `D`
         P_new = np.full(self._k, np.inf, dtype=np.float64)
         I_new = np.full(self._k, -1, dtype=np.int64)
-        for i, d in enumerate(D):
-            if d < P_new[-1]:  # maximum value in sorted array P_new
-                idx = np.searchsorted(P_new, d, side="right")
-                core._shift_insert_at_index(P_new, idx, d)
-                core._shift_insert_at_index(I_new, idx, i)
+        self._P = np.append(self._P, P_new.reshape(1, -1), axis=0)
+        self._I = np.append(self._I, I_new.reshape(1, -1), axis=0)
+
+        core._update_incremental_PI(D, self._P, self._I, self._excl_zone, n_appended=0)

-        left_I_new = I_new[0]
-        left_P_new = P_new[0]
+        left_I_new = self._I[-1, 0]
+        left_P_new = self._P[-1, 0]

         self._T = T_new
-        self._P = np.append(self._P, P_new.reshape(1, -1), axis=0)
-        self._I = np.append(self._I, I_new.reshape(1, -1), axis=0)
+
         self._left_P = np.append(self._left_P, left_P_new)
         self._left_I = np.append(self._left_I, left_I_new)
         self._QT = QT_new
diff --git a/tests/test_core.py b/tests/test_core.py
index d35be0f89..8d0721979 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1778,3 +1778,173 @@ def test_process_isconstant_1d_default():
     T_subseq_isconstant_comp = core.process_isconstant(T, m, T_subseq_isconstant=None)

     npt.assert_array_equal(T_subseq_isconstant_ref, T_subseq_isconstant_comp)
+
+
+def test_update_incremental_PI_egressFalse():
+    # This tests the function `core._update_incremental_PI`
+    # when `egress` is False, meaning new data point is being
+    # appended to the historical data.
+    T = np.random.rand(64)
+    t = np.random.rand()  # new datapoint
+    T_new = np.append(T, t)
+
+    m = 3
+    excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
+
+    for k in range(1, 4):
+        # ref
+        mp_ref = naive.stump(T_new, m, row_wise=True, k=k)
+        P_ref = mp_ref[:, :k].astype(np.float64)
+        I_ref = mp_ref[:, k : 2 * k].astype(np.int64)
+
+        # comp
+        mp = naive.stump(T, m, row_wise=True, k=k)
+        P_comp = mp[:, :k].astype(np.float64)
+        I_comp = mp[:, k : 2 * k].astype(np.int64)
+
+        # Because of the new data point, the length of matrix profile
+        # and matrix profile indices should be increased by one.
+        P_comp = np.pad(
+            P_comp,
+            [(0, 1), (0, 0)],
+            mode="constant",
+            constant_values=np.inf,
+        )
+        I_comp = np.pad(
+            I_comp,
+            [(0, 1), (0, 0)],
+            mode="constant",
+            constant_values=-1,
+        )
+
+        D = core.mass(T_new[-m:], T_new)
+        core._update_incremental_PI(D, P_comp, I_comp, excl_zone, n_appended=0)
+
+        # assertion
+        npt.assert_almost_equal(P_ref, P_comp)
+        npt.assert_almost_equal(I_ref, I_comp)
+
+
+def test_update_incremental_PI_egressTrue():
+    T = np.random.rand(64)
+    t = np.random.rand()  # new data point
+    m = 3
+    excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
+
+    for k in range(1, 4):
+        # ref
+        # In egress=True mode, a new data point, t, is being appended
+        # to the historical data, T, while the oldest data point is
+        # being removed. Therefore, the first subsequence in T
+        # and the last subsequence do not get a chance to meet each
+        # other. Therefore, we need to exclude that distance.
+
+        T_with_t = np.append(T, t)
+        D = naive.distance_matrix(T_with_t, T_with_t, m)
+        D[-1, 0] = np.inf
+        D[0, -1] = np.inf
+
+        l = len(T_with_t) - m + 1
+        P = np.empty((l, k), dtype=np.float64)
+        I = np.empty((l, k), dtype=np.int64)
+        for i in range(l):
+            core.apply_exclusion_zone(D[i], i, excl_zone, np.inf)
+            IDX = np.argsort(D[i], kind="mergesort")[:k]
+            I[i] = IDX
+            P[i] = D[i, IDX]
+
+        P_ref = P[1:].copy()
+        I_ref = I[1:].copy()
+
+        # comp
+        mp = naive.stump(T, m, row_wise=True, k=k)
+        P_comp = mp[:, :k].astype(np.float64)
+        I_comp = mp[:, k : 2 * k].astype(np.int64)
+
+        P_comp[:-1] = P_comp[1:]
+        P_comp[-1] = np.inf
+        I_comp[:-1] = I_comp[1:]
+        I_comp[-1] = -1
+
+        T_new = np.append(T[1:], t)
+        D = core.mass(T_new[-m:], T_new)
+        core._update_incremental_PI(D, P_comp, I_comp, excl_zone, n_appended=1)
+
+        # assertion
+        npt.assert_almost_equal(P_ref, P_comp)
+        npt.assert_almost_equal(I_ref, I_comp)
+
+
+def test_update_incremental_PI_egressTrue_MemoryCheck():
+    # This test function is to ensure that the function
+    # `core._update_incremental_PI` does not forget the
+    # nearest neighbors that were pointing to those old data
+    # points that are removed in the `egress=True` mode.
+    # This can be tested by inserting the same subsequence, s, in the beginning,
+    # middle, and end of the time series. This is to allow us to know which
+    # neighbor is the nearest neighbor to each of those three subsequences.
+
+    # In the `egress=True` mode, the first element of the time series is removed and
+    # a new data point is appended. However, the updated matrix profile index for the
+    # middle subsequence `s` should still refer to the first subsequence in
+    # the historical data.
+    seed = 0
+    np.random.seed(seed)
+
+    T = np.random.rand(64)
+    m = 3
+    excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
+
+    s = np.random.rand(m)
+    T[:m] = s
+    T[30 : 30 + m] = s
+    T[-m:] = s
+
+    t = np.random.rand()  # new data point
+    T_with_t = np.append(T, t)
+
+    # In egress=True mode, a new data point, t, is being appended
+    # to the historical data, T, while the oldest data point is
+    # being removed. Therefore, the first subsequence in T
+    # and the last subsequence do not get a chance to meet each
+    # other. Therefore, their pairwise distances should be excluded
+    # from the distance matrix.
+    D = naive.distance_matrix(T_with_t, T_with_t, m)
+    D[-1, 0] = np.inf
+    D[0, -1] = np.inf
+
+    l = len(T_with_t) - m + 1
+    for i in range(l):
+        core.apply_exclusion_zone(D[i], i, excl_zone, np.inf)
+
+    T_new = np.append(T[1:], t)
+    dist_profile = naive.distance_profile(T_new[-m:], T_new, m)
+    core.apply_exclusion_zone(dist_profile, len(dist_profile) - 1, excl_zone, np.inf)
+
+    for k in range(1, 4):
+        # ref
+        P = np.empty((l, k), dtype=np.float64)
+        I = np.empty((l, k), dtype=np.int64)
+        for i in range(l):
+            IDX = np.argsort(D[i], kind="mergesort")[:k]
+            I[i] = IDX
+            P[i] = D[i, IDX]
+
+        P_ref = P[1:].copy()
+        I_ref = I[1:].copy()
+
+        # comp
+        mp = naive.stump(T, m, row_wise=True, k=k)
+        P_comp = mp[:, :k].astype(np.float64)
+        I_comp = mp[:, k : 2 * k].astype(np.int64)
+
+        P_comp[:-1] = P_comp[1:]
+        P_comp[-1] = np.inf
+        I_comp[:-1] = I_comp[1:]
+        I_comp[-1] = -1
+        core._update_incremental_PI(
+            dist_profile, P_comp, I_comp, excl_zone, n_appended=1
+        )
+
+        npt.assert_almost_equal(P_ref, P_comp)
+        npt.assert_almost_equal(I_ref, I_comp)
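For reference, the following minimal sketch (not part of the patch itself) shows how the new `core._update_incremental_PI` helper is driven, mirroring `test_update_incremental_PI_egressFalse` above. Using the public `stumpy.stump` in place of the test suite's `naive.stump`, fixing `k = 1`, and the final consistency check are illustrative assumptions rather than part of this change.

    # Illustrative sketch only: incrementally extend a k=1 matrix profile
    # after a single new data point arrives (egress=False, i.e., n_appended=0).
    import numpy as np

    import stumpy
    from stumpy import config, core

    T = np.random.rand(64)
    t = np.random.rand()  # newly arriving data point
    T_new = np.append(T, t)

    m = 3
    excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))

    # Top-1 matrix profile and matrix profile index of the historical data
    mp = stumpy.stump(T, m)
    P = mp[:, 0].astype(np.float64).reshape(-1, 1)
    I = mp[:, 1].astype(np.int64).reshape(-1, 1)

    # Grow P/I by one row for the new last subsequence (np.inf / -1 sentinels)
    P = np.pad(P, [(0, 1), (0, 0)], mode="constant", constant_values=np.inf)
    I = np.pad(I, [(0, 1), (0, 0)], mode="constant", constant_values=-1)

    # Distance profile of the last subsequence against all of T_new
    D = core.mass(T_new[-m:], T_new)

    # In-place update of P and I; n_appended=0 corresponds to egress=False
    core._update_incremental_PI(D, P, I, excl_zone, n_appended=0)

    # P should now agree (up to floating point) with a full recomputation
    mp_new = stumpy.stump(T_new, m)
    np.testing.assert_almost_equal(P[:, 0], mp_new[:, 0].astype(np.float64))

Because the helper mutates `P` and `I` in place, callers are responsible for appending the sentinel row first, which is exactly what the refactored `aampi._update` and `stumpi._update` methods above now do before delegating to `core._update_incremental_PI`.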