From d9e1d348116eafab40398235068299140c08e09e Mon Sep 17 00:00:00 2001 From: "David H. Irving" Date: Fri, 13 Dec 2024 16:59:13 -0700 Subject: [PATCH] Fix cache exception when unexpected files present Fix a bug where DatastoreCacheManager would raise ValueError('badly formed hexadecimal UUID string') if files with unexpected names are present in the cache directory when trying to load a file from the cache. --- doc/changes/DM-46936.bugfix.md | 1 + .../daf/butler/datastore/cache_manager.py | 22 +++++++++++++++---- tests/test_datastore.py | 20 +++++++++++++++++ 3 files changed, 39 insertions(+), 4 deletions(-) create mode 100644 doc/changes/DM-46936.bugfix.md diff --git a/doc/changes/DM-46936.bugfix.md b/doc/changes/DM-46936.bugfix.md new file mode 100644 index 0000000000..aded261fb7 --- /dev/null +++ b/doc/changes/DM-46936.bugfix.md @@ -0,0 +1 @@ +Fix a bug where DatastoreCacheManager would raise ValueError('badly formed hexadecimal UUID string') if files with unexpected names are present in the cache directory when trying to load a file from the cache. diff --git a/python/lsst/daf/butler/datastore/cache_manager.py b/python/lsst/daf/butler/datastore/cache_manager.py index 0cb4dd641d..13fa16dc8c 100644 --- a/python/lsst/daf/butler/datastore/cache_manager.py +++ b/python/lsst/daf/butler/datastore/cache_manager.py @@ -129,7 +129,10 @@ def _parse_cache_name(cached_location: str) -> tuple[uuid.UUID, str | None, str ext = "." + root_ext.pop(0) if root_ext else None parts = root.split("_") - id_ = uuid.UUID(parts.pop(0)) + try: + id_ = uuid.UUID(parts.pop(0)) + except ValueError as e: + raise InvalidCacheFilenameError(f"Invalid or missing ID in cache file name: {cached_location}") from e component = parts.pop(0) if parts else None return id_, component, ext @@ -894,9 +897,14 @@ def scan_cache(self) -> None: if file.relative_to(self._temp_exempt_directory) is not None: continue - path_in_cache = self._register_cache_entry(file, can_exist=True) - if path_in_cache: - found.add(path_in_cache) + try: + path_in_cache = self._register_cache_entry(file, can_exist=True) + if path_in_cache: + found.add(path_in_cache) + except InvalidCacheFilenameError: + # Skip files that are in the cache directory, but were not + # created by us. + pass # Find any files that were recorded in the cache but are no longer # on disk. (something else cleared them out?) @@ -1194,3 +1202,9 @@ def known_to_cache(self, ref: DatasetRef, extension: str | None = None) -> bool: def __str__(self) -> str: return f"{type(self).__name__}()" + + +class InvalidCacheFilenameError(Exception): + """Raised when attempting to register a file in the cache with a name in + the incorrect format. + """ diff --git a/tests/test_datastore.py b/tests/test_datastore.py index 79ea7b7ab5..794ab8e7e1 100644 --- a/tests/test_datastore.py +++ b/tests/test_datastore.py @@ -1744,6 +1744,26 @@ def testExplicitCacheDir(self) -> None: # Test that the cache directory is not marked temporary self.assertFalse(cache_manager.cache_directory.isTemporary) + def testUnexpectedFilesInCacheDir(self) -> None: + """Test for regression of a bug where extraneous files in a cache + directory would cause all cache lookups to raise an exception. + """ + config_str = f""" +cached: + root: '{self.root}' + cacheable: + metric0: true + """ + + for filename in ["unexpected.txt", "unexpected", "un_expected", "un_expected.txt"]: + unexpected_file = os.path.join(self.root, filename) + with open(unexpected_file, "w") as fh: + fh.write("test") + + cache_manager = self._make_cache_manager(config_str) + cache_manager.scan_cache() + self.assertCache(cache_manager) + def assertCache(self, cache_manager: DatastoreCacheManager) -> None: self.assertTrue(cache_manager.should_be_cached(self.refs[0])) self.assertFalse(cache_manager.should_be_cached(self.refs[1]))