Skip to content

Commit

Permalink
Merge pull request #1134 from lsst/tickets/DM-46936
Browse files Browse the repository at this point in the history
DM-46936: Fix cache exception when unexpected files present
  • Loading branch information
dhirving authored Dec 17, 2024
2 parents 812ef82 + d9e1d34 commit 41225eb
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 4 deletions.
1 change: 1 addition & 0 deletions doc/changes/DM-46936.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix a bug where DatastoreCacheManager would raise ValueError('badly formed hexadecimal UUID string') if files with unexpected names are present in the cache directory when trying to load a file from the cache.
22 changes: 18 additions & 4 deletions python/lsst/daf/butler/datastore/cache_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,10 @@ def _parse_cache_name(cached_location: str) -> tuple[uuid.UUID, str | None, str
ext = "." + root_ext.pop(0) if root_ext else None

parts = root.split("_")
id_ = uuid.UUID(parts.pop(0))
try:
id_ = uuid.UUID(parts.pop(0))
except ValueError as e:
raise InvalidCacheFilenameError(f"Invalid or missing ID in cache file name: {cached_location}") from e
component = parts.pop(0) if parts else None
return id_, component, ext

Expand Down Expand Up @@ -894,9 +897,14 @@ def scan_cache(self) -> None:
if file.relative_to(self._temp_exempt_directory) is not None:
continue

path_in_cache = self._register_cache_entry(file, can_exist=True)
if path_in_cache:
found.add(path_in_cache)
try:
path_in_cache = self._register_cache_entry(file, can_exist=True)
if path_in_cache:
found.add(path_in_cache)
except InvalidCacheFilenameError:
# Skip files that are in the cache directory, but were not
# created by us.
pass

# Find any files that were recorded in the cache but are no longer
# on disk. (something else cleared them out?)
Expand Down Expand Up @@ -1194,3 +1202,9 @@ def known_to_cache(self, ref: DatasetRef, extension: str | None = None) -> bool:

def __str__(self) -> str:
return f"{type(self).__name__}()"


class InvalidCacheFilenameError(Exception):
"""Raised when attempting to register a file in the cache with a name in
the incorrect format.
"""
20 changes: 20 additions & 0 deletions tests/test_datastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -1744,6 +1744,26 @@ def testExplicitCacheDir(self) -> None:
# Test that the cache directory is not marked temporary
self.assertFalse(cache_manager.cache_directory.isTemporary)

def testUnexpectedFilesInCacheDir(self) -> None:
"""Test for regression of a bug where extraneous files in a cache
directory would cause all cache lookups to raise an exception.
"""
config_str = f"""
cached:
root: '{self.root}'
cacheable:
metric0: true
"""

for filename in ["unexpected.txt", "unexpected", "un_expected", "un_expected.txt"]:
unexpected_file = os.path.join(self.root, filename)
with open(unexpected_file, "w") as fh:
fh.write("test")

cache_manager = self._make_cache_manager(config_str)
cache_manager.scan_cache()
self.assertCache(cache_manager)

def assertCache(self, cache_manager: DatastoreCacheManager) -> None:
self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
self.assertFalse(cache_manager.should_be_cached(self.refs[1]))
Expand Down

0 comments on commit 41225eb

Please sign in to comment.