diff --git a/src/azul/indexer/__init__.py b/src/azul/indexer/__init__.py index 5d17349c0..dc4172c5d 100644 --- a/src/azul/indexer/__init__.py +++ b/src/azul/indexer/__init__.py @@ -17,6 +17,7 @@ Self, TypeVar, TypedDict, + final, ) import attrs @@ -45,21 +46,105 @@ BundleVersion = str -@attrs.frozen(kw_only=True, order=True) +@attrs.frozen(kw_only=True, eq=False) class BundleFQID(SupportsLessAndGreaterThan): """ - >>> list(sorted([ - ... BundleFQID(uuid='d', version='e'), - ... BundleFQID(uuid='a', version='c'), - ... BundleFQID(uuid='a', version='b'), - ... ])) - ... # doctest: +NORMALIZE_WHITESPACE - [BundleFQID(uuid='a', version='b'), - BundleFQID(uuid='a', version='c'), - BundleFQID(uuid='d', version='e')] + A fully qualified bundle identifier. The attributes defined in this class + must always be sufficient to decide whether two instances of this class or + its subclasses identify the same bundle or not. Subclasses may define + additional attributes to help describe the bundle, but they are forbidden + from using these attributes in the implementations of their `__eq__` or + `__hash__` methods, either explicitly or in code generated by `attrs`. """ - uuid: BundleUUID = attrs.field(order=str.lower) - version: BundleVersion = attrs.field(order=str.lower) + uuid: BundleUUID + version: BundleVersion + + def _nucleus(self) -> tuple[str, str]: + return self.uuid.lower(), self.version.lower() + + # We can't use attrs' generated implementation because it always + # considers operands with different types to be unequal, regardless of + # their inheritance relationships or how their attributes are annotated + # (e.g. specifying `eq=False` has no effect). We want instances of + # all subclasses to compare equal as long as `uuid` and `version` are + # equal. + @final + def __eq__(self, other: 'BundleFQID') -> bool: + """ + >>> b1 = BundleFQID(uuid='a', version='b') + >>> b2 = BundleFQID(uuid='a', version='b') + >>> b1 == b2 + True + + >>> s1 = SourceRef(id='x', spec=SimpleSourceSpec.parse('y:/0')) + >>> sb1 = SourcedBundleFQID(uuid='a', version='b', source=s1) + >>> sb2 = SourcedBundleFQID(uuid='a', version='b', source=s1) + >>> sb1 == sb2 + True + + >>> b1 == sb1 + True + + >>> s2 = SourceRef(id='w', spec=SimpleSourceSpec.parse('z:/0')) + >>> sb3 = SourcedBundleFQID(uuid='a', version='b', source=s2) + >>> b1 == sb3 + True + + >>> sb1 == sb3 + ... # doctest: +NORMALIZE_WHITESPACE + Traceback (most recent call last): + ... + AssertionError: (('a', 'b'), + SourceRef(id='x', spec=SimpleSourceSpec(prefix=Prefix(common='', partition=0), name='y')), + SourceRef(id='w', spec=SimpleSourceSpec(prefix=Prefix(common='', partition=0), name='z'))) + """ + same_bundle = self._nucleus() == other._nucleus() + if ( + same_bundle + and isinstance(self, SourcedBundleFQID) + and isinstance(other, SourcedBundleFQID) + ): + assert self.source == other.source, (self._nucleus(), self.source, other.source) + return same_bundle + + @final + def __hash__(self) -> int: + return hash(self._nucleus()) + + def __init_subclass__(cls, **kwargs): + """ + >>> @attrs.frozen(kw_only=True) + ... class FooBundleFQID(SourcedBundleFQID): + ... foo: str + Traceback (most recent call last): + ... + AssertionError: + + >>> @attrs.frozen(kw_only=True, eq=False) + ... class FooBundleFQID(SourcedBundleFQID): + ... foo: str + """ + super().__init_subclass__(**kwargs) + assert cls.__eq__ is BundleFQID.__eq__, cls + assert cls.__hash__ is BundleFQID.__hash__, cls + + # attrs doesn't allow `order=True` when `eq=False` + def __lt__(self, other: 'BundleFQID') -> bool: + """ + >>> list(sorted([ + ... BundleFQID(uuid='d', version='e'), + ... BundleFQID(uuid='a', version='c'), + ... BundleFQID(uuid='a', version='b'), + ... ])) + ... # doctest: +NORMALIZE_WHITESPACE + [BundleFQID(uuid='a', version='b'), + BundleFQID(uuid='a', version='c'), + BundleFQID(uuid='d', version='e')] + """ + return self._nucleus() < other._nucleus() + + def __gt__(self, other: 'BundleFQID'): + return self._nucleus() > other._nucleus() def to_json(self) -> MutableJSON: return attrs.asdict(self, recurse=False) @@ -462,7 +547,7 @@ class SourcedBundleFQIDJSON(BundleFQIDJSON): BUNDLE_FQID = TypeVar('BUNDLE_FQID', bound='SourcedBundleFQID') -@attrs.frozen(kw_only=True, order=True) +@attrs.frozen(kw_only=True, eq=False) class SourcedBundleFQID(BundleFQID, Generic[SOURCE_REF]): """ >>> spec = SimpleSourceSpec(name='', prefix=(Prefix(partition=0))) @@ -493,10 +578,6 @@ def from_json(cls, json: SourcedBundleFQIDJSON) -> 'SourcedBundleFQID': source = cls.source_ref_cls().from_json(json.pop('source')) return cls(source=source, **json) - def upcast(self) -> BundleFQID: - return BundleFQID(uuid=self.uuid, - version=self.version) - def to_json(self) -> SourcedBundleFQIDJSON: return dict(super().to_json(), source=self.source.to_json()) diff --git a/src/azul/indexer/document.py b/src/azul/indexer/document.py index 5e8a0cd64..f082193e6 100644 --- a/src/azul/indexer/document.py +++ b/src/azul/indexer/document.py @@ -519,23 +519,6 @@ class ContributionCoordinates(DocumentCoordinates[E], Generic[E]): deleted: bool - def __attrs_post_init__(self): - # If we were to allow instances of subclasses, we'd risk breaking - # equality and hashing semantics. It is impossible to correctly - # implement the transitivity property of equality between instances of - # type and subtype without ignoring the additional attributes added by - # the subtype. Consider types T and S where S is a subtype of T. - # Transitivity requires that s1 == s2 for any two instances s1 and s2 - # of S for which s1 == t and s2 == t, where t is any instance of T. The - # subtype instances s1 and s2 can only be equal to t if they match in - # all attributes that T defines. The additional attributes introduced - # by S must be ignored, even when comparing s1 and s2, otherwise s1 and - # s2 might turn out to be unequal. In this particular case that is not - # desirable: we do want to be able to compare instances of subtypes of - # BundleFQID without ignoring any of their attributes. - concrete_type = type(self.bundle) - assert concrete_type is BundleFQID, concrete_type - @property def document_id(self) -> str: return '_'.join(( diff --git a/src/azul/indexer/transform.py b/src/azul/indexer/transform.py index 4f6efb7f5..1e49c5746 100644 --- a/src/azul/indexer/transform.py +++ b/src/azul/indexer/transform.py @@ -123,7 +123,7 @@ def _contribution(self, ) -> Contribution: entity = EntityReference(entity_type=self.entity_type(), entity_id=entity_id) coordinates = ContributionCoordinates(entity=entity, - bundle=self.bundle.fqid.upcast(), + bundle=self.bundle.fqid, deleted=self.deleted) return Contribution(coordinates=coordinates, version=None, diff --git a/src/azul/plugins/repository/tdr_anvil/__init__.py b/src/azul/plugins/repository/tdr_anvil/__init__.py index 530850366..a0162d2c1 100644 --- a/src/azul/plugins/repository/tdr_anvil/__init__.py +++ b/src/azul/plugins/repository/tdr_anvil/__init__.py @@ -119,7 +119,7 @@ class TDRAnvilBundleFQIDJSON(SourcedBundleFQIDJSON): table_name: str -@attrs.frozen(kw_only=True) +@attrs.frozen(kw_only=True, eq=False) class TDRAnvilBundleFQID(TDRBundleFQID): table_name: BundleType = attrs.field(converter=BundleType) diff --git a/test/indexer/test_indexer.py b/test/indexer/test_indexer.py index 2fbf8206b..3b24b211c 100644 --- a/test/indexer/test_indexer.py +++ b/test/indexer/test_indexer.py @@ -288,7 +288,7 @@ def test_deletion(self): self.assertNotEqual(doc.contents, {}) elif doc_type is DocumentType.contribution: doc = Contribution.from_index(field_types, hit) - self.assertEqual(bundle_fqid.upcast(), doc.coordinates.bundle) + self.assertEqual(bundle_fqid, doc.coordinates.bundle) self.assertFalse(doc.coordinates.deleted) elif doc_type is DocumentType.replica: pass @@ -315,7 +315,7 @@ def test_deletion(self): if doc_type is DocumentType.contribution: doc = Contribution.from_index(field_types, hit) docs_by_entity[doc.entity].append(doc) - self.assertEqual(bundle_fqid.upcast(), doc.coordinates.bundle) + self.assertEqual(bundle_fqid, doc.coordinates.bundle) else: # Since there is only one bundle and it was deleted, # nothing should be aggregated @@ -388,7 +388,7 @@ def test_duplicate_notification(self): coordinates = [ ContributionCoordinates( entity=entity, - bundle=bundle.fqid.upcast(), + bundle=bundle.fqid, deleted=False ).with_catalog(self.catalog) ] @@ -553,7 +553,7 @@ def test_multi_entity_contributing_bundles(self): entity_id=old_file_uuid, entity_type='files') deletion = ContributionCoordinates(entity=entity, - bundle=bundle_fqid.upcast(), + bundle=bundle_fqid, deleted=True) index_name, document_id = deletion.index_name, deletion.document_id hits = [ diff --git a/test/indexer/test_projects.py b/test/indexer/test_projects.py index c96cb4e49..d76dc5298 100644 --- a/test/indexer/test_projects.py +++ b/test/indexer/test_projects.py @@ -89,7 +89,7 @@ def test_no_duplicate_files_in_specimen(self): coordinates = AggregateCoordinates(entity=entity) else: coordinates = ContributionCoordinates(entity=entity, - bundle=bundle_fqid.upcast(), + bundle=bundle_fqid, deleted=False) result = self.es_client.get(index=coordinates.index_name, id=coordinates.document_id)