diff --git a/nucliadb/src/nucliadb/ingest/orm/knowledgebox.py b/nucliadb/src/nucliadb/ingest/orm/knowledgebox.py index 0e78bf2cc8..49079bbcde 100644 --- a/nucliadb/src/nucliadb/ingest/orm/knowledgebox.py +++ b/nucliadb/src/nucliadb/ingest/orm/knowledgebox.py @@ -477,13 +477,23 @@ async def create_vectorset(self, config: knowledgebox_pb2.VectorSetConfig): ): raise VectorSetConflict(f"Vectorset {config.vectorset_id} already exists") await datamanagers.vectorsets.set(self.txn, kbid=self.kbid, config=config) + + # Remove the async deletion mark if it exists, just in case there was a previous deletion + deletion_mark_key = KB_VECTORSET_TO_DELETE.format(kbid=self.kbid, vectorset=config.vectorset_id) + deletion_mark = await self.txn.get(deletion_mark_key, for_update=True) + if deletion_mark is not None: + await self.txn.delete(deletion_mark_key) + shard_manager = get_shard_manager() await shard_manager.create_vectorset(self.kbid, config) async def delete_vectorset(self, vectorset_id: str): await datamanagers.vectorsets.delete(self.txn, kbid=self.kbid, vectorset_id=vectorset_id) + # mark vectorset for async deletion - await self.txn.set(KB_VECTORSET_TO_DELETE.format(kbid=self.kbid, vectorset=vectorset_id), b"") + deletion_mark_key = KB_VECTORSET_TO_DELETE.format(kbid=self.kbid, vectorset=vectorset_id) + await self.txn.set(deletion_mark_key, b"") + shard_manager = get_shard_manager() await shard_manager.delete_vectorset(self.kbid, vectorset_id) diff --git a/nucliadb/src/nucliadb/writer/vectorsets.py b/nucliadb/src/nucliadb/writer/vectorsets.py index 260eebd950..6696d84181 100644 --- a/nucliadb/src/nucliadb/writer/vectorsets.py +++ b/nucliadb/src/nucliadb/writer/vectorsets.py @@ -47,10 +47,6 @@ from nucliadb_utils.utilities import get_storage -class EmbeddingNotFound(Exception): - pass - - async def add(kbid: str, vectorset_id: str) -> None: # First off, add the vectorset to the learning configuration if it's not already there lconfig = await learning_proxy.get_configuration(kbid)