Skip to content

Commit

Permalink
Safer vectorset creation (#2591)
Browse files Browse the repository at this point in the history
* Safer creation of vectorset

* remove unused exception
  • Loading branch information
lferran authored Oct 30, 2024
1 parent 1ef20a5 commit 302bacc
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 5 deletions.
12 changes: 11 additions & 1 deletion nucliadb/src/nucliadb/ingest/orm/knowledgebox.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,13 +477,23 @@ async def create_vectorset(self, config: knowledgebox_pb2.VectorSetConfig):
):
raise VectorSetConflict(f"Vectorset {config.vectorset_id} already exists")
await datamanagers.vectorsets.set(self.txn, kbid=self.kbid, config=config)

# Remove the async deletion mark if it exists, just in case there was a previous deletion
deletion_mark_key = KB_VECTORSET_TO_DELETE.format(kbid=self.kbid, vectorset=config.vectorset_id)
deletion_mark = await self.txn.get(deletion_mark_key, for_update=True)
if deletion_mark is not None:
await self.txn.delete(deletion_mark_key)

shard_manager = get_shard_manager()
await shard_manager.create_vectorset(self.kbid, config)

async def delete_vectorset(self, vectorset_id: str):
    """Delete a vectorset from this knowledge box.

    Steps, in order:
      1. Remove the vectorset entry through ``datamanagers.vectorsets.delete``
         using this object's transaction.
      2. Write a deletion mark (an empty value under the
         ``KB_VECTORSET_TO_DELETE`` key for this kbid/vectorset) so the
         vectorset is flagged for asynchronous deletion.
      3. Ask the shard manager to delete the vectorset.

    Args:
        vectorset_id: identifier of the vectorset to delete.
    """
    await datamanagers.vectorsets.delete(self.txn, kbid=self.kbid, vectorset_id=vectorset_id)

    # Mark the vectorset for async deletion. The key is built once and the
    # mark is written a single time; writing it twice to the same key is
    # redundant. `create_vectorset` clears this same key if the vectorset
    # is created again.
    deletion_mark_key = KB_VECTORSET_TO_DELETE.format(kbid=self.kbid, vectorset=vectorset_id)
    await self.txn.set(deletion_mark_key, b"")

    shard_manager = get_shard_manager()
    await shard_manager.delete_vectorset(self.kbid, vectorset_id)

Expand Down
4 changes: 0 additions & 4 deletions nucliadb/src/nucliadb/writer/vectorsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,6 @@
from nucliadb_utils.utilities import get_storage


class EmbeddingNotFound(Exception):
    """Module-specific error type; presumably signals a missing embedding.

    NOTE(review): no raise site is visible in this view, and the commit
    message describes this exception as unused.
    """


async def add(kbid: str, vectorset_id: str) -> None:
# First off, add the vectorset to the learning configuration if it's not already there
lconfig = await learning_proxy.get_configuration(kbid)
Expand Down

0 comments on commit 302bacc

Please sign in to comment.