Skip to content

Commit

Permalink
Drop inconsistent dimension errored messages (#2268)
Browse files Browse the repository at this point in the history
* Drop inconsistent dimension errored messages

* Add more info to the log

* More log
  • Loading branch information
jotare authored Jun 21, 2024
1 parent 0240d91 commit 89c4daa
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 3 deletions.
34 changes: 32 additions & 2 deletions nucliadb_sidecar/src/nucliadb_sidecar/indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@ class MetadataNotFoundError(IndexNodeError):
pass


class InconsistentDimensionsError(IndexNodeError):
pass


class ShardNotFound(IndexNodeError):
pass

Expand Down Expand Up @@ -398,7 +402,7 @@ async def _index_message(self, pb: IndexMessage):
def _parse_op_status_errors(self, pb: IndexMessage, status: OpStatus):
if status.status == OpStatus.Status.OK:
return
if status.status == OpStatus.Status.ERROR and "Shard not found" in status.detail:
elif status.status == OpStatus.Status.ERROR and "Shard not found" in status.detail:
logger.warning(
f"Shard does not exist {pb.shard}. This message will be ignored",
extra={
Expand All @@ -409,8 +413,10 @@ def _parse_op_status_errors(self, pb: IndexMessage, status: OpStatus):
},
)
raise ShardNotFound()
if status.status == OpStatus.Status.ERROR and "Missing resource metadata" in status.detail:
elif status.status == OpStatus.Status.ERROR and "Missing resource metadata" in status.detail:
raise MetadataNotFoundError()
elif status.status == OpStatus.Status.ERROR and "Inconsistent dimensions" in status.detail:
raise InconsistentDimensionsError()
raise IndexNodeError(status.detail)

@contextlib.contextmanager
Expand Down Expand Up @@ -456,6 +462,18 @@ async def _set_resource(self, pb: IndexMessage) -> None:
},
)
return
except InconsistentDimensionsError:
logger.error(
"Trying to index vectors with an incorrect dimension! This should not happen!"
"We drop the message to not block the queue",
extra={
"kbid": pb.kbid,
"shard": pb.shard,
"rid": pb.resource,
"storage_key": pb.storage_key,
},
)
return

async def _set_resource_from_storage(self, pb: IndexMessage) -> None:
"""
Expand Down Expand Up @@ -488,6 +506,18 @@ async def _set_resource_from_storage(self, pb: IndexMessage) -> None:
},
)
return
except InconsistentDimensionsError:
logger.error(
"Trying to index vectors with an incorrect dimension! This should not happen!"
"We drop the message to not block the queue",
extra={
"kbid": pb.kbid,
"shard": pb.shard,
"rid": pb.resource,
"storage_key": pb.storage_key,
},
)
return

async def _delete_resource(self, pb: IndexMessage) -> None:
shard_id = pb.shard
Expand Down
19 changes: 18 additions & 1 deletion nucliadb_sidecar/tests/unit/test_indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,13 +314,30 @@ async def test_node_writer_status_shard_not_found_errors_are_handled(
):
status = OpStatus()
status.status = OpStatus.Status.ERROR
status.detail = 'status: NotFound, message: "Shard not found: Shard'
status.detail = 'status: NotFound, message: "Shard not found: Shard"'

writer.set_resource_from_storage.return_value = status
pb = IndexMessage()
pb.typemessage = TypeMessage.CREATION
await indexer._index_message(pb)

@pytest.mark.asyncio
async def test_inconsistent_dimension_errors_are_handled(
self,
indexer: PriorityIndexer,
writer: AsyncMock,
):
status = OpStatus()
status.status = OpStatus.Status.ERROR
status.detail = 'status: Error, message: "Inconsistent dimensions"'

writer.set_resource = AsyncMock(return_value=status)
writer.set_resource_from_storage = AsyncMock(return_value=status)

pb = IndexMessage()
pb.typemessage = TypeMessage.CREATION
await indexer._index_message(pb)

@pytest.mark.asyncio
async def test_node_writer_aio_rpc_errors_are_handled(
self, indexer: PriorityIndexer, writer: AsyncMock
Expand Down

2 comments on commit 89c4daa

@github-actions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

Benchmark suite Current: 89c4daa Previous: 0d03d9f Ratio
tests/search/unit/search/test_fetch.py::test_highligh_error 2258.1453431685286 iter/sec (stddev: 0.0000029271035847457216) 2841.0684406726436 iter/sec (stddev: 0.000004954958228416619) 1.26

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

Benchmark suite Current: 89c4daa Previous: 0d03d9f Ratio
tests/search/unit/search/test_fetch.py::test_highligh_error 1903.3817861062973 iter/sec (stddev: 0.000008732062297549518) 2841.0684406726436 iter/sec (stddev: 0.000004954958228416619) 1.49

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.