Skip to content

Commit

Permalink
Add track_total_hits parameter to doc-count operation (#474)
Browse files Browse the repository at this point in the history
The track_total_hits parameter forces an accurate doc-count even when the hit count for _search is above the default 10K limit. This resolves a bug where the total doc count was not being computed properly. Unit tests have also been updated to match the new path.

Signed-off-by: Kartik Ganesh <[email protected]>
  • Loading branch information
kartg authored Dec 14, 2023
1 parent d83515d commit c14529b
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 4 deletions.
6 changes: 4 additions & 2 deletions FetchMigration/python/index_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,10 @@
COUNT_KEY = "count"
__INDEX_KEY = "index"
__ALL_INDICES_ENDPOINT = "*"
__SEARCH_COUNT_PATH = "/_search?size=0"
__SEARCH_COUNT_PAYLOAD = {"aggs": {"count": {"terms": {"field": "_index"}}}}
# (ES 7+) size=0 omits the "hits" payload to reduce the response size since we're only interested in the aggregation,
# and track_total_hits forces an accurate doc-count even when the hit count is above the default 10K limit
__SEARCH_COUNT_PATH = "/_search"
__SEARCH_COUNT_PAYLOAD = {"size": 0, "track_total_hits": True, "aggs": {"count": {"terms": {"field": "_index"}}}}
__TOTAL_COUNT_JSONPATH = jsonpath_ng.parse("$.hits.total.value")
__INDEX_COUNT_JSONPATH = jsonpath_ng.parse("$.aggregations.count.buckets")
__BUCKET_INDEX_NAME_KEY = "key"
Expand Down
4 changes: 2 additions & 2 deletions FetchMigration/python/tests/test_index_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def test_doc_count(self):
for index_name in test_indices:
test_buckets.append({"key": index_name, "doc_count": index_doc_count})
total_docs: int = index_doc_count * len(test_buckets)
expected_count_endpoint = test_constants.SOURCE_ENDPOINT + ",".join(test_indices) + "/_search?size=0"
expected_count_endpoint = test_constants.SOURCE_ENDPOINT + ",".join(test_indices) + "/_search"
mock_count_response = {"hits": {"total": {"value": total_docs}},
"aggregations": {"count": {"buckets": test_buckets}}}
responses.get(expected_count_endpoint, json=mock_count_response)
Expand All @@ -110,7 +110,7 @@ def test_doc_count(self):
@responses.activate
def test_doc_count_error(self):
test_indices = {test_constants.INDEX1_NAME, test_constants.INDEX2_NAME}
expected_count_endpoint = test_constants.SOURCE_ENDPOINT + ",".join(test_indices) + "/_search?size=0"
expected_count_endpoint = test_constants.SOURCE_ENDPOINT + ",".join(test_indices) + "/_search"
responses.get(expected_count_endpoint, body=requests.Timeout())
self.assertRaises(RuntimeError, index_operations.doc_count, test_indices,
EndpointInfo(test_constants.SOURCE_ENDPOINT))
Expand Down

0 comments on commit c14529b

Please sign in to comment.