From 23508fd42ee9c854342e73fc3075eaa8a28854e1 Mon Sep 17 00:00:00 2001
From: colin-sentry <161344340+colin-sentry@users.noreply.github.com>
Date: Thu, 5 Sep 2024 21:26:31 -0400
Subject: [PATCH] feat(eap): Endpoint to get the tags available for a project
 (#6270)

This is important for things like the search bar, where you want to
discover tags you can use

---------

Co-authored-by: volokluev <3169433+volokluev@users.noreply.github.com>
---
 requirements.txt                |   2 +-
 snuba/web/rpc/tags_list.py      |  73 ++++++++++++
 snuba/web/views.py              |   3 +
 tests/web/rpc/test_tags_list.py | 190 ++++++++++++++++++++++++++++++++
 4 files changed, 267 insertions(+), 1 deletion(-)
 create mode 100644 snuba/web/rpc/tags_list.py
 create mode 100644 tests/web/rpc/test_tags_list.py

diff --git a/requirements.txt b/requirements.txt
index c54fcbe274..3723e817ee 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -45,4 +45,4 @@ sqlparse==0.4.2
 google-api-python-client==2.88.0
 sentry-usage-accountant==0.0.10
 freezegun==1.2.2
-sentry-protos==0.1.15
+sentry-protos==0.1.16
diff --git a/snuba/web/rpc/tags_list.py b/snuba/web/rpc/tags_list.py
new file mode 100644
index 0000000000..2febde7de0
--- /dev/null
+++ b/snuba/web/rpc/tags_list.py
@@ -0,0 +1,73 @@
+from datetime import datetime
+from typing import List, Optional
+
+from sentry_protos.snuba.v1alpha.endpoint_tags_list_pb2 import (
+    TagsListRequest,
+    TagsListResponse,
+)
+
+from snuba.clickhouse.formatter.nodes import FormattedQuery, StringNode
+from snuba.datasets.schemas.tables import TableSource
+from snuba.datasets.storages.factory import get_storage
+from snuba.datasets.storages.storage_key import StorageKey
+from snuba.utils.metrics.timer import Timer
+from snuba.web.rpc.exceptions import BadSnubaRPCRequestException
+
+
+def tags_list_query(
+    request: TagsListRequest, _timer: Optional[Timer] = None
+) -> TagsListResponse:
+    str_storage = get_storage(StorageKey("spans_str_attrs"))
+    num_storage = get_storage(StorageKey("spans_num_attrs"))
+
+    str_data_source = str_storage.get_schema().get_data_source()
+    assert isinstance(str_data_source, TableSource)
+    num_data_source = num_storage.get_schema().get_data_source()
+    assert isinstance(num_data_source, TableSource)
+
+    if request.limit > 1000:
+        raise BadSnubaRPCRequestException("Limit can be at most 1000")
+
+    start_timestamp = datetime.utcfromtimestamp(request.meta.start_timestamp.seconds)
+    if start_timestamp.day >= datetime.utcnow().day and start_timestamp.hour != 0:
+        raise BadSnubaRPCRequestException(
+            "Tags' timestamps are stored per-day, you probably want to set start_timestamp to UTC 00:00 today or a time yesterday."
+        )
+
+    query = f"""
+SELECT * FROM (
+    SELECT DISTINCT attr_key, 'str' as type, timestamp
+    FROM {str_data_source.get_table_name()}
+    WHERE organization_id={request.meta.organization_id}
+        AND project_id IN ({', '.join(str(pid) for pid in request.meta.project_ids)})
+        AND timestamp BETWEEN fromUnixTimestamp({request.meta.start_timestamp.seconds}) AND fromUnixTimestamp({request.meta.end_timestamp.seconds})
+
+    UNION ALL
+
+    SELECT DISTINCT attr_key, 'num' as type, timestamp
+    FROM {num_data_source.get_table_name()}
+    WHERE organization_id={request.meta.organization_id}
+        AND project_id IN ({', '.join(str(pid) for pid in request.meta.project_ids)})
+        AND timestamp BETWEEN fromUnixTimestamp({request.meta.start_timestamp.seconds}) AND fromUnixTimestamp({request.meta.end_timestamp.seconds})
+)
+ORDER BY attr_key
+LIMIT {request.limit} OFFSET {request.offset}
+"""
+
+    cluster = str_storage.get_cluster()
+    reader = cluster.get_reader()
+    result = reader.execute(FormattedQuery([StringNode(query)]))
+
+    tags: List[TagsListResponse.Tag] = []
+    for row in result.get("data", []):
+        tags.append(
+            TagsListResponse.Tag(
+                name=row["attr_key"],
+                type={
+                    "str": TagsListResponse.TYPE_STRING,
+                    "num": TagsListResponse.TYPE_NUMBER,
+                }[row["type"]],
+            )
+        )
+
+    return TagsListResponse(tags=tags)
diff --git a/snuba/web/views.py b/snuba/web/views.py
index e8866eda29..088a9f9e5d 100644
--- a/snuba/web/views.py
+++ b/snuba/web/views.py
@@ -40,6 +40,7 @@
     AggregateBucketRequest,
 )
 from sentry_protos.snuba.v1alpha.endpoint_span_samples_pb2 import SpanSamplesRequest
+from sentry_protos.snuba.v1alpha.endpoint_tags_list_pb2 import TagsListRequest
 from werkzeug import Response as WerkzeugResponse
 from werkzeug.exceptions import InternalServerError
@@ -82,6 +83,7 @@
 from snuba.web.query import parse_and_run_query
 from snuba.web.rpc.exceptions import BadSnubaRPCRequestException
 from snuba.web.rpc.span_samples import span_samples_query as span_samples_query
+from snuba.web.rpc.tags_list import tags_list_query
 from snuba.web.rpc.timeseries import timeseries_query as timeseries_query
 from snuba.writer import BatchWriterEncoderWrapper, WriterTableRow
@@ -286,6 +288,7 @@ def rpc(*, name: str, timer: Timer) -> Response:
     ] = {
         "AggregateBucketRequest": (timeseries_query, AggregateBucketRequest),
         "SpanSamplesRequest": (span_samples_query, SpanSamplesRequest),
+        "TagsListRequest": (tags_list_query, TagsListRequest),
     }
     try:
         endpoint, req_class = rpcs[name]
diff --git a/tests/web/rpc/test_tags_list.py b/tests/web/rpc/test_tags_list.py
new file mode 100644
index 0000000000..bb826ac7ba
--- /dev/null
+++ b/tests/web/rpc/test_tags_list.py
@@ -0,0 +1,190 @@
+import uuid
+from datetime import UTC, datetime, timedelta
+from typing import Any, Mapping
+
+import pytest
+from google.protobuf.timestamp_pb2 import Timestamp
+from sentry_protos.snuba.v1alpha.endpoint_tags_list_pb2 import (
+    TagsListRequest,
+    TagsListResponse,
+)
+from sentry_protos.snuba.v1alpha.request_common_pb2 import RequestMeta
+
+from snuba.datasets.storages.factory import get_storage
+from snuba.datasets.storages.storage_key import StorageKey
+from snuba.web.rpc.tags_list import tags_list_query
+from tests.base import BaseApiTest
+from tests.helpers import write_raw_unprocessed_events
+
+BASE_TIME = datetime.utcnow().replace(minute=0, second=0, microsecond=0) - timedelta(
+    minutes=180
+)
+
+
+def gen_message(id: int) -> Mapping[str, Any]:
+    res = {
+        "description": "/api/0/relays/projectconfigs/",
+        "duration_ms": 152,
+        "event_id": "d826225de75d42d6b2f01b957d51f18f",
+        "exclusive_time_ms": 0.228,
+        "is_segment": True,
+        "data": {},
+        "measurements": {},
+        "organization_id": 1,
+        "origin": "auto.http.django",
+        "project_id": 1,
+        "received": 1721319572.877828,
+        "retention_days": 90,
+        "segment_id": "8873a98879faf06d",
+        "sentry_tags": {
+            "category": "http",
+        },
+        "span_id": uuid.uuid4().hex,
+        "tags": {
+            "http.status_code": "200",
+        },
+        "trace_id": uuid.uuid4().hex,
+        "start_timestamp_ms": int(BASE_TIME.timestamp() * 1000),
+        "start_timestamp_precise": BASE_TIME.timestamp(),
+        "end_timestamp_precise": BASE_TIME.timestamp() + 1,
+    }
+    for i in range(id * 10, id * 10 + 10):
+        res["tags"][f"a_tag_{i:03}"] = "blah"  # type: ignore
+        res["measurements"][f"b_measurement_{i:03}"] = {"value": 10}  # type: ignore
+    return res
+
+
+@pytest.fixture(autouse=True)
+def setup_teardown(clickhouse_db: None, redis_db: None) -> None:
+    spans_storage = get_storage(StorageKey("eap_spans"))
+    messages = [gen_message(i) for i in range(3)]
+    write_raw_unprocessed_events(spans_storage, messages)  # type: ignore
+
+
+@pytest.mark.clickhouse_db
+@pytest.mark.redis_db
+class TestTagsList(BaseApiTest):
+    def test_basic(self) -> None:
+        ts = Timestamp()
+        ts.GetCurrentTime()
+        message = TagsListRequest(
+            meta=RequestMeta(
+                project_ids=[1, 2, 3],
+                organization_id=1,
+                cogs_category="something",
+                referrer="something",
+                start_timestamp=Timestamp(
+                    seconds=int(
+                        datetime(
+                            year=BASE_TIME.year,
+                            month=BASE_TIME.month,
+                            day=BASE_TIME.day,
+                            tzinfo=UTC,
+                        ).timestamp()
+                    )
+                ),
+                end_timestamp=Timestamp(
+                    seconds=int(
+                        datetime(
+                            year=BASE_TIME.year,
+                            month=BASE_TIME.month,
+                            day=BASE_TIME.day + 1,
+                            tzinfo=UTC,
+                        ).timestamp()
+                    )
+                ),
+            ),
+            limit=10,
+            offset=20,
+        )
+        response = self.app.post(
+            "/rpc/TagsListRequest", data=message.SerializeToString()
+        )
+        assert response.status_code == 200
+
+    def test_simple_case(self, setup_teardown: Any) -> None:
+        message = TagsListRequest(
+            meta=RequestMeta(
+                project_ids=[1, 2, 3],
+                organization_id=1,
+                cogs_category="something",
+                referrer="something",
+                start_timestamp=Timestamp(
+                    seconds=int(
+                        datetime(
+                            year=BASE_TIME.year,
+                            month=BASE_TIME.month,
+                            day=BASE_TIME.day - 1,
+                            tzinfo=UTC,
+                        ).timestamp()
+                    )
+                ),
+                end_timestamp=Timestamp(
+                    seconds=int(
+                        datetime(
+                            year=BASE_TIME.year,
+                            month=BASE_TIME.month,
+                            day=BASE_TIME.day + 1,
+                            tzinfo=UTC,
+                        ).timestamp()
+                    )
+                ),
+            ),
+            limit=10,
+            offset=0,
+        )
+        response = tags_list_query(message)
+        assert response.tags == [
+            TagsListResponse.Tag(
+                name=f"a_tag_{i:03}", type=TagsListResponse.TYPE_STRING
+            )
+            for i in range(0, 10)
+        ]
+
+    def test_with_offset(self, setup_teardown: Any) -> None:
+        message = TagsListRequest(
+            meta=RequestMeta(
+                project_ids=[1, 2, 3],
+                organization_id=1,
+                cogs_category="something",
+                referrer="something",
+                start_timestamp=Timestamp(
+                    seconds=int(
+                        datetime(
+                            year=BASE_TIME.year,
+                            month=BASE_TIME.month,
+                            day=BASE_TIME.day - 1,
+                            tzinfo=UTC,
+                        ).timestamp()
+                    )
+                ),
+                end_timestamp=Timestamp(
+                    seconds=int(
+                        datetime(
+                            year=BASE_TIME.year,
+                            month=BASE_TIME.month,
+                            day=BASE_TIME.day + 1,
+                            tzinfo=UTC,
+                        ).timestamp()
+                    )
+                ),
+            ),
+            limit=5,
+            offset=29,
+        )
+        response = tags_list_query(message)
+        assert response.tags == [
+            TagsListResponse.Tag(name="a_tag_029", type=TagsListResponse.TYPE_STRING),
+            TagsListResponse.Tag(
+                name="b_measurement_000", type=TagsListResponse.TYPE_NUMBER
+            ),
+            TagsListResponse.Tag(
+                name="b_measurement_001", type=TagsListResponse.TYPE_NUMBER
+            ),
+            TagsListResponse.Tag(
+                name="b_measurement_002", type=TagsListResponse.TYPE_NUMBER
+            ),
+            TagsListResponse.Tag(
+                name="b_measurement_003", type=TagsListResponse.TYPE_NUMBER
+            ),
+        ]
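
Note for reviewers: a minimal sketch of calling the new endpoint from outside the test suite follows. It mirrors the request construction in test_tags_list.py and posts to the /rpc/TagsListRequest route registered in views.py. The SNUBA_URL host is a placeholder, the cogs_category/referrer values are copied from the tests, and parsing the HTTP body with TagsListResponse.FromString assumes the RPC handler returns the serialized response message (that part of views.py is not shown in this diff).

    # Hypothetical client sketch for the TagsListRequest RPC added by this patch.
    from datetime import datetime, timedelta, timezone

    import requests
    from google.protobuf.timestamp_pb2 import Timestamp
    from sentry_protos.snuba.v1alpha.endpoint_tags_list_pb2 import (
        TagsListRequest,
        TagsListResponse,
    )
    from sentry_protos.snuba.v1alpha.request_common_pb2 import RequestMeta

    SNUBA_URL = "http://localhost:1218"  # placeholder: local Snuba instance

    # Tags are stored per-day, so tags_list_query expects start_timestamp at UTC midnight.
    start = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0)
    end = start + timedelta(days=1)

    message = TagsListRequest(
        meta=RequestMeta(
            organization_id=1,
            project_ids=[1],
            cogs_category="something",
            referrer="something",
            start_timestamp=Timestamp(seconds=int(start.timestamp())),
            end_timestamp=Timestamp(seconds=int(end.timestamp())),
        ),
        limit=100,  # must be at most 1000, per the check in tags_list_query
        offset=0,
    )

    # The RPC route dispatches on the request class name, so the path segment is the
    # protobuf message name and the body is the serialized request.
    resp = requests.post(
        f"{SNUBA_URL}/rpc/TagsListRequest", data=message.SerializeToString()
    )
    resp.raise_for_status()

    # Assumption: the body is a serialized TagsListResponse.
    tags = TagsListResponse.FromString(resp.content)
    for tag in tags.tags:
        kind = "str" if tag.type == TagsListResponse.TYPE_STRING else "num"
        print(f"{tag.name} ({kind})")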