Skip to content

Commit

Permalink
feat(eap): Endpoint to get the tags available for a project (#6270)
Browse files Browse the repository at this point in the history
This is important for things like the search bar, where you want to
discover tags you can use

---------

Co-authored-by: volokluev <[email protected]>
  • Loading branch information
colin-sentry and volokluev authored Sep 6, 2024
1 parent 81798c3 commit 23508fd
Show file tree
Hide file tree
Showing 4 changed files with 267 additions and 1 deletion.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,4 @@ sqlparse==0.4.2
google-api-python-client==2.88.0
sentry-usage-accountant==0.0.10
freezegun==1.2.2
sentry-protos==0.1.15
sentry-protos==0.1.16
73 changes: 73 additions & 0 deletions snuba/web/rpc/tags_list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from datetime import datetime
from typing import List, Optional

from sentry_protos.snuba.v1alpha.endpoint_tags_list_pb2 import (
TagsListRequest,
TagsListResponse,
)

from snuba.clickhouse.formatter.nodes import FormattedQuery, StringNode
from snuba.datasets.schemas.tables import TableSource
from snuba.datasets.storages.factory import get_storage
from snuba.datasets.storages.storage_key import StorageKey
from snuba.utils.metrics.timer import Timer
from snuba.web.rpc.exceptions import BadSnubaRPCRequestException


def tags_list_query(
request: TagsListRequest, _timer: Optional[Timer] = None
) -> TagsListResponse:
str_storage = get_storage(StorageKey("spans_str_attrs"))
num_storage = get_storage(StorageKey("spans_num_attrs"))

str_data_source = str_storage.get_schema().get_data_source()
assert isinstance(str_data_source, TableSource)
num_data_source = num_storage.get_schema().get_data_source()
assert isinstance(num_data_source, TableSource)

if request.limit > 1000:
raise BadSnubaRPCRequestException("Limit can be at most 1000")

start_timestamp = datetime.utcfromtimestamp(request.meta.start_timestamp.seconds)
if start_timestamp.day >= datetime.utcnow().day and start_timestamp.hour != 0:
raise BadSnubaRPCRequestException(
"Tags' timestamps are stored per-day, you probably want to set start_timestamp to UTC 00:00 today or a time yesterday."
)

query = f"""
SELECT * FROM (
SELECT DISTINCT attr_key, 'str' as type, timestamp
FROM {str_data_source.get_table_name()}
WHERE organization_id={request.meta.organization_id}
AND project_id IN ({', '.join(str(pid) for pid in request.meta.project_ids)})
AND timestamp BETWEEN fromUnixTimestamp({request.meta.start_timestamp.seconds}) AND fromUnixTimestamp({request.meta.end_timestamp.seconds})
UNION ALL
SELECT DISTINCT attr_key, 'num' as type, timestamp
FROM {num_data_source.get_table_name()}
WHERE organization_id={request.meta.organization_id}
AND project_id IN ({', '.join(str(pid) for pid in request.meta.project_ids)})
AND timestamp BETWEEN fromUnixTimestamp({request.meta.start_timestamp.seconds}) AND fromUnixTimestamp({request.meta.end_timestamp.seconds})
)
ORDER BY attr_key
LIMIT {request.limit} OFFSET {request.offset}
"""

cluster = str_storage.get_cluster()
reader = cluster.get_reader()
result = reader.execute(FormattedQuery([StringNode(query)]))

tags: List[TagsListResponse.Tag] = []
for row in result.get("data", []):
tags.append(
TagsListResponse.Tag(
name=row["attr_key"],
type={
"str": TagsListResponse.TYPE_STRING,
"num": TagsListResponse.TYPE_NUMBER,
}[row["type"]],
)
)

return TagsListResponse(tags=tags)
3 changes: 3 additions & 0 deletions snuba/web/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
AggregateBucketRequest,
)
from sentry_protos.snuba.v1alpha.endpoint_span_samples_pb2 import SpanSamplesRequest
from sentry_protos.snuba.v1alpha.endpoint_tags_list_pb2 import TagsListRequest
from werkzeug import Response as WerkzeugResponse
from werkzeug.exceptions import InternalServerError

Expand Down Expand Up @@ -82,6 +83,7 @@
from snuba.web.query import parse_and_run_query
from snuba.web.rpc.exceptions import BadSnubaRPCRequestException
from snuba.web.rpc.span_samples import span_samples_query as span_samples_query
from snuba.web.rpc.tags_list import tags_list_query
from snuba.web.rpc.timeseries import timeseries_query as timeseries_query
from snuba.writer import BatchWriterEncoderWrapper, WriterTableRow

Expand Down Expand Up @@ -286,6 +288,7 @@ def rpc(*, name: str, timer: Timer) -> Response:
] = {
"AggregateBucketRequest": (timeseries_query, AggregateBucketRequest),
"SpanSamplesRequest": (span_samples_query, SpanSamplesRequest),
"TagsListRequest": (tags_list_query, TagsListRequest),
}
try:
endpoint, req_class = rpcs[name]
Expand Down
190 changes: 190 additions & 0 deletions tests/web/rpc/test_tags_list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
import uuid
from datetime import UTC, datetime, timedelta
from typing import Any, Mapping

import pytest
from google.protobuf.timestamp_pb2 import Timestamp
from sentry_protos.snuba.v1alpha.endpoint_tags_list_pb2 import (
TagsListRequest,
TagsListResponse,
)
from sentry_protos.snuba.v1alpha.request_common_pb2 import RequestMeta

from snuba.datasets.storages.factory import get_storage
from snuba.datasets.storages.storage_key import StorageKey
from snuba.web.rpc.tags_list import tags_list_query
from tests.base import BaseApiTest
from tests.helpers import write_raw_unprocessed_events

BASE_TIME = datetime.utcnow().replace(minute=0, second=0, microsecond=0) - timedelta(
minutes=180
)


def gen_message(id: int) -> Mapping[str, Any]:
res = {
"description": "/api/0/relays/projectconfigs/",
"duration_ms": 152,
"event_id": "d826225de75d42d6b2f01b957d51f18f",
"exclusive_time_ms": 0.228,
"is_segment": True,
"data": {},
"measurements": {},
"organization_id": 1,
"origin": "auto.http.django",
"project_id": 1,
"received": 1721319572.877828,
"retention_days": 90,
"segment_id": "8873a98879faf06d",
"sentry_tags": {
"category": "http",
},
"span_id": uuid.uuid4().hex,
"tags": {
"http.status_code": "200",
},
"trace_id": uuid.uuid4().hex,
"start_timestamp_ms": int(BASE_TIME.timestamp() * 1000),
"start_timestamp_precise": BASE_TIME.timestamp(),
"end_timestamp_precise": BASE_TIME.timestamp() + 1,
}
for i in range(id * 10, id * 10 + 10):
res["tags"][f"a_tag_{i:03}"] = "blah" # type: ignore
res["measurements"][f"b_measurement_{i:03}"] = {"value": 10} # type: ignore
return res


@pytest.fixture(autouse=True)
def setup_teardown(clickhouse_db: None, redis_db: None) -> None:
spans_storage = get_storage(StorageKey("eap_spans"))
messages = [gen_message(i) for i in range(3)]
write_raw_unprocessed_events(spans_storage, messages) # type: ignore


@pytest.mark.clickhouse_db
@pytest.mark.redis_db
class TestTagsList(BaseApiTest):
def test_basic(self) -> None:
ts = Timestamp()
ts.GetCurrentTime()
message = TagsListRequest(
meta=RequestMeta(
project_ids=[1, 2, 3],
organization_id=1,
cogs_category="something",
referrer="something",
start_timestamp=Timestamp(
seconds=int(
datetime(
year=BASE_TIME.year,
month=BASE_TIME.month,
day=BASE_TIME.day,
tzinfo=UTC,
).timestamp()
)
),
end_timestamp=Timestamp(
seconds=int(
datetime(
year=BASE_TIME.year,
month=BASE_TIME.month,
day=BASE_TIME.day + 1,
tzinfo=UTC,
).timestamp()
)
),
),
limit=10,
offset=20,
)
response = self.app.post(
"/rpc/TagsListRequest", data=message.SerializeToString()
)
assert response.status_code == 200

def test_simple_case(self, setup_teardown: Any) -> None:
message = TagsListRequest(
meta=RequestMeta(
project_ids=[1, 2, 3],
organization_id=1,
cogs_category="something",
referrer="something",
start_timestamp=Timestamp(
seconds=int(
datetime(
year=BASE_TIME.year,
month=BASE_TIME.month,
day=BASE_TIME.day - 1,
tzinfo=UTC,
).timestamp()
)
),
end_timestamp=Timestamp(
seconds=int(
datetime(
year=BASE_TIME.year,
month=BASE_TIME.month,
day=BASE_TIME.day + 1,
tzinfo=UTC,
).timestamp()
)
),
),
limit=10,
offset=0,
)
response = tags_list_query(message)
assert response.tags == [
TagsListResponse.Tag(
name=f"a_tag_{i:03}", type=TagsListResponse.TYPE_STRING
)
for i in range(0, 10)
]

def test_with_offset(self, setup_teardown: Any) -> None:
message = TagsListRequest(
meta=RequestMeta(
project_ids=[1, 2, 3],
organization_id=1,
cogs_category="something",
referrer="something",
start_timestamp=Timestamp(
seconds=int(
datetime(
year=BASE_TIME.year,
month=BASE_TIME.month,
day=BASE_TIME.day - 1,
tzinfo=UTC,
).timestamp()
)
),
end_timestamp=Timestamp(
seconds=int(
datetime(
year=BASE_TIME.year,
month=BASE_TIME.month,
day=BASE_TIME.day + 1,
tzinfo=UTC,
).timestamp()
)
),
),
limit=5,
offset=29,
)
response = tags_list_query(message)
assert response.tags == [
TagsListResponse.Tag(name="a_tag_029", type=TagsListResponse.TYPE_STRING),
TagsListResponse.Tag(
name="b_measurement_000", type=TagsListResponse.TYPE_NUMBER
),
TagsListResponse.Tag(
name="b_measurement_001", type=TagsListResponse.TYPE_NUMBER
),
TagsListResponse.Tag(
name="b_measurement_002", type=TagsListResponse.TYPE_NUMBER
),
TagsListResponse.Tag(
name="b_measurement_003", type=TagsListResponse.TYPE_NUMBER
),
]

0 comments on commit 23508fd

Please sign in to comment.