diff --git a/src/sentry/features/temporary.py b/src/sentry/features/temporary.py index 8cb6f303490c00..7b36d92b760341 100644 --- a/src/sentry/features/temporary.py +++ b/src/sentry/features/temporary.py @@ -236,8 +236,6 @@ def register_temporary_features(manager: FeatureManager) -> None: manager.add("organizations:replay-ai-summaries-mobile", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True) # Enable replay AI summaries for web replays manager.add("organizations:replay-ai-summaries", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True) - # Enable reading replay details using EAP query - manager.add("organizations:replay-details-eap-query", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False) # Enable using the events replays dataset manager.add("organizations:events-use-replays-dataset", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False) # Enable using the events api with a sql interface diff --git a/src/sentry/replays/endpoints/organization_replay_details.py b/src/sentry/replays/endpoints/organization_replay_details.py index 16f229d11a2764..472b18516f9d5e 100644 --- a/src/sentry/replays/endpoints/organization_replay_details.py +++ b/src/sentry/replays/endpoints/organization_replay_details.py @@ -1,23 +1,10 @@ import uuid -from datetime import datetime, timezone from typing import TypedDict from drf_spectacular.utils import extend_schema from rest_framework.request import Request from rest_framework.response import Response -from snuba_sdk import ( - Column, - Condition, - Direction, - Entity, - Function, - Granularity, - Op, - OrderBy, - Query, -) -from sentry import features from sentry.api.api_publish_status import ApiPublishStatus from sentry.api.base import cell_silo_endpoint from sentry.api.bases.organization import NoProjects @@ -28,8 +15,6 @@ from sentry.constants import ALL_ACCESS_PROJECTS from sentry.models.organization import Organization from sentry.replays.endpoints.organization_replay_endpoint import OrganizationReplayEndpoint -from sentry.replays.lib.eap import read as eap_read -from sentry.replays.lib.eap.snuba_transpiler import RequestMeta, Settings from sentry.replays.post_process import ReplayDetailsResponse, process_raw_response from sentry.replays.query import query_replay_instance from sentry.replays.validators import ReplayValidator @@ -39,186 +24,6 @@ class GetReplayResponse(TypedDict): data: ReplayDetailsResponse -def _query_replay_urls_eap( - replay_id: str, - project_ids: list[int], - start: datetime, - end: datetime, - organization_id: int, -) -> list[str]: - """Query URLs for a replay from EAP breadcrumb events.""" - replay_id_no_dashes = replay_id.replace("-", "") - - first_seen_agg = Function("min", parameters=[Column("sentry.timestamp")], alias="first_seen") - - select = [ - Column("to"), - first_seen_agg, - ] - - snuba_query = Query( - match=Entity("replays"), - select=select, - where=[ - Condition(Column("replay_id"), Op.EQ, replay_id_no_dashes), - Condition(Column("category"), Op.EQ, "navigation"), - ], - groupby=[Column("to")], - orderby=[OrderBy(first_seen_agg, Direction.ASC)], - ) - - settings = Settings( - attribute_types={ - "replay_id": str, - "category": str, - "sentry.timestamp": float, - "to": str, - }, - default_limit=1000, - default_offset=0, - ) - - request_meta = RequestMeta( - cogs_category="replays", - debug=False, - start_datetime=start, - end_datetime=end, - organization_id=organization_id, - project_ids=project_ids, - referrer="replays.query.urls", - request_id=str(uuid.uuid4().hex), - trace_item_type="replay", - ) - - result = eap_read.query(snuba_query, settings, request_meta, []) - - urls: list[str] = [] - for row in result.get("data", []): - url = row.get("to") - if url and isinstance(url, str): - urls.append(url) - - return urls - - -def _normalize_eap_response(data: list[dict]) -> list[dict]: - """Normalize EAP response data for frontend compatibility. - - - Convert float timestamps to ISO strings - - Convert agg_project_id from float to int - """ - for item in data: - if "started_at" in item and isinstance(item["started_at"], float): - item["started_at"] = datetime.fromtimestamp( - item["started_at"], tz=timezone.utc - ).isoformat() - if "finished_at" in item and isinstance(item["finished_at"], float): - item["finished_at"] = datetime.fromtimestamp( - item["finished_at"], tz=timezone.utc - ).isoformat() - - # Convert project_id from float to int to avoid ".0" in output - if "agg_project_id" in item and isinstance(item["agg_project_id"], float): - item["agg_project_id"] = int(item["agg_project_id"]) - return data - - -def query_replay_instance_eap( - project_ids: list[int], - replay_ids: list[str], - start: datetime, - end: datetime, - organization_id: int, - request_user_id: int | None, - referrer: str = "replays.query.details_query", -): - # EAP stores replay_id in hex without dashes - replay_ids_no_dashes = [replay_id.replace("-", "") for replay_id in replay_ids] - - select = [ - Column("replay_id"), - Function("min", parameters=[Column("sentry.project_id")], alias="agg_project_id"), - Function("min", parameters=[Column("sentry.timestamp")], alias="started_at"), - Function("max", parameters=[Column("sentry.timestamp")], alias="finished_at"), - Function("count", parameters=[Column("segment_id")], alias="count_segments"), - Function("sum", parameters=[Column("count_error_events")], alias="count_errors"), - Function("sum", parameters=[Column("count_warning_events")], alias="count_warnings"), - Function("sum", parameters=[Column("count_info_events")], alias="count_infos"), - Function( - "sumIf", - parameters=[ - Column("click_is_dead"), - Function( - "greaterOrEquals", - [ - Column("sentry.timestamp"), - int(datetime(year=2023, month=7, day=24).timestamp()), - ], - ), - ], - alias="count_dead_clicks", - ), - Function( - "sumIf", - parameters=[ - Column("click_is_rage"), - Function( - "greaterOrEquals", - [ - Column("sentry.timestamp"), - int(datetime(year=2023, month=7, day=24).timestamp()), - ], - ), - ], - alias="count_rage_clicks", - ), - Function("max", parameters=[Column("is_archived")], alias="isArchived"), - ] - - snuba_query = Query( - match=Entity("replays"), - select=select, - where=[ - Condition(Column("replay_id"), Op.IN, replay_ids_no_dashes), - ], - groupby=[Column("replay_id")], - granularity=Granularity(3600), - ) - - settings = Settings( - attribute_types={ - "replay_id": str, - "sentry.project_id": int, - "sentry.timestamp": float, - "segment_id": int, - "is_archived": int, - "count_error_events": int, - "count_warning_events": int, - "count_info_events": int, - "click_is_dead": int, - "click_is_rage": int, - }, - default_limit=100, - default_offset=0, - ) - - request_meta = RequestMeta( - cogs_category="replays", - debug=False, - start_datetime=start, - end_datetime=end, - organization_id=organization_id, - project_ids=project_ids, - referrer=referrer, - request_id=str(uuid.uuid4().hex), - trace_item_type="replay", - ) - result = eap_read.query(snuba_query, settings, request_meta, []) - # Normalize EAP-specific data types (floats -> ints/ISO strings) - result["data"] = _normalize_eap_response(result["data"]) - return result - - @cell_silo_endpoint @extend_schema(tags=["Replays"]) class OrganizationReplayDetailsEndpoint(OrganizationReplayEndpoint): @@ -269,35 +74,14 @@ def get( projects = self.get_projects(request, organization, include_all_accessible=True) project_ids = [project.id for project in projects] - # Use EAP query if feature flag is enabled - if features.has("organizations:replay-details-eap-query", organization, actor=request.user): - snuba_response = query_replay_instance_eap( - project_ids=project_ids, - replay_ids=[replay_id], - start=filter_params["start"], - end=filter_params["end"], - organization_id=organization.id, - request_user_id=request.user.id, - )["data"] - - if snuba_response: - urls = _query_replay_urls_eap( - replay_id=replay_id, - project_ids=project_ids, - start=filter_params["start"], - end=filter_params["end"], - organization_id=organization.id, - ) - snuba_response[0]["urls_sorted"] = urls - else: - snuba_response = query_replay_instance( - project_id=project_ids, - replay_id=replay_id, - start=filter_params["start"], - end=filter_params["end"], - organization=organization, - request_user_id=request.user.id, - ) + snuba_response = query_replay_instance( + project_id=project_ids, + replay_id=replay_id, + start=filter_params["start"], + end=filter_params["end"], + organization=organization, + request_user_id=request.user.id, + ) replay_data = process_raw_response( snuba_response, diff --git a/tests/sentry/replays/endpoints/test_query_replay_instance_eap.py b/tests/sentry/replays/endpoints/test_query_replay_instance_eap.py deleted file mode 100644 index 3464c46efabdc7..00000000000000 --- a/tests/sentry/replays/endpoints/test_query_replay_instance_eap.py +++ /dev/null @@ -1,236 +0,0 @@ -import datetime -from typing import Any -from uuid import uuid4 - -from sentry.replays.endpoints.organization_replay_details import ( - _normalize_eap_response, - _query_replay_urls_eap, - query_replay_instance_eap, -) -from sentry.testutils.cases import ReplayBreadcrumbType, ReplayEAPTestCase, SnubaTestCase, TestCase - - -class TestQueryReplayInstanceEAP(TestCase, SnubaTestCase, ReplayEAPTestCase): - def test_eap_replay_query(self) -> None: - replay_id1 = uuid4().hex - replay_id2 = uuid4().hex - now = datetime.datetime.now(datetime.UTC) - - replay1_breadcrumbs = [ - # Dead clicks - self.create_eap_replay_breadcrumb( - project=self.project, - replay_id=replay_id1, - segment_id=0, - breadcrumb_type=ReplayBreadcrumbType.DEAD_CLICK, - timestamp=now, - ), - self.create_eap_replay_breadcrumb( - project=self.project, - replay_id=replay_id1, - segment_id=0, - breadcrumb_type=ReplayBreadcrumbType.DEAD_CLICK, - timestamp=now, - ), - # Rage clicks - self.create_eap_replay_breadcrumb( - project=self.project, - replay_id=replay_id1, - segment_id=0, - breadcrumb_type=ReplayBreadcrumbType.RAGE_CLICK, - timestamp=now, - ), - # Regular click - self.create_eap_replay_breadcrumb( - project=self.project, - replay_id=replay_id1, - segment_id=0, - breadcrumb_type=ReplayBreadcrumbType.CLICK, - timestamp=now, - ), - self.create_eap_replay_breadcrumb( - project=self.project, - replay_id=replay_id1, - segment_id=0, - breadcrumb_type=ReplayBreadcrumbType.CLICK, - timestamp=now - datetime.timedelta(seconds=30), - category="navigation", - to="https://example.com/page1", - ), - self.create_eap_replay_breadcrumb( - project=self.project, - replay_id=replay_id1, - segment_id=0, - breadcrumb_type=ReplayBreadcrumbType.CLICK, - timestamp=now - datetime.timedelta(seconds=20), - category="navigation", - to="https://example.com/page2", - ), - self.create_eap_replay_breadcrumb( - project=self.project, - replay_id=replay_id1, - segment_id=0, - breadcrumb_type=ReplayBreadcrumbType.CLICK, - timestamp=now - datetime.timedelta(seconds=10), - category="navigation", - to="https://example.com/page3", - ), - ] - - replay2_breadcrumbs = [ - # Dead clicks - self.create_eap_replay_breadcrumb( - project=self.project, - replay_id=replay_id2, - segment_id=0, - breadcrumb_type=ReplayBreadcrumbType.DEAD_CLICK, - timestamp=now, - ), - # Rage clicks - self.create_eap_replay_breadcrumb( - project=self.project, - replay_id=replay_id2, - segment_id=0, - breadcrumb_type=ReplayBreadcrumbType.RAGE_CLICK, - timestamp=now, - ), - self.create_eap_replay_breadcrumb( - project=self.project, - replay_id=replay_id2, - segment_id=0, - breadcrumb_type=ReplayBreadcrumbType.RAGE_CLICK, - timestamp=now, - ), - ] - - self.store_eap_items(replay1_breadcrumbs + replay2_breadcrumbs) - - start = now - datetime.timedelta(minutes=5) - end = now + datetime.timedelta(minutes=5) - organization_id = self.organization.id - project_ids = [self.project.id] - - res1 = query_replay_instance_eap( - project_ids=project_ids, - replay_ids=[replay_id1], - start=start, - end=end, - request_user_id=self.user.id, - organization_id=organization_id, - ) - res2 = query_replay_instance_eap( - project_ids=project_ids, - replay_ids=[replay_id2], - start=start, - end=end, - request_user_id=self.user.id, - organization_id=organization_id, - ) - - assert isinstance(res1, dict) - assert isinstance(res2, dict) - assert res1.get("data") is not None - assert res2.get("data") is not None - - assert len(res1["data"]) == 1, f"Expected 1 row for replay_id1, got {len(res1['data'])}" - assert len(res2["data"]) == 1, f"Expected 1 row for replay_id2, got {len(res2['data'])}" - - assert res1["data"][0]["replay_id"] == replay_id1 - assert res2["data"][0]["replay_id"] == replay_id2 - - replay1_data = res1["data"][0] - assert "count_segments" in replay1_data - assert "agg_project_id" in replay1_data - - assert isinstance(replay1_data["agg_project_id"], int), ( - f"agg_project_id should be int after normalization, got {type(replay1_data['agg_project_id'])}" - ) - assert replay1_data["agg_project_id"] == self.project.id, ( - f"project_id mismatch: got {replay1_data['agg_project_id']}, expected {self.project.id}" - ) - assert "count_errors" in replay1_data - assert "count_warnings" in replay1_data - assert "count_dead_clicks" in replay1_data - assert "count_rage_clicks" in replay1_data - assert "isArchived" in replay1_data - assert "started_at" in replay1_data - assert "finished_at" in replay1_data - - assert replay1_data["started_at"] is not None, "started_at should not be None" - assert replay1_data["finished_at"] is not None, "finished_at should not be None" - - assert replay1_data["count_dead_clicks"] == 3, "2 DEAD_CLICK + 1 RAGE_CLICK = 3 dead" - assert replay1_data["count_rage_clicks"] == 1, "1 RAGE_CLICK = 1 rage" - - replay2_data = res2["data"][0] - assert replay2_data["count_dead_clicks"] == 3, "1 DEAD_CLICK + 2 RAGE_CLICK = 3 dead" - assert replay2_data["count_rage_clicks"] == 2, "2 RAGE_CLICK = 2 rage" - - # Test URL query for replay1 - urls = _query_replay_urls_eap( - replay_id=replay_id1, - project_ids=project_ids, - start=start, - end=end, - organization_id=organization_id, - ) - assert len(urls) == 3, f"Expected 3 URLs, got {len(urls)}" - assert urls == [ - "https://example.com/page1", - "https://example.com/page2", - "https://example.com/page3", - ], f"URLs should be sorted by timestamp ascending, got {urls}" - - urls2 = _query_replay_urls_eap( - replay_id=replay_id2, - project_ids=project_ids, - start=start, - end=end, - organization_id=organization_id, - ) - assert len(urls2) == 0, f"Expected 0 URLs for replay2, got {len(urls2)}" - - def test_normalize_eap_response(self) -> None: - """Test that EAP response data is correctly normalized. - - - Float timestamps should be converted to ISO strings - - Float project IDs should be converted to integers - """ - start_ts = 1690182000.0 - end_ts = 1690185600.0 - project_id_float = 4557221366136832.0 - - data: list[dict[str, Any]] = [ - { - "replay_id": "test123", - "started_at": start_ts, - "finished_at": end_ts, - "agg_project_id": project_id_float, - }, - { - "replay_id": "test456", - "started_at": None, - "finished_at": None, - "agg_project_id": None, - }, - ] - - normalized = _normalize_eap_response(data) - - # Test timestamp conversion - assert isinstance(normalized[0]["started_at"], str) - assert isinstance(normalized[0]["finished_at"], str) - assert normalized[1]["started_at"] is None - assert normalized[1]["finished_at"] is None - - parsed_start = datetime.datetime.fromisoformat(normalized[0]["started_at"]) - parsed_end = datetime.datetime.fromisoformat(normalized[0]["finished_at"]) - assert parsed_start.timestamp() == start_ts - assert parsed_end.timestamp() == end_ts - assert (parsed_end - parsed_start).total_seconds() == 3600 - - assert isinstance(normalized[0]["agg_project_id"], int) - assert normalized[0]["agg_project_id"] == int(project_id_float) - - assert ".0" not in str(normalized[0]["agg_project_id"]) - assert normalized[1]["agg_project_id"] is None