From 2f584c11e733d67b3b9253025da1838ca66293f4 Mon Sep 17 00:00:00 2001 From: Dan Fuller Date: Mon, 1 Jun 2026 15:17:01 -0700 Subject: [PATCH] ref(flags): Remove organizations:replay-details-eap-query Dev-only flag. Remove the flag registration and the gated EAP query code path that was only enabled for internal users. The production behavior (standard Snuba query) is retained as the unconditional path. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/sentry/features/temporary.py | 2 - .../endpoints/organization_replay_details.py | 232 +---------------- .../test_query_replay_instance_eap.py | 236 ------------------ 3 files changed, 8 insertions(+), 462 deletions(-) delete mode 100644 tests/sentry/replays/endpoints/test_query_replay_instance_eap.py diff --git a/src/sentry/features/temporary.py b/src/sentry/features/temporary.py index 8cb6f303490c00..7b36d92b760341 100644 --- a/src/sentry/features/temporary.py +++ b/src/sentry/features/temporary.py @@ -236,8 +236,6 @@ def register_temporary_features(manager: FeatureManager) -> None: manager.add("organizations:replay-ai-summaries-mobile", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True) # Enable replay AI summaries for web replays manager.add("organizations:replay-ai-summaries", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True) - # Enable reading replay details using EAP query - manager.add("organizations:replay-details-eap-query", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False) # Enable using the events replays dataset manager.add("organizations:events-use-replays-dataset", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False) # Enable using the events api with a sql interface diff --git a/src/sentry/replays/endpoints/organization_replay_details.py b/src/sentry/replays/endpoints/organization_replay_details.py index 16f229d11a2764..472b18516f9d5e 100644 --- a/src/sentry/replays/endpoints/organization_replay_details.py +++ b/src/sentry/replays/endpoints/organization_replay_details.py @@ -1,23 +1,10 @@ import uuid -from datetime import datetime, timezone from typing import TypedDict from drf_spectacular.utils import extend_schema from rest_framework.request import Request from rest_framework.response import Response -from snuba_sdk import ( - Column, - Condition, - Direction, - Entity, - Function, - Granularity, - Op, - OrderBy, - Query, -) -from sentry import features from sentry.api.api_publish_status import ApiPublishStatus from sentry.api.base import cell_silo_endpoint from sentry.api.bases.organization import NoProjects @@ -28,8 +15,6 @@ from sentry.constants import ALL_ACCESS_PROJECTS from sentry.models.organization import Organization from sentry.replays.endpoints.organization_replay_endpoint import OrganizationReplayEndpoint -from sentry.replays.lib.eap import read as eap_read -from sentry.replays.lib.eap.snuba_transpiler import RequestMeta, Settings from sentry.replays.post_process import ReplayDetailsResponse, process_raw_response from sentry.replays.query import query_replay_instance from sentry.replays.validators import ReplayValidator @@ -39,186 +24,6 @@ class GetReplayResponse(TypedDict): data: ReplayDetailsResponse -def _query_replay_urls_eap( - replay_id: str, - project_ids: list[int], - start: datetime, - end: datetime, - organization_id: int, -) -> list[str]: - """Query URLs for a replay from EAP breadcrumb events.""" - replay_id_no_dashes = replay_id.replace("-", "") - - first_seen_agg = Function("min", parameters=[Column("sentry.timestamp")], alias="first_seen") - - select = [ - Column("to"), - first_seen_agg, - ] - - snuba_query = Query( - match=Entity("replays"), - select=select, - where=[ - Condition(Column("replay_id"), Op.EQ, replay_id_no_dashes), - Condition(Column("category"), Op.EQ, "navigation"), - ], - groupby=[Column("to")], - orderby=[OrderBy(first_seen_agg, Direction.ASC)], - ) - - settings = Settings( - attribute_types={ - "replay_id": str, - "category": str, - "sentry.timestamp": float, - "to": str, - }, - default_limit=1000, - default_offset=0, - ) - - request_meta = RequestMeta( - cogs_category="replays", - debug=False, - start_datetime=start, - end_datetime=end, - organization_id=organization_id, - project_ids=project_ids, - referrer="replays.query.urls", - request_id=str(uuid.uuid4().hex), - trace_item_type="replay", - ) - - result = eap_read.query(snuba_query, settings, request_meta, []) - - urls: list[str] = [] - for row in result.get("data", []): - url = row.get("to") - if url and isinstance(url, str): - urls.append(url) - - return urls - - -def _normalize_eap_response(data: list[dict]) -> list[dict]: - """Normalize EAP response data for frontend compatibility. - - - Convert float timestamps to ISO strings - - Convert agg_project_id from float to int - """ - for item in data: - if "started_at" in item and isinstance(item["started_at"], float): - item["started_at"] = datetime.fromtimestamp( - item["started_at"], tz=timezone.utc - ).isoformat() - if "finished_at" in item and isinstance(item["finished_at"], float): - item["finished_at"] = datetime.fromtimestamp( - item["finished_at"], tz=timezone.utc - ).isoformat() - - # Convert project_id from float to int to avoid ".0" in output - if "agg_project_id" in item and isinstance(item["agg_project_id"], float): - item["agg_project_id"] = int(item["agg_project_id"]) - return data - - -def query_replay_instance_eap( - project_ids: list[int], - replay_ids: list[str], - start: datetime, - end: datetime, - organization_id: int, - request_user_id: int | None, - referrer: str = "replays.query.details_query", -): - # EAP stores replay_id in hex without dashes - replay_ids_no_dashes = [replay_id.replace("-", "") for replay_id in replay_ids] - - select = [ - Column("replay_id"), - Function("min", parameters=[Column("sentry.project_id")], alias="agg_project_id"), - Function("min", parameters=[Column("sentry.timestamp")], alias="started_at"), - Function("max", parameters=[Column("sentry.timestamp")], alias="finished_at"), - Function("count", parameters=[Column("segment_id")], alias="count_segments"), - Function("sum", parameters=[Column("count_error_events")], alias="count_errors"), - Function("sum", parameters=[Column("count_warning_events")], alias="count_warnings"), - Function("sum", parameters=[Column("count_info_events")], alias="count_infos"), - Function( - "sumIf", - parameters=[ - Column("click_is_dead"), - Function( - "greaterOrEquals", - [ - Column("sentry.timestamp"), - int(datetime(year=2023, month=7, day=24).timestamp()), - ], - ), - ], - alias="count_dead_clicks", - ), - Function( - "sumIf", - parameters=[ - Column("click_is_rage"), - Function( - "greaterOrEquals", - [ - Column("sentry.timestamp"), - int(datetime(year=2023, month=7, day=24).timestamp()), - ], - ), - ], - alias="count_rage_clicks", - ), - Function("max", parameters=[Column("is_archived")], alias="isArchived"), - ] - - snuba_query = Query( - match=Entity("replays"), - select=select, - where=[ - Condition(Column("replay_id"), Op.IN, replay_ids_no_dashes), - ], - groupby=[Column("replay_id")], - granularity=Granularity(3600), - ) - - settings = Settings( - attribute_types={ - "replay_id": str, - "sentry.project_id": int, - "sentry.timestamp": float, - "segment_id": int, - "is_archived": int, - "count_error_events": int, - "count_warning_events": int, - "count_info_events": int, - "click_is_dead": int, - "click_is_rage": int, - }, - default_limit=100, - default_offset=0, - ) - - request_meta = RequestMeta( - cogs_category="replays", - debug=False, - start_datetime=start, - end_datetime=end, - organization_id=organization_id, - project_ids=project_ids, - referrer=referrer, - request_id=str(uuid.uuid4().hex), - trace_item_type="replay", - ) - result = eap_read.query(snuba_query, settings, request_meta, []) - # Normalize EAP-specific data types (floats -> ints/ISO strings) - result["data"] = _normalize_eap_response(result["data"]) - return result - - @cell_silo_endpoint @extend_schema(tags=["Replays"]) class OrganizationReplayDetailsEndpoint(OrganizationReplayEndpoint): @@ -269,35 +74,14 @@ def get( projects = self.get_projects(request, organization, include_all_accessible=True) project_ids = [project.id for project in projects] - # Use EAP query if feature flag is enabled - if features.has("organizations:replay-details-eap-query", organization, actor=request.user): - snuba_response = query_replay_instance_eap( - project_ids=project_ids, - replay_ids=[replay_id], - start=filter_params["start"], - end=filter_params["end"], - organization_id=organization.id, - request_user_id=request.user.id, - )["data"] - - if snuba_response: - urls = _query_replay_urls_eap( - replay_id=replay_id, - project_ids=project_ids, - start=filter_params["start"], - end=filter_params["end"], - organization_id=organization.id, - ) - snuba_response[0]["urls_sorted"] = urls - else: - snuba_response = query_replay_instance( - project_id=project_ids, - replay_id=replay_id, - start=filter_params["start"], - end=filter_params["end"], - organization=organization, - request_user_id=request.user.id, - ) + snuba_response = query_replay_instance( + project_id=project_ids, + replay_id=replay_id, + start=filter_params["start"], + end=filter_params["end"], + organization=organization, + request_user_id=request.user.id, + ) replay_data = process_raw_response( snuba_response, diff --git a/tests/sentry/replays/endpoints/test_query_replay_instance_eap.py b/tests/sentry/replays/endpoints/test_query_replay_instance_eap.py deleted file mode 100644 index 3464c46efabdc7..00000000000000 --- a/tests/sentry/replays/endpoints/test_query_replay_instance_eap.py +++ /dev/null @@ -1,236 +0,0 @@ -import datetime -from typing import Any -from uuid import uuid4 - -from sentry.replays.endpoints.organization_replay_details import ( - _normalize_eap_response, - _query_replay_urls_eap, - query_replay_instance_eap, -) -from sentry.testutils.cases import ReplayBreadcrumbType, ReplayEAPTestCase, SnubaTestCase, TestCase - - -class TestQueryReplayInstanceEAP(TestCase, SnubaTestCase, ReplayEAPTestCase): - def test_eap_replay_query(self) -> None: - replay_id1 = uuid4().hex - replay_id2 = uuid4().hex - now = datetime.datetime.now(datetime.UTC) - - replay1_breadcrumbs = [ - # Dead clicks - self.create_eap_replay_breadcrumb( - project=self.project, - replay_id=replay_id1, - segment_id=0, - breadcrumb_type=ReplayBreadcrumbType.DEAD_CLICK, - timestamp=now, - ), - self.create_eap_replay_breadcrumb( - project=self.project, - replay_id=replay_id1, - segment_id=0, - breadcrumb_type=ReplayBreadcrumbType.DEAD_CLICK, - timestamp=now, - ), - # Rage clicks - self.create_eap_replay_breadcrumb( - project=self.project, - replay_id=replay_id1, - segment_id=0, - breadcrumb_type=ReplayBreadcrumbType.RAGE_CLICK, - timestamp=now, - ), - # Regular click - self.create_eap_replay_breadcrumb( - project=self.project, - replay_id=replay_id1, - segment_id=0, - breadcrumb_type=ReplayBreadcrumbType.CLICK, - timestamp=now, - ), - self.create_eap_replay_breadcrumb( - project=self.project, - replay_id=replay_id1, - segment_id=0, - breadcrumb_type=ReplayBreadcrumbType.CLICK, - timestamp=now - datetime.timedelta(seconds=30), - category="navigation", - to="https://example.com/page1", - ), - self.create_eap_replay_breadcrumb( - project=self.project, - replay_id=replay_id1, - segment_id=0, - breadcrumb_type=ReplayBreadcrumbType.CLICK, - timestamp=now - datetime.timedelta(seconds=20), - category="navigation", - to="https://example.com/page2", - ), - self.create_eap_replay_breadcrumb( - project=self.project, - replay_id=replay_id1, - segment_id=0, - breadcrumb_type=ReplayBreadcrumbType.CLICK, - timestamp=now - datetime.timedelta(seconds=10), - category="navigation", - to="https://example.com/page3", - ), - ] - - replay2_breadcrumbs = [ - # Dead clicks - self.create_eap_replay_breadcrumb( - project=self.project, - replay_id=replay_id2, - segment_id=0, - breadcrumb_type=ReplayBreadcrumbType.DEAD_CLICK, - timestamp=now, - ), - # Rage clicks - self.create_eap_replay_breadcrumb( - project=self.project, - replay_id=replay_id2, - segment_id=0, - breadcrumb_type=ReplayBreadcrumbType.RAGE_CLICK, - timestamp=now, - ), - self.create_eap_replay_breadcrumb( - project=self.project, - replay_id=replay_id2, - segment_id=0, - breadcrumb_type=ReplayBreadcrumbType.RAGE_CLICK, - timestamp=now, - ), - ] - - self.store_eap_items(replay1_breadcrumbs + replay2_breadcrumbs) - - start = now - datetime.timedelta(minutes=5) - end = now + datetime.timedelta(minutes=5) - organization_id = self.organization.id - project_ids = [self.project.id] - - res1 = query_replay_instance_eap( - project_ids=project_ids, - replay_ids=[replay_id1], - start=start, - end=end, - request_user_id=self.user.id, - organization_id=organization_id, - ) - res2 = query_replay_instance_eap( - project_ids=project_ids, - replay_ids=[replay_id2], - start=start, - end=end, - request_user_id=self.user.id, - organization_id=organization_id, - ) - - assert isinstance(res1, dict) - assert isinstance(res2, dict) - assert res1.get("data") is not None - assert res2.get("data") is not None - - assert len(res1["data"]) == 1, f"Expected 1 row for replay_id1, got {len(res1['data'])}" - assert len(res2["data"]) == 1, f"Expected 1 row for replay_id2, got {len(res2['data'])}" - - assert res1["data"][0]["replay_id"] == replay_id1 - assert res2["data"][0]["replay_id"] == replay_id2 - - replay1_data = res1["data"][0] - assert "count_segments" in replay1_data - assert "agg_project_id" in replay1_data - - assert isinstance(replay1_data["agg_project_id"], int), ( - f"agg_project_id should be int after normalization, got {type(replay1_data['agg_project_id'])}" - ) - assert replay1_data["agg_project_id"] == self.project.id, ( - f"project_id mismatch: got {replay1_data['agg_project_id']}, expected {self.project.id}" - ) - assert "count_errors" in replay1_data - assert "count_warnings" in replay1_data - assert "count_dead_clicks" in replay1_data - assert "count_rage_clicks" in replay1_data - assert "isArchived" in replay1_data - assert "started_at" in replay1_data - assert "finished_at" in replay1_data - - assert replay1_data["started_at"] is not None, "started_at should not be None" - assert replay1_data["finished_at"] is not None, "finished_at should not be None" - - assert replay1_data["count_dead_clicks"] == 3, "2 DEAD_CLICK + 1 RAGE_CLICK = 3 dead" - assert replay1_data["count_rage_clicks"] == 1, "1 RAGE_CLICK = 1 rage" - - replay2_data = res2["data"][0] - assert replay2_data["count_dead_clicks"] == 3, "1 DEAD_CLICK + 2 RAGE_CLICK = 3 dead" - assert replay2_data["count_rage_clicks"] == 2, "2 RAGE_CLICK = 2 rage" - - # Test URL query for replay1 - urls = _query_replay_urls_eap( - replay_id=replay_id1, - project_ids=project_ids, - start=start, - end=end, - organization_id=organization_id, - ) - assert len(urls) == 3, f"Expected 3 URLs, got {len(urls)}" - assert urls == [ - "https://example.com/page1", - "https://example.com/page2", - "https://example.com/page3", - ], f"URLs should be sorted by timestamp ascending, got {urls}" - - urls2 = _query_replay_urls_eap( - replay_id=replay_id2, - project_ids=project_ids, - start=start, - end=end, - organization_id=organization_id, - ) - assert len(urls2) == 0, f"Expected 0 URLs for replay2, got {len(urls2)}" - - def test_normalize_eap_response(self) -> None: - """Test that EAP response data is correctly normalized. - - - Float timestamps should be converted to ISO strings - - Float project IDs should be converted to integers - """ - start_ts = 1690182000.0 - end_ts = 1690185600.0 - project_id_float = 4557221366136832.0 - - data: list[dict[str, Any]] = [ - { - "replay_id": "test123", - "started_at": start_ts, - "finished_at": end_ts, - "agg_project_id": project_id_float, - }, - { - "replay_id": "test456", - "started_at": None, - "finished_at": None, - "agg_project_id": None, - }, - ] - - normalized = _normalize_eap_response(data) - - # Test timestamp conversion - assert isinstance(normalized[0]["started_at"], str) - assert isinstance(normalized[0]["finished_at"], str) - assert normalized[1]["started_at"] is None - assert normalized[1]["finished_at"] is None - - parsed_start = datetime.datetime.fromisoformat(normalized[0]["started_at"]) - parsed_end = datetime.datetime.fromisoformat(normalized[0]["finished_at"]) - assert parsed_start.timestamp() == start_ts - assert parsed_end.timestamp() == end_ts - assert (parsed_end - parsed_start).total_seconds() == 3600 - - assert isinstance(normalized[0]["agg_project_id"], int) - assert normalized[0]["agg_project_id"] == int(project_id_float) - - assert ".0" not in str(normalized[0]["agg_project_id"]) - assert normalized[1]["agg_project_id"] is None