diff --git a/src/sentry/integrations/github/webhook.py b/src/sentry/integrations/github/webhook.py index 60b4441ca8d9e8..4afff5caf965ad 100644 --- a/src/sentry/integrations/github/webhook.py +++ b/src/sentry/integrations/github/webhook.py @@ -33,6 +33,7 @@ GithubWebhookType, InstallationRepositoriesEvent, ) +from sentry.integrations.models.external_actor import ExternalActor from sentry.integrations.pipeline import ensure_integration from sentry.integrations.services.integration.model import ( RpcIntegration, @@ -41,7 +42,11 @@ from sentry.integrations.services.integration.service import integration_service from sentry.integrations.services.repository.service import repository_service from sentry.integrations.source_code_management.webhook import SCMWebhook -from sentry.integrations.types import IntegrationProviderSlug +from sentry.integrations.types import ( + ExternalActorSource, + ExternalProviders, + IntegrationProviderSlug, +) from sentry.integrations.utils.metrics import IntegrationWebhookEvent, IntegrationWebhookEventType from sentry.integrations.utils.scope import clear_tags_and_context from sentry.integrations.utils.sync import sync_group_assignee_inbound_by_external_actor @@ -324,6 +329,64 @@ def get_external_id(self, username: str) -> str: def get_idp_external_id(self, integration: RpcIntegration, host: str | None = None) -> str: return options.get("github-app.id") + def maybe_create_external_actor( + self, + *, + integration: RpcIntegration, + organization: Organization, + commit_author: CommitAuthor, + gh_username: str | None, + gh_user_id: str | int | None = None, + ) -> None: + """ + For a newly created CommitAuthor, create the matching GitHub ExternalActor + mapping when we can confidently tie the author to a single Sentry user. + + We only create a mapping when: + - the commit author has a GitHub username, + - the email is not an anonymous GitHub noreply address, and + - the email resolves to exactly one verified member of the organization. + """ + try: + if not gh_username: + return + + if self.is_anonymous_email(commit_author.email): + return + + users = commit_author.find_users() + # Only create a mapping when the email unambiguously resolves to a + # single Sentry user, otherwise we risk linking the wrong account. + if len(users) != 1: + return + user = users[0] + + if integration.provider == IntegrationProviderSlug.GITHUB_ENTERPRISE.value: + provider = ExternalProviders.GITHUB_ENTERPRISE.value + else: + provider = ExternalProviders.GITHUB.value + + external_name = f"@{gh_username.lstrip('@')}" + + _, created = ExternalActor.objects.get_or_create( + organization_id=organization.id, + provider=provider, + external_name=external_name, + user_id=user.id, + defaults={ + "integration_id": integration.id, + "external_id": str(gh_user_id) if gh_user_id else None, + "source": ExternalActorSource.COMMIT_AUTHOR.value, + }, + ) + except Exception as e: + # Never let external actor creation disrupt the main webhook flow. + logger.warning( + "github.webhook.external_actor.error", + extra={"organization_id": organization.id, "error": str(e)}, + ) + return + class InstallationEventWebhook(GitHubWebhook): """ @@ -570,11 +633,12 @@ def _handle( if len(author_email) > 75: author = None elif author_email not in authors: - authors[author_email] = author = CommitAuthor.objects.get_or_create( + author, author_created = CommitAuthor.objects.get_or_create( organization_id=organization.id, email=author_email, defaults={"name": commit["author"]["name"][:128]}, - )[0] + ) + authors[author_email] = author update_kwargs = {} @@ -595,6 +659,14 @@ def _handle( author.update(**update_kwargs) except IntegrityError: pass + + if author_created: + self.maybe_create_external_actor( + integration=integration, + organization=organization, + commit_author=author, + gh_username=gh_username, + ) else: author = authors[author_email] @@ -912,7 +984,7 @@ def _handle( organization_id=organization.id, external_id=self.get_external_id(user["login"]) ) except CommitAuthor.DoesNotExist: - author, _created = CommitAuthor.objects.get_or_create( + author, author_created = CommitAuthor.objects.get_or_create( organization_id=organization.id, email=author_email, defaults={ @@ -920,6 +992,14 @@ def _handle( "external_id": self.get_external_id(user["login"]), }, ) + if author_created: + self.maybe_create_external_actor( + integration=integration, + organization=organization, + commit_author=author, + gh_username=user["login"], + gh_user_id=user.get("id"), + ) author.preload_users() try: diff --git a/tests/sentry/integrations/github/test_webhook.py b/tests/sentry/integrations/github/test_webhook.py index c9323ec571d806..68b473fc86af63 100644 --- a/tests/sentry/integrations/github/test_webhook.py +++ b/tests/sentry/integrations/github/test_webhook.py @@ -25,9 +25,11 @@ InstallationRepositoriesEventWebhook, ) from sentry.integrations.github.webhook_types import InstallationRepositoriesEvent +from sentry.integrations.models.external_actor import ExternalActor from sentry.integrations.models.integration import Integration from sentry.integrations.models.organization_integration import OrganizationIntegration from sentry.integrations.services.integration import integration_service +from sentry.integrations.types import ExternalActorSource, ExternalProviders from sentry.middleware.integrations.parsers.github import GithubRequestParser from sentry.models.commit import Commit from sentry.models.commitauthor import CommitAuthor @@ -688,6 +690,137 @@ def _create_integration_and_send_push_event(self): assert response.status_code == 204 + def _send_push_event(self, body: str): + sig1 = GitHubIntegrationsWebhookEndpoint.compute_signature( + "sha1", body.encode("utf-8"), self.secret + ) + sig256 = GitHubIntegrationsWebhookEndpoint.compute_signature( + "sha256", body.encode("utf-8"), self.secret + ) + return self.client.post( + path=self.url, + data=body, + content_type="application/json", + HTTP_X_GITHUB_EVENT="push", + HTTP_X_HUB_SIGNATURE=f"sha1={sig1}", + HTTP_X_HUB_SIGNATURE_256=f"sha256={sig256}", + HTTP_X_GITHUB_DELIVERY=str(uuid4()), + ) + + def _setup_github_integration_and_repo(self): + Repository.objects.create( + organization_id=self.organization.id, + external_id="35129377", + provider="integrations:github", + name="baxterthehacker/public-repo", + ) + future_expires = datetime.now().replace(microsecond=0) + timedelta(minutes=5) + with assume_test_silo_mode(SiloMode.CONTROL): + integration = self.create_integration( + organization=self.organization, + external_id="12345", + provider="github", + metadata={"access_token": "1234", "expires_at": future_expires.isoformat()}, + ) + integration.add_organization(self.organization.id, self.user) + return integration + + def _push_event_body(self, *, name: str, email: str, username: str | None) -> str: + author = {"name": name, "email": email} + if username is not None: + author["username"] = username + return json.dumps( + { + "ref": "refs/heads/main", + "installation": {"id": 12345}, + "commits": [ + { + "id": "133d60480286590a610a0eb7352ff6e02b9674c4", + "distinct": True, + "message": "Update hello.py", + "timestamp": "2015-05-05T19:45:15-04:00", + "author": author, + "added": [], + "removed": [], + "modified": ["hello.py"], + } + ], + "repository": { + "id": 35129377, + "full_name": "baxterthehacker/public-repo", + "html_url": "https://github.com/baxterthehacker/public-repo", + }, + } + ) + + @responses.activate + def test_creates_external_actor_for_new_commit_author(self) -> None: + member = self.create_user(email="newdev@example.com") + self.create_member(user=member, organization=self.organization) + integration = self._setup_github_integration_and_repo() + + response = self._send_push_event( + self._push_event_body(name="New Dev", email="newdev@example.com", username="newdev") + ) + assert response.status_code == 204 + + external_actors = list(ExternalActor.objects.filter(organization_id=self.organization.id)) + assert len(external_actors) == 1 + external_actor = external_actors[0] + assert external_actor.user_id == member.id + assert external_actor.external_name == "@newdev" + assert external_actor.provider == ExternalProviders.GITHUB.value + assert external_actor.integration_id == integration.id + assert external_actor.source == ExternalActorSource.COMMIT_AUTHOR.value + + @responses.activate + def test_skips_external_actor_for_noreply_email(self) -> None: + member = self.create_user(email="newdev@example.com") + self.create_member(user=member, organization=self.organization) + self._setup_github_integration_and_repo() + + response = self._send_push_event( + self._push_event_body( + name="New Dev", + email="newdev@users.noreply.github.com", + username="newdev", + ) + ) + assert response.status_code == 204 + + assert not ExternalActor.objects.filter(organization_id=self.organization.id).exists() + + @responses.activate + def test_skips_external_actor_when_email_does_not_match_user(self) -> None: + self._setup_github_integration_and_repo() + + response = self._send_push_event( + self._push_event_body( + name="Stranger", email="stranger@example.com", username="stranger" + ) + ) + assert response.status_code == 204 + + assert not ExternalActor.objects.filter(organization_id=self.organization.id).exists() + + @responses.activate + def test_external_actor_creation_is_idempotent(self) -> None: + member = self.create_user(email="newdev@example.com") + self.create_member(user=member, organization=self.organization) + self._setup_github_integration_and_repo() + + body = self._push_event_body(name="New Dev", email="newdev@example.com", username="newdev") + assert self._send_push_event(body).status_code == 204 + # Re-sending creates a new CommitAuthor lookup but must not duplicate the mapping. + assert self._send_push_event(body).status_code == 204 + + assert ( + ExternalActor.objects.filter( + organization_id=self.organization.id, user_id=member.id + ).count() + == 1 + ) + @responses.activate @patch("sentry.integrations.github.client.get_jwt", return_value="jwt_token_1") @patch("sentry.integrations.github.webhook.PushEventWebhook.__call__")