Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 84 additions & 4 deletions src/sentry/integrations/github/webhook.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
GithubWebhookType,
InstallationRepositoriesEvent,
)
from sentry.integrations.models.external_actor import ExternalActor
from sentry.integrations.pipeline import ensure_integration
from sentry.integrations.services.integration.model import (
RpcIntegration,
Expand All @@ -41,7 +42,11 @@
from sentry.integrations.services.integration.service import integration_service
from sentry.integrations.services.repository.service import repository_service
from sentry.integrations.source_code_management.webhook import SCMWebhook
from sentry.integrations.types import IntegrationProviderSlug
from sentry.integrations.types import (
ExternalActorSource,
ExternalProviders,
IntegrationProviderSlug,
)
from sentry.integrations.utils.metrics import IntegrationWebhookEvent, IntegrationWebhookEventType
from sentry.integrations.utils.scope import clear_tags_and_context
from sentry.integrations.utils.sync import sync_group_assignee_inbound_by_external_actor
Expand Down Expand Up @@ -324,6 +329,64 @@ def get_external_id(self, username: str) -> str:
def get_idp_external_id(self, integration: RpcIntegration, host: str | None = None) -> str:
return options.get("github-app.id")

def maybe_create_external_actor(
self,
*,
integration: RpcIntegration,
organization: Organization,
commit_author: CommitAuthor,
gh_username: str | None,
gh_user_id: str | int | None = None,
) -> None:
"""
For a newly created CommitAuthor, create the matching GitHub ExternalActor
mapping when we can confidently tie the author to a single Sentry user.

We only create a mapping when:
- the commit author has a GitHub username,
- the email is not an anonymous GitHub noreply address, and
- the email resolves to exactly one verified member of the organization.
"""
try:
if not gh_username:
return

if self.is_anonymous_email(commit_author.email):
return

users = commit_author.find_users()
# Only create a mapping when the email unambiguously resolves to a
# single Sentry user, otherwise we risk linking the wrong account.
if len(users) != 1:
return
user = users[0]

if integration.provider == IntegrationProviderSlug.GITHUB_ENTERPRISE.value:
provider = ExternalProviders.GITHUB_ENTERPRISE.value
else:
provider = ExternalProviders.GITHUB.value

external_name = f"@{gh_username.lstrip('@')}"

_, created = ExternalActor.objects.get_or_create(
organization_id=organization.id,
provider=provider,
external_name=external_name,
user_id=user.id,
defaults={
"integration_id": integration.id,
"external_id": str(gh_user_id) if gh_user_id else None,
"source": ExternalActorSource.COMMIT_AUTHOR.value,
},
)
except Exception as e:
# Never let external actor creation disrupt the main webhook flow.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand that we don't want to lose the webhook when running this code, but wide try blocks don't really communicate what exceptions we're actually expecting, and tend to get larger and larger as time over time

To accomplish the same thing I'd either change the call sites:

try:
  maybe_create_external_actor(...)
except: ...

def maybe_create_external_actor(...):
  ... # without try/except

or use another private helper

def maybe_create_external_actor(...):
  try:
    _maybe_create_external_actor(...)
  except: ...

def _maybe_create_external_actor(...):
  ... # without try/except

logger.warning(
"github.webhook.external_actor.error",
extra={"organization_id": organization.id, "error": str(e)},
Comment on lines +384 to +386
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

logger.exception or logger.error with the exc_info kwarg will ensure we create Sentry errors from any exceptions this triggers. I don't think we create errors for warnings by default so you'd have to scan logs for failures manually.

)
return


class InstallationEventWebhook(GitHubWebhook):
"""
Expand Down Expand Up @@ -570,11 +633,12 @@ def _handle(
if len(author_email) > 75:
author = None
elif author_email not in authors:
authors[author_email] = author = CommitAuthor.objects.get_or_create(
author, author_created = CommitAuthor.objects.get_or_create(
organization_id=organization.id,
email=author_email,
defaults={"name": commit["author"]["name"][:128]},
)[0]
)
authors[author_email] = author

update_kwargs = {}

Expand All @@ -595,6 +659,14 @@ def _handle(
author.update(**update_kwargs)
except IntegrityError:
pass

if author_created:
self.maybe_create_external_actor(
integration=integration,
organization=organization,
commit_author=author,
gh_username=gh_username,
)
else:
author = authors[author_email]

Expand Down Expand Up @@ -912,14 +984,22 @@ def _handle(
organization_id=organization.id, external_id=self.get_external_id(user["login"])
)
except CommitAuthor.DoesNotExist:
author, _created = CommitAuthor.objects.get_or_create(
author, author_created = CommitAuthor.objects.get_or_create(
organization_id=organization.id,
email=author_email,
defaults={
"name": user["login"][:128],
"external_id": self.get_external_id(user["login"]),
},
)
if author_created:
self.maybe_create_external_actor(
integration=integration,
organization=organization,
commit_author=author,
gh_username=user["login"],
gh_user_id=user.get("id"),
)

author.preload_users()
try:
Expand Down
133 changes: 133 additions & 0 deletions tests/sentry/integrations/github/test_webhook.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,11 @@
InstallationRepositoriesEventWebhook,
)
from sentry.integrations.github.webhook_types import InstallationRepositoriesEvent
from sentry.integrations.models.external_actor import ExternalActor
from sentry.integrations.models.integration import Integration
from sentry.integrations.models.organization_integration import OrganizationIntegration
from sentry.integrations.services.integration import integration_service
from sentry.integrations.types import ExternalActorSource, ExternalProviders
from sentry.middleware.integrations.parsers.github import GithubRequestParser
from sentry.models.commit import Commit
from sentry.models.commitauthor import CommitAuthor
Expand Down Expand Up @@ -688,6 +690,137 @@ def _create_integration_and_send_push_event(self):

assert response.status_code == 204

def _send_push_event(self, body: str):
sig1 = GitHubIntegrationsWebhookEndpoint.compute_signature(
"sha1", body.encode("utf-8"), self.secret
)
sig256 = GitHubIntegrationsWebhookEndpoint.compute_signature(
"sha256", body.encode("utf-8"), self.secret
)
return self.client.post(
path=self.url,
data=body,
content_type="application/json",
HTTP_X_GITHUB_EVENT="push",
HTTP_X_HUB_SIGNATURE=f"sha1={sig1}",
HTTP_X_HUB_SIGNATURE_256=f"sha256={sig256}",
HTTP_X_GITHUB_DELIVERY=str(uuid4()),
)

def _setup_github_integration_and_repo(self):
Repository.objects.create(
organization_id=self.organization.id,
external_id="35129377",
provider="integrations:github",
name="baxterthehacker/public-repo",
)
future_expires = datetime.now().replace(microsecond=0) + timedelta(minutes=5)
with assume_test_silo_mode(SiloMode.CONTROL):
integration = self.create_integration(
organization=self.organization,
external_id="12345",
provider="github",
metadata={"access_token": "1234", "expires_at": future_expires.isoformat()},
)
integration.add_organization(self.organization.id, self.user)
return integration

def _push_event_body(self, *, name: str, email: str, username: str | None) -> str:
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the fixtures.github module has a few other events I see a PUSH_EVENT_EXAMPLE_INSTALLATION which may be useful here, but if not, I think it's worthwhile to add a new one there, it'd probably simplify this helper (or make it redundant)

author = {"name": name, "email": email}
if username is not None:
author["username"] = username
return json.dumps(
{
"ref": "refs/heads/main",
"installation": {"id": 12345},
"commits": [
{
"id": "133d60480286590a610a0eb7352ff6e02b9674c4",
"distinct": True,
"message": "Update hello.py",
"timestamp": "2015-05-05T19:45:15-04:00",
"author": author,
"added": [],
"removed": [],
"modified": ["hello.py"],
}
],
"repository": {
"id": 35129377,
"full_name": "baxterthehacker/public-repo",
"html_url": "https://github.com/baxterthehacker/public-repo",
},
}
)

@responses.activate
def test_creates_external_actor_for_new_commit_author(self) -> None:
member = self.create_user(email="newdev@example.com")
self.create_member(user=member, organization=self.organization)
integration = self._setup_github_integration_and_repo()

response = self._send_push_event(
self._push_event_body(name="New Dev", email="newdev@example.com", username="newdev")
)
assert response.status_code == 204

external_actors = list(ExternalActor.objects.filter(organization_id=self.organization.id))
assert len(external_actors) == 1
external_actor = external_actors[0]
assert external_actor.user_id == member.id
assert external_actor.external_name == "@newdev"
assert external_actor.provider == ExternalProviders.GITHUB.value
assert external_actor.integration_id == integration.id
assert external_actor.source == ExternalActorSource.COMMIT_AUTHOR.value

@responses.activate
def test_skips_external_actor_for_noreply_email(self) -> None:
member = self.create_user(email="newdev@example.com")
self.create_member(user=member, organization=self.organization)
self._setup_github_integration_and_repo()

response = self._send_push_event(
self._push_event_body(
name="New Dev",
email="newdev@users.noreply.github.com",
username="newdev",
)
)
assert response.status_code == 204

assert not ExternalActor.objects.filter(organization_id=self.organization.id).exists()

@responses.activate
def test_skips_external_actor_when_email_does_not_match_user(self) -> None:
self._setup_github_integration_and_repo()

response = self._send_push_event(
self._push_event_body(
name="Stranger", email="stranger@example.com", username="stranger"
)
)
assert response.status_code == 204

assert not ExternalActor.objects.filter(organization_id=self.organization.id).exists()

@responses.activate
def test_external_actor_creation_is_idempotent(self) -> None:
member = self.create_user(email="newdev@example.com")
self.create_member(user=member, organization=self.organization)
self._setup_github_integration_and_repo()

body = self._push_event_body(name="New Dev", email="newdev@example.com", username="newdev")
assert self._send_push_event(body).status_code == 204
# Re-sending creates a new CommitAuthor lookup but must not duplicate the mapping.
assert self._send_push_event(body).status_code == 204

assert (
ExternalActor.objects.filter(
organization_id=self.organization.id, user_id=member.id
).count()
== 1
)

@responses.activate
@patch("sentry.integrations.github.client.get_jwt", return_value="jwt_token_1")
@patch("sentry.integrations.github.webhook.PushEventWebhook.__call__")
Expand Down
Loading