def upgrade() -> None:
    """Add the anonymous-suggestion columns to ``suggestion_sessions``.

    The string lengths mirror the ``SuggestionSession`` ORM model
    (``submitter_name``/``submitter_email`` are ``String(255)``, ``client_ip``
    is ``String(45)``) so the migrated schema and the model do not drift apart.
    ``is_anonymous`` is NOT NULL with a server-side default of ``false`` so
    that existing rows are back-filled as non-anonymous.
    """
    table = "suggestion_sessions"
    op.add_column(
        table,
        sa.Column("is_anonymous", sa.Boolean(), server_default="false", nullable=False),
    )
    op.add_column(table, sa.Column("submitter_name", sa.String(255), nullable=True))
    op.add_column(table, sa.Column("submitter_email", sa.String(255), nullable=True))
    op.add_column(table, sa.Column("client_ip", sa.String(45), nullable=True))


def downgrade() -> None:
    """Drop the anonymous-suggestion columns, in reverse order of creation."""
    table = "suggestion_sessions"
    op.drop_column(table, "client_ip")
    op.drop_column(table, "submitter_email")
    op.drop_column(table, "submitter_name")
    op.drop_column(table, "is_anonymous")
def _require_anonymous_mode() -> None:
    """Reject the request unless anonymous suggestions are enabled.

    Anonymous endpoints are served for every ``settings.auth_mode`` value
    except ``"required"``.

    Raises:
        HTTPException: 403 when ``settings.auth_mode`` is ``"required"``.
    """
    from fastapi import HTTPException

    anonymous_allowed = settings.auth_mode != "required"
    if anonymous_allowed:
        return

    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail="Anonymous suggestions not available",
    )
@router.post(
    "/{project_id}/suggestions/anonymous/sessions",
    response_model=AnonymousSessionCreateResponse,
    status_code=status.HTTP_201_CREATED,
)
async def create_anonymous_session(
    project_id: UUID,
    request: Request,
    service: Annotated[SuggestionService, Depends(get_service)],
) -> AnonymousSessionCreateResponse:
    """Open a new anonymous suggestion session for a project.

    No authentication is required, but the endpoint answers 403 when
    AUTH_MODE is "required". The caller's address (or the literal
    ``"unknown"`` when the request carries no client information) is handed
    to the service, which uses it for its rate-limit check.
    """
    _require_anonymous_mode()

    peer = request.client
    if peer:
        client_ip = peer.host
    else:
        # No peer information on the connection: fall back to a shared bucket.
        client_ip = "unknown"

    return await service.create_anonymous_session(project_id, client_ip)
@router.post(
    "/{project_id}/suggestions/anonymous/sessions/{session_id}/submit",
    response_model=AnonymousSubmitResponse,
)
async def submit_anonymous_session(
    project_id: UUID,
    session_id: str,
    data: AnonymousSubmitRequest,
    service: Annotated[SuggestionService, Depends(get_service)],
    x_anonymous_token: Annotated[str, Header()],
) -> AnonymousSubmitResponse:
    """Turn an anonymous suggestion session into a pull request.

    The caller authenticates with the X-Anonymous-Token header. A request
    whose honeypot field ('website') is filled in is answered with a fake
    success and nothing is created.
    """
    _require_anonymous_mode()
    verified_session_id = _verify_anon_token(x_anonymous_token)

    # A non-empty honeypot means a bot filled the hidden field: pretend the
    # submission worked without touching the service.
    if data.honeypot:
        return AnonymousSubmitResponse(pr_number=0, pr_url=None, status="submitted")

    return await service.submit_anonymous(project_id, session_id, data, verified_session_id)
@router.post(
    "/{project_id}/suggestions/anonymous/beacon",
    status_code=status.HTTP_204_NO_CONTENT,
)
async def anonymous_beacon_save(
    project_id: UUID,
    data: SuggestionBeaconRequest,
    service: Annotated[SuggestionService, Depends(get_service)],
    token: str = Query(..., description="Anonymous session token for authentication"),
) -> Response:
    """Handle a sendBeacon flush for an anonymous session.

    The anonymous token travels in the ``token`` query parameter rather than
    in a header. The token must be valid AND must have been issued for the
    session named in the request body; otherwise the flush is rejected.

    Raises:
        HTTPException: 403 when anonymous mode is disabled or the token was
            issued for a different session; 401 when the token is invalid
            or expired.
    """
    from fastapi import HTTPException

    _require_anonymous_mode()
    verified_session_id = _verify_anon_token(token)

    # A valid token only proves ownership of the session it was signed for.
    # Without this check any anonymous token holder could write to any
    # session simply by naming it in the body.
    if verified_session_id != data.session_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Token does not match session",
        )

    await service.beacon_save(project_id, data, data.session_id)
    return Response(status_code=status.HTTP_204_NO_CONTENT)
def verify_anonymous_token(token: str) -> str | None:
    """Validate an anonymous session token and return the session it names.

    A token is the base64url encoding of ``<json payload>|<hex signature>``,
    where the signature is an HMAC-SHA256 keyed with ``settings.secret_key``
    over the payload text prefixed with ``_HMAC_PREFIX``.

    Args:
        token: The base64url-encoded token string.

    Returns:
        The ``sid`` carried by the token when the signature matches, the
        payload is well formed and ``exp`` has not passed; ``None`` in every
        other case, including any decoding or parsing failure.

    Raises:
        RuntimeError: Propagated from ``_check_secret_key`` when the secret
            key is not configured securely.
    """
    _check_secret_key()
    try:
        raw = base64.urlsafe_b64decode(token.encode()).decode()
        body, signature = raw.rsplit("|", 1)

        mac = hmac.new(
            settings.secret_key.encode(),
            f"{_HMAC_PREFIX}{body}".encode(),
            hashlib.sha256,
        )
        # Constant-time comparison; the payload is only parsed once trusted.
        if not hmac.compare_digest(signature, mac.hexdigest()):
            return None

        claims = json.loads(body)
        if not isinstance(claims, dict):
            return None

        expires_at, session_id = claims.get("exp"), claims.get("sid")
        well_formed = isinstance(expires_at, (int, float)) and isinstance(session_id, str)
        if not well_formed or time.time() > expires_at:
            return None
        return session_id
    except Exception:
        # Malformed base64, missing separator, bad JSON, ...: treat as invalid.
        return None
""" + if settings.auth_mode == "disabled": + return ANONYMOUS_USER + # "optional" mode: existing behavior — returns None if no credentials, real user if valid token + # "required" mode: existing behavior if credentials is None: return None @@ -311,6 +328,9 @@ async def get_current_user_with_token( Raises 401 if not authenticated. Returns tuple of (CurrentUser, access_token). """ + if settings.auth_mode == "disabled": + return ANONYMOUS_USER, "anonymous" + # "optional" and "required" modes: existing behavior (401 if no credentials) if credentials is None: raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, diff --git a/ontokit/core/config.py b/ontokit/core/config.py index 6e8c2b92..3706de82 100644 --- a/ontokit/core/config.py +++ b/ontokit/core/config.py @@ -80,6 +80,9 @@ def zitadel_jwks_base_url(self) -> str: frontend_url: str = "" # e.g. http://localhost:3000 revalidation_secret: str = "" # shared secret for sitemap revalidation + # Auth mode: "required" (default), "optional" (browse without login, sign in for editing), "disabled" (no auth) + auth_mode: str = "required" + # Superadmin - comma-separated list of user IDs with full system access superadmin_user_ids: str = "" diff --git a/ontokit/models/suggestion_session.py b/ontokit/models/suggestion_session.py index 696832e3..9ff83abe 100644 --- a/ontokit/models/suggestion_session.py +++ b/ontokit/models/suggestion_session.py @@ -5,7 +5,7 @@ from enum import StrEnum from typing import TYPE_CHECKING -from sqlalchemy import DateTime, ForeignKey, Integer, String, Text, UniqueConstraint, func +from sqlalchemy import Boolean, DateTime, ForeignKey, Integer, String, Text, UniqueConstraint, func from sqlalchemy.orm import Mapped, mapped_column, relationship if TYPE_CHECKING: @@ -54,6 +54,12 @@ class SuggestionSession(Base): # Auth beacon_token: Mapped[str] = mapped_column(String(500), nullable=False) + # Anonymous session fields + is_anonymous: Mapped[bool] = mapped_column(Boolean, default=False, 
class AnonymousSubmitRequest(BaseModel):
    """Request body for submitting an anonymous suggestion session.

    Carries an optional summary, optional credit fields and a honeypot field.
    The honeypot is exposed to clients under the alias ``website``; the route
    answers a request that fills it with a silent fake success.

    The credit fields come from unauthenticated callers, so they are capped
    at 255 characters — the length the ``SuggestionSession`` model declares
    for ``submitter_name`` and ``submitter_email``.
    """

    # Free-text description of the changes.
    summary: str | None = Field(default=None, description="Optional summary of the changes")
    # Credit name; bounded to the model's String(255) column.
    submitter_name: str | None = Field(
        default=None,
        max_length=255,
        description="Optional name to credit with the suggestion",
    )
    # Credit email; bounded to the model's String(255) column.
    submitter_email: str | None = Field(
        default=None,
        max_length=255,
        description="Optional email to associate with the suggestion",
    )
    # Bot trap: real users never see or fill this field.
    honeypot: str | None = Field(
        default=None,
        alias="website",
        description="Honeypot field — must be empty; bots fill this automatically",
    )

    # Accept both the alias ("website") and the field name ("honeypot").
    model_config = {"populate_by_name": True}
instance_count = None - # Get annotations (excluding rdfs:comment and label properties - # which are already returned via IndexedLabel) - label_property_iris = {str(uri) for _, uri in LABEL_PROPERTIES} - excluded_iris = label_property_iris | {rdfs_comment_iri} + # Get annotations from IndexedAnnotation (excludes rdfs:comment which + # is returned separately as `comments`) annotations_result = await self.db.execute( select(IndexedAnnotation).where( IndexedAnnotation.entity_id == entity.id, - IndexedAnnotation.property_iri.notin_(excluded_iris), + IndexedAnnotation.property_iri != rdfs_comment_iri, ) ) annotations_by_prop: dict[str, list[dict[str, str]]] = {} @@ -676,6 +674,25 @@ async def get_class_detail( } ) + # Also include non-rdfs:label entries from IndexedLabel as annotations + # (skos:altLabel, skos:prefLabel, dcterms:title — these are translations/synonyms) + non_rdfs_labels_result = await self.db.execute( + select(IndexedLabel).where( + IndexedLabel.entity_id == entity.id, + IndexedLabel.property_iri != rdfs_label_iri, + ) + ) + for lbl in non_rdfs_labels_result.scalars().all(): + key = lbl.property_iri + if key not in annotations_by_prop: + annotations_by_prop[key] = [] + annotations_by_prop[key].append( + { + "value": lbl.value, + "lang": lbl.lang or "", + } + ) + # Build annotation property list matching the response format annotation_list = [] for prop_iri, values in annotations_by_prop.items(): diff --git a/ontokit/services/suggestion_service.py b/ontokit/services/suggestion_service.py index 36bd64e7..767570c2 100644 --- a/ontokit/services/suggestion_service.py +++ b/ontokit/services/suggestion_service.py @@ -10,6 +10,8 @@ from typing import TYPE_CHECKING from uuid import UUID +from ontokit.core.anonymous_token import create_anonymous_token + if TYPE_CHECKING: from ontokit.models.pull_request import PullRequest @@ -24,6 +26,11 @@ from ontokit.git import GitRepositoryService, get_git_service from ontokit.models.project import Project from 
ontokit.models.suggestion_session import SuggestionSession, SuggestionSessionStatus +from ontokit.schemas.anonymous_suggestion import ( + AnonymousSessionCreateResponse, + AnonymousSubmitRequest, + AnonymousSubmitResponse, +) from ontokit.schemas.pull_request import PRCreate from ontokit.schemas.suggestion import ( SuggestionBeaconRequest, @@ -388,7 +395,15 @@ async def _create_pr_for_session( body_parts.append(f"\n**Entities modified** ({session.changes_count} changes):") for entity in entities: body_parts.append(f"- {entity}") - body_parts.append(f"\n*Submitted by {session.user_name or session.user_id}*") + is_anonymous = getattr(session, "is_anonymous", False) + if is_anonymous: + submitter_name = getattr(session, "submitter_name", None) + if submitter_name: + body_parts.append(f"\n*Submitted by {submitter_name}*") + else: + body_parts.append("\n*Submitted anonymously*") + else: + body_parts.append(f"\n*Submitted by {session.user_name or session.user_id}*") description = "\n".join(body_parts) # Check for an existing PR on this branch (idempotency on retry) @@ -530,7 +545,17 @@ async def _build_summary(self, s: SuggestionSession) -> SuggestionSessionSummary pr_url = pr.github_pr_url if hasattr(pr, "github_pr_url") else None github_pr_url = pr_url - submitter = SuggestionUser(id=s.user_id, name=s.user_name, email=s.user_email) + # For anonymous sessions, prefer submitter_name/email (credit info collected at submit) + # over the generic user_name/email set at session creation. 
async def create_anonymous_session(
    self, project_id: UUID, client_ip: str
) -> AnonymousSessionCreateResponse:
    """Open a rate-limited anonymous suggestion session on its own branch.

    The call is refused once five anonymous sessions have been created for
    this project from ``client_ip`` within the preceding hour. Otherwise a
    ``suggest/anonymous/<session_id>`` branch is created, the session row is
    stored, and a signed anonymous token for the new session is returned.
    If storing the row fails, the freshly created branch is removed again
    on a best-effort basis before the error is re-raised.

    Args:
        project_id: Project the session belongs to.
        client_ip: Caller address used as the rate-limit key.

    Returns:
        The new session's id, branch, creation time and anonymous token.

    Raises:
        HTTPException: 429 when the rate limit is reached; 500 when the git
            branch cannot be created. Errors from ``_get_project`` and from
            the database commit propagate unchanged.
    """
    from sqlalchemy import func as sa_func

    # Fails early if the project does not exist.
    await self._get_project(project_id)

    # Count this address's anonymous sessions on this project in the last hour.
    window_start = datetime.now(UTC) - timedelta(hours=1)
    recent_count_query = select(sa_func.count(SuggestionSession.id)).where(
        SuggestionSession.project_id == project_id,
        SuggestionSession.is_anonymous.is_(True),
        SuggestionSession.client_ip == client_ip,
        SuggestionSession.created_at > window_start,
    )
    recent_count = (await self.db.execute(recent_count_query)).scalar() or 0
    if recent_count >= 5:
        raise HTTPException(
            status_code=status.HTTP_429_TOO_MANY_REQUESTS,
            detail="Rate limit exceeded. Try again later.",
        )

    # Identifiers and tokens for the new session.
    session_id = f"s_{secrets.token_hex(8)}"
    branch = f"suggest/anonymous/{session_id}"
    anonymous_token = create_anonymous_token(session_id)
    beacon_token = create_beacon_token(session_id)
    anon_user_id = f"anonymous-{secrets.token_hex(6)}"

    # The branch comes first so a stored row always has a branch behind it.
    try:
        self.git_service.create_branch(project_id, branch)
    except Exception as e:
        logger.error(f"Failed to create anonymous suggestion branch: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to create suggestion branch",
        ) from e

    record = SuggestionSession(
        project_id=project_id,
        user_id=anon_user_id,
        user_name="Anonymous",
        user_email=None,
        session_id=session_id,
        branch=branch,
        beacon_token=beacon_token,
        is_anonymous=True,
        client_ip=client_ip,
    )
    try:
        self.db.add(record)
        await self.db.commit()
    except Exception:
        await self.db.rollback()
        # Do not leave a branch behind that no session row points at.
        try:
            self.git_service.delete_branch(project_id, branch, force=True)
        except Exception:
            logger.warning(f"Failed to clean up orphaned anonymous branch {branch}")
        raise

    try:
        await self.db.refresh(record)
    except Exception:
        # The row is committed; reload it by key instead of failing the call.
        logger.warning("Anonymous session %s committed but refresh failed", session_id)
        reloaded = await self.db.execute(
            select(SuggestionSession).where(
                SuggestionSession.project_id == project_id,
                SuggestionSession.session_id == session_id,
            )
        )
        record = reloaded.scalar_one()

    return AnonymousSessionCreateResponse(
        session_id=record.session_id,
        branch=record.branch,
        created_at=record.created_at,
        anonymous_token=anonymous_token,
    )
async def submit_anonymous(
    self,
    project_id: UUID,
    session_id: str,
    data: AnonymousSubmitRequest,
    verified_session_id: str,
) -> AnonymousSubmitResponse:
    """Submit an anonymous suggestion session as a pull request.

    The optional credit name and email come from an unauthenticated caller
    and the name is later interpolated into the PR description, so both are
    normalised first: every run of whitespace (including newlines) collapses
    to a single space and a value that ends up empty is treated as absent.

    Args:
        project_id: Project the session belongs to.
        session_id: Session identifier taken from the URL.
        data: Submit payload (summary and optional credit fields).
        verified_session_id: Session id extracted from the caller's token.

    Returns:
        PR number, PR URL and status reported by ``_create_pr_for_session``.

    Raises:
        HTTPException: 403 when the token was issued for another session or
            the session is not anonymous; 400 when the session is not
            active or has no changes.
    """

    def _clean(value: str | None) -> str | None:
        """Collapse whitespace runs; return None when nothing is left."""
        collapsed = " ".join((value or "").split())
        return collapsed or None

    session = await self._get_session(project_id, session_id)

    if verified_session_id != session.session_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Token does not match session",
        )
    if not session.is_anonymous:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Session is not an anonymous session",
        )
    if session.status != SuggestionSessionStatus.ACTIVE.value:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Session is {session.status}, cannot submit",
        )
    if session.changes_count == 0:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="No changes to submit",
        )

    # Store optional credit info (only when something usable was provided).
    credit_name = _clean(data.submitter_name)
    credit_email = _clean(data.submitter_email)
    if credit_name or credit_email:
        session.submitter_name = credit_name
        session.submitter_email = credit_email
        # Keep user_name in step with the credit name for code that reads it.
        session.user_name = credit_name or "Anonymous"

    # Stand-in user object for the PR-creation helper.
    mock_user = CurrentUser(
        id=session.user_id,
        name=session.user_name or "Anonymous",
        email=session.submitter_email,
    )

    result = await self._create_pr_for_session(
        project_id, session, mock_user, data.summary, "submitted"
    )

    return AnonymousSubmitResponse(
        pr_number=result.pr_number,
        pr_url=result.pr_url,
        status=result.status,
    )
status_code=status.HTTP_403_FORBIDDEN, + detail="Session is not an anonymous session", + ) + if session.status != SuggestionSessionStatus.ACTIVE.value: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Session is {session.status}, cannot discard", + ) + + try: + self.git_service.delete_branch(project_id, session.branch, force=True) + except Exception as e: + logger.warning(f"Failed to delete anonymous suggestion branch {session.branch}: {e}") + + session.status = SuggestionSessionStatus.DISCARDED.value + session.last_activity = datetime.now(UTC) + await self.db.commit() + async def auto_submit_stale_sessions(self) -> int: """Auto-create PRs for stale suggestion sessions. diff --git a/scripts/seed-project.py b/scripts/seed-project.py new file mode 100644 index 00000000..b3563beb --- /dev/null +++ b/scripts/seed-project.py @@ -0,0 +1,313 @@ +#!/usr/bin/env python3 +""" +Seed a project by importing an OWL file and building the ontology index. + +Bypasses the API (which requires MinIO for imports and Redis for indexing) +by directly creating the git repo, database records, and index. 
+ +Usage: + # Import FOLIO from GitHub and index it + python scripts/seed-project.py \ + --name "FOLIO" \ + --description "Free Open Legal Information Ontology" \ + --owl-url "https://raw.githubusercontent.com/alea-institute/FOLIO/main/FOLIO.owl" \ + --public + + # Import from a local file + python scripts/seed-project.py \ + --name "My Ontology" \ + --owl-file /path/to/ontology.owl + + # Index only (project already exists with git repo) + python scripts/seed-project.py \ + --project-id "db045aca-a6ce-4f1d-b06c-5fbe475c9e08" \ + --index-only + + # Configure upstream sync after import + python scripts/seed-project.py \ + --name "FOLIO" \ + --owl-url "https://raw.githubusercontent.com/alea-institute/FOLIO/main/FOLIO.owl" \ + --public \ + --upstream-repo "alea-institute/FOLIO" \ + --upstream-branch main \ + --upstream-file "FOLIO.owl" + +Environment: + DATABASE_URL - PostgreSQL connection string (reads from .env) + GIT_REPOS_BASE_PATH - Base path for bare git repos (reads from .env) +""" + +import argparse +import asyncio +import subprocess +import sys +import tempfile +import urllib.request +from pathlib import Path +from uuid import UUID, uuid4 + +from rdflib import Graph +from sqlalchemy import text +from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine +from sqlalchemy.orm import sessionmaker + +# Add project root to path +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from ontokit.core.config import settings +from ontokit.services.ontology_index import OntologyIndexService + + +async def create_project( + session: AsyncSession, + project_id: UUID, + name: str, + description: str, + is_public: bool, + source_file_path: str, + owner_id: str = "anonymous", +) -> None: + """Insert project record into database.""" + await session.execute( + text(""" + INSERT INTO projects (id, name, description, is_public, owner_id, source_file_path, created_at, updated_at) + VALUES (:id, :name, :description, :is_public, :owner_id, :source_file_path, NOW(), 
def create_git_repo(repo_path: Path, owl_content: bytes, filename: str) -> str:
    """Create a bare git repo holding the OWL file and return its HEAD commit hash.

    If ``repo_path`` already exists and ``git rev-parse HEAD`` succeeds there,
    the repository is left untouched and its current commit hash is returned.
    Otherwise a bare repository with initial branch ``main`` is initialised,
    the file is committed in a temporary clone and pushed back.

    Args:
        repo_path: Location of the bare repository.
        owl_content: Raw bytes of the ontology file to commit.
        filename: Name of the file inside the repository.

    Returns:
        The commit hash ``HEAD`` of the bare repository points at.

    Raises:
        subprocess.CalledProcessError: When any git command fails.
    """

    def git(*args: str, cwd: Path | None = None) -> str:
        """Run one git command (optionally via ``-C cwd``) and return its stdout."""
        prefix = ["git"] if cwd is None else ["git", "-C", str(cwd)]
        done = subprocess.run(
            [*prefix, *args], check=True, capture_output=True, text=True,
        )
        return done.stdout.strip()

    if repo_path.exists():
        # Reuse an existing repository that already has a commit on HEAD.
        probe = subprocess.run(
            ["git", "-C", str(repo_path), "rev-parse", "HEAD"],
            capture_output=True, text=True,
        )
        if probe.returncode == 0:
            print(f" Git repo already exists at {repo_path}")
            return probe.stdout.strip()

    # Create bare repo
    git("init", "--bare", "--initial-branch=main", str(repo_path))

    # Clone to temp, add file, push
    with tempfile.TemporaryDirectory() as tmpdir:
        workdir = Path(tmpdir) / "work"
        git("clone", str(repo_path), str(workdir))
        git("config", "user.email", "ontokit@localhost", cwd=workdir)
        git("config", "user.name", "OntoKit", cwd=workdir)

        (workdir / filename).write_bytes(owl_content)
        git("add", filename, cwd=workdir)
        git("commit", "-m", f"Import {filename}", cwd=workdir)
        # Push whatever branch the clone is on to the bare repo's "main".
        # A clone of an empty repository is not guaranteed to name its local
        # branch "main" (it can depend on the Git version and on
        # init.defaultBranch), so pushing the literal ref "main" could fail.
        git("push", "origin", "HEAD:refs/heads/main", cwd=workdir)

    # Get commit hash
    commit_hash = git("rev-parse", "HEAD", cwd=repo_path)
    print(f" Git repo created: {repo_path}")
    print(f" Commit: {commit_hash}")
    return commit_hash
branch, + "file_path": file_path, + }, + ) + await session.commit() + print(f" Upstream sync configured: {repo_owner}/{repo_name} ({branch}:{file_path})") + + +async def main(): + parser = argparse.ArgumentParser(description="Seed a project with an OWL file and build its index") + parser.add_argument("--name", help="Project name") + parser.add_argument("--description", default="", help="Project description") + parser.add_argument("--owl-url", help="URL to download OWL file from") + parser.add_argument("--owl-file", help="Local path to OWL file") + parser.add_argument("--owl-format", default="xml", help="RDFLib format (xml, turtle, n3, json-ld)") + parser.add_argument("--public", action="store_true", help="Make project public") + parser.add_argument("--project-id", help="Use a specific project UUID (default: auto-generate)") + parser.add_argument("--index-only", action="store_true", help="Only rebuild the index (project must exist)") + parser.add_argument("--skip-index", action="store_true", help="Skip index building") + parser.add_argument("--upstream-repo", help="GitHub repo for upstream sync (owner/name format)") + parser.add_argument("--upstream-branch", default="main", help="Upstream branch to track") + parser.add_argument("--upstream-file", help="File path in upstream repo") + args = parser.parse_args() + + # Validate args + if not args.index_only and not args.name: + parser.error("--name is required unless --index-only is set") + if not args.index_only and not args.owl_url and not args.owl_file: + parser.error("--owl-url or --owl-file is required unless --index-only is set") + + project_id = UUID(args.project_id) if args.project_id else uuid4() + repos_base = Path(settings.git_repos_base_path) + repo_path = repos_base / f"{project_id}.git" + + # Download or read OWL file + owl_content = None + if args.owl_url: + print(f"Downloading {args.owl_url}...") + with urllib.request.urlopen(args.owl_url) as response: + owl_content = response.read() + print(f" 
Downloaded {len(owl_content)} bytes") + elif args.owl_file: + owl_content = Path(args.owl_file).read_bytes() + print(f" Read {len(owl_content)} bytes from {args.owl_file}") + + # Determine filename + if args.owl_url: + filename = args.owl_url.split("/")[-1] + elif args.owl_file: + filename = Path(args.owl_file).name + else: + filename = "ontology.owl" + + engine = create_async_engine(str(settings.database_url)) + async_session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False) + + async with async_session() as session: + if args.index_only: + # Index-only mode: read OWL from existing git repo + print(f"Index-only mode for project {project_id}") + result = subprocess.run( + ["git", "-C", str(repo_path), "rev-parse", "HEAD"], + capture_output=True, text=True, check=True, + ) + commit_hash = result.stdout.strip() + + # Find the OWL file in the repo + result = subprocess.run( + ["git", "-C", str(repo_path), "ls-tree", "--name-only", "HEAD"], + capture_output=True, text=True, check=True, + ) + files = result.stdout.strip().split("\n") + owl_files = [f for f in files if f.endswith((".owl", ".ttl", ".rdf", ".n3"))] + if not owl_files: + print(f"ERROR: No OWL/TTL/RDF files found in repo") + sys.exit(1) + filename = owl_files[0] + + result = subprocess.run( + ["git", "-C", str(repo_path), "show", f"HEAD:{filename}"], + capture_output=True, check=True, + ) + owl_content = result.stdout + + fmt = "xml" if filename.endswith((".owl", ".rdf")) else "turtle" if filename.endswith(".ttl") else "n3" + await build_index(session, project_id, "main", owl_content, commit_hash, fmt) + else: + # Full seed: create project, git repo, index + print(f"Seeding project: {args.name} ({project_id})") + + # 1. Create project record + await create_project(session, project_id, args.name, args.description, args.public, filename) + + # 2. Create git repo with OWL file + commit_hash = create_git_repo(repo_path, owl_content, filename) + + # 3. 
Build index + if not args.skip_index: + await build_index(session, project_id, "main", owl_content, commit_hash, args.owl_format) + + # 4. Configure upstream sync + if args.upstream_repo: + parts = args.upstream_repo.split("/") + if len(parts) != 2: + print(f"ERROR: --upstream-repo must be 'owner/name' format, got '{args.upstream_repo}'") + else: + await configure_upstream_sync( + session, project_id, + repo_owner=parts[0], + repo_name=parts[1], + branch=args.upstream_branch, + file_path=args.upstream_file or filename, + ) + + await engine.dispose() + print(f"\nDone! Project ID: {project_id}") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/tests/unit/test_auth_disabled.py b/tests/unit/test_auth_disabled.py new file mode 100644 index 00000000..f2595f32 --- /dev/null +++ b/tests/unit/test_auth_disabled.py @@ -0,0 +1,126 @@ +"""Tests for the three auth modes: required, optional, disabled.""" + +from unittest.mock import patch + +import pytest +from fastapi import HTTPException + +from ontokit.core.auth import ( + ANONYMOUS_USER, + CurrentUser, + get_current_user, + get_current_user_optional, + get_current_user_with_token, +) + +# --------------------------------------------------------------------------- +# ANONYMOUS_USER constant +# --------------------------------------------------------------------------- + + +class TestAnonymousUser: + """Tests for the ANONYMOUS_USER constant.""" + + def test_anonymous_user_id(self) -> None: + """ANONYMOUS_USER has id='anonymous'.""" + assert ANONYMOUS_USER.id == "anonymous" + + def test_anonymous_user_roles(self) -> None: + """ANONYMOUS_USER has roles=['viewer'].""" + assert ANONYMOUS_USER.roles == ["viewer"] + + @patch("ontokit.core.auth.settings") + def test_anonymous_user_is_not_superadmin(self, mock_settings) -> None: # noqa: ANN001 + """ANONYMOUS_USER is never a superadmin.""" + mock_settings.superadmin_ids = set() + assert ANONYMOUS_USER.is_superadmin is False + + def 
test_anonymous_user_is_current_user_instance(self) -> None: + """ANONYMOUS_USER is an instance of CurrentUser.""" + assert isinstance(ANONYMOUS_USER, CurrentUser) + + +# --------------------------------------------------------------------------- +# AUTH_MODE=disabled +# --------------------------------------------------------------------------- + + +class TestAuthModeDisabled: + """Tests for AUTH_MODE=disabled — all functions return ANONYMOUS_USER.""" + + @pytest.mark.asyncio + @patch("ontokit.core.auth.settings") + async def test_disabled_get_current_user_returns_anonymous(self, mock_settings) -> None: # noqa: ANN001 + """In disabled mode, get_current_user returns ANONYMOUS_USER (no credentials needed).""" + mock_settings.auth_mode = "disabled" + result = await get_current_user(credentials=None) + assert result is ANONYMOUS_USER + + @pytest.mark.asyncio + @patch("ontokit.core.auth.settings") + async def test_disabled_get_current_user_optional_returns_anonymous(self, mock_settings) -> None: # noqa: ANN001 + """In disabled mode, get_current_user_optional returns ANONYMOUS_USER (not None).""" + mock_settings.auth_mode = "disabled" + result = await get_current_user_optional(credentials=None) + assert result is ANONYMOUS_USER + + @pytest.mark.asyncio + @patch("ontokit.core.auth.settings") + async def test_disabled_get_current_user_with_token_returns_anonymous(self, mock_settings) -> None: # noqa: ANN001 + """In disabled mode, get_current_user_with_token returns (ANONYMOUS_USER, 'anonymous').""" + mock_settings.auth_mode = "disabled" + user, token = await get_current_user_with_token(credentials=None) + assert user is ANONYMOUS_USER + assert token == "anonymous" + + +# --------------------------------------------------------------------------- +# AUTH_MODE=required (default) +# --------------------------------------------------------------------------- + + +class TestAuthModeRequired: + """Tests for AUTH_MODE=required — existing behavior, 401 without credentials.""" + + 
@pytest.mark.asyncio + @patch("ontokit.core.auth.settings") + async def test_required_get_current_user_raises_401_without_credentials(self, mock_settings) -> None: # noqa: ANN001 + """In required mode, get_current_user raises 401 when no credentials provided.""" + mock_settings.auth_mode = "required" + with pytest.raises(HTTPException) as exc_info: + await get_current_user(credentials=None) + assert exc_info.value.status_code == 401 + + @pytest.mark.asyncio + @patch("ontokit.core.auth.settings") + async def test_required_get_current_user_optional_returns_none_without_credentials(self, mock_settings) -> None: # noqa: ANN001 + """In required mode, get_current_user_optional returns None when no credentials provided.""" + mock_settings.auth_mode = "required" + result = await get_current_user_optional(credentials=None) + assert result is None + + +# --------------------------------------------------------------------------- +# AUTH_MODE=optional +# --------------------------------------------------------------------------- + + +class TestAuthModeOptional: + """Tests for AUTH_MODE=optional — GET endpoints work anonymously, writes require auth.""" + + @pytest.mark.asyncio + @patch("ontokit.core.auth.settings") + async def test_optional_get_current_user_raises_401_without_credentials(self, mock_settings) -> None: # noqa: ANN001 + """In optional mode, get_current_user (RequiredUser) raises 401 without credentials (write protection).""" + mock_settings.auth_mode = "optional" + with pytest.raises(HTTPException) as exc_info: + await get_current_user(credentials=None) + assert exc_info.value.status_code == 401 + + @pytest.mark.asyncio + @patch("ontokit.core.auth.settings") + async def test_optional_get_current_user_optional_returns_none_without_credentials(self, mock_settings) -> None: # noqa: ANN001 + """In optional mode, get_current_user_optional returns None without credentials (browse works).""" + mock_settings.auth_mode = "optional" + result = await 
get_current_user_optional(credentials=None) + assert result is None diff --git a/tests/unit/test_ontology_index_service.py b/tests/unit/test_ontology_index_service.py index b3e11a4a..ec3c0627 100644 --- a/tests/unit/test_ontology_index_service.py +++ b/tests/unit/test_ontology_index_service.py @@ -451,7 +451,7 @@ async def test_get_class_detail_found( mock_entity_result = MagicMock() mock_entity_result.scalar_one_or_none.return_value = entity - # labels, comments, parents, child_count, annotations + # labels, comments, parents, child_count, annotations, non-rdfs labels mock_labels = MagicMock() mock_labels.scalars.return_value.all.return_value = [] mock_comments = MagicMock() @@ -462,6 +462,8 @@ async def test_get_class_detail_found( mock_child_count.scalar.return_value = 0 mock_annotations = MagicMock() mock_annotations.scalars.return_value.all.return_value = [] + mock_non_rdfs_labels = MagicMock() + mock_non_rdfs_labels.scalars.return_value.all.return_value = [] mock_db.execute.side_effect = [ mock_entity_result, @@ -470,6 +472,7 @@ async def test_get_class_detail_found( mock_parents, mock_child_count, mock_annotations, + mock_non_rdfs_labels, ] result = await service.get_class_detail(PROJECT_ID, BRANCH, "http://example.org/Person") @@ -539,6 +542,10 @@ async def test_get_class_detail_with_labels_and_parents( mock_annotations = MagicMock() mock_annotations.scalars.return_value.all.return_value = [] + # Non-rdfs:label entries (translations/synonyms via skos:altLabel etc.) 
+ mock_non_rdfs_labels = MagicMock() + mock_non_rdfs_labels.scalars.return_value.all.return_value = [] + mock_db.execute.side_effect = [ mock_entity_result, mock_labels, @@ -548,6 +555,7 @@ async def test_get_class_detail_with_labels_and_parents( mock_parent_labels, mock_child_count, mock_annotations, + mock_non_rdfs_labels, ] result = await service.get_class_detail(PROJECT_ID, BRANCH, "http://example.org/Person") @@ -883,6 +891,10 @@ async def test_get_class_detail_with_annotations( mock_annotations = MagicMock() mock_annotations.scalars.return_value.all.return_value = [ann] + # Non-rdfs:label entries (translations/synonyms via skos:altLabel etc.) + mock_non_rdfs_labels = MagicMock() + mock_non_rdfs_labels.scalars.return_value.all.return_value = [] + mock_db.execute.side_effect = [ mock_entity_result, mock_labels, @@ -890,6 +902,7 @@ async def test_get_class_detail_with_annotations( mock_parents, mock_child_count, mock_annotations, + mock_non_rdfs_labels, ] result = await service.get_class_detail(PROJECT_ID, BRANCH, "http://example.org/Thing") diff --git a/tests/unit/test_suggestion_service.py b/tests/unit/test_suggestion_service.py index 719c3d14..b3e29610 100644 --- a/tests/unit/test_suggestion_service.py +++ b/tests/unit/test_suggestion_service.py @@ -79,6 +79,10 @@ def _make_session( session.summary = None session.created_at = datetime.now(UTC) session.last_activity = last_activity or datetime.now(UTC) + # Anonymous-suggestion fields default to non-anonymous for existing tests. + session.is_anonymous = False + session.submitter_name = None + session.submitter_email = None return session