diff --git a/wikidatasearch/config.py b/wikidatasearch/config.py index 60dacfb..456c7b9 100644 --- a/wikidatasearch/config.py +++ b/wikidatasearch/config.py @@ -24,8 +24,17 @@ class Settings(BaseSettings): JINA_API_KEY: str | None = None + WD_TEXTIFIER_API: str = "https://wd-textify.wmcloud.org" + ANALYTICS_API_SECRET: str | None = None + # Database settings for logging + DB_NAME: str = "logs" + DB_USER: str = "" + DB_PASS: str = "" + DB_HOST: str = "requestsDB" + DB_PORT: int = 3306 + model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8") diff --git a/wikidatasearch/routes/item.py b/wikidatasearch/routes/item.py index d2214c9..10cf230 100644 --- a/wikidatasearch/routes/item.py +++ b/wikidatasearch/routes/item.py @@ -74,7 +74,6 @@ async def item_query_route( K: int = Query( settings.MAX_VECTORDB_K, ge=1, - le=settings.MAX_VECTORDB_K, description="Number of top results to return", ), instanceof: Optional[str] = Query( @@ -117,12 +116,12 @@ async def item_query_route( if not query: response = "Query is missing" - Logger.add_request(request, 422, start_time, error=response) + background_tasks.add_task(Logger.add_request, request, 422, start_time, error=response) raise HTTPException(status_code=422, detail=response) if K > settings.MAX_VECTORDB_K: response = f"K must be less than {settings.MAX_VECTORDB_K}" - Logger.add_request(request, 422, start_time, error=response) + background_tasks.add_task(Logger.add_request, request, 422, start_time, error=response) raise HTTPException(status_code=422, detail=response) # Build filter @@ -131,7 +130,7 @@ async def item_query_route( qids = [qid.strip() for qid in instanceof.split(",") if qid.strip()] if not qids: response = "Invalid instanceof filter" - Logger.add_request(request, 422, start_time, error=response) + background_tasks.add_task(Logger.add_request, request, 422, start_time, error=response) raise HTTPException(status_code=422, detail=response) filt["metadata.InstanceOf"] = {"$in": qids} @@ -152,6 
+151,6 @@ async def item_query_route( return results except Exception as e: - Logger.add_request(request, 500, start_time, error=str(e)) + background_tasks.add_task(Logger.add_request, request, 500, start_time, error=str(e)) traceback.print_exc() raise HTTPException(status_code=500, detail="Internal Server Error") diff --git a/wikidatasearch/routes/property.py b/wikidatasearch/routes/property.py index 85a165d..41e0ac6 100644 --- a/wikidatasearch/routes/property.py +++ b/wikidatasearch/routes/property.py @@ -70,7 +70,6 @@ async def property_query_route( K: int = Query( settings.MAX_VECTORDB_K, ge=1, - le=settings.MAX_VECTORDB_K, description="Number of top results to return", ), instanceof: Optional[str] = Query( @@ -118,12 +117,12 @@ async def property_query_route( if not query: response = "Query is missing" - Logger.add_request(request, 422, start_time, error=response) + background_tasks.add_task(Logger.add_request, request, 422, start_time, error=response) raise HTTPException(status_code=422, detail=response) if K > settings.MAX_VECTORDB_K: response = f"K must be less than {settings.MAX_VECTORDB_K}" - Logger.add_request(request, 422, start_time, error=response) + background_tasks.add_task(Logger.add_request, request, 422, start_time, error=response) raise HTTPException(status_code=422, detail=response) filt = {"metadata.IsProperty": True} @@ -131,7 +130,7 @@ async def property_query_route( qids = [qid.strip() for qid in instanceof.split(",") if qid.strip()] if not qids: response = "Invalid instanceof filter" - Logger.add_request(request, 422, start_time, error=response) + background_tasks.add_task(Logger.add_request, request, 422, start_time, error=response) raise HTTPException(status_code=422, detail=response) filt["metadata.InstanceOf"] = {"$in": qids} @@ -154,6 +153,6 @@ async def property_query_route( return results except Exception as e: - Logger.add_request(request, 500, start_time, error=str(e)) + background_tasks.add_task(Logger.add_request, request, 500, 
start_time, error=str(e)) traceback.print_exc() raise HTTPException(status_code=500, detail="Internal Server Error") diff --git a/wikidatasearch/routes/similarity.py b/wikidatasearch/routes/similarity.py index d23e7e9..f962653 100644 --- a/wikidatasearch/routes/similarity.py +++ b/wikidatasearch/routes/similarity.py @@ -103,24 +103,24 @@ async def similarity_score_route( if not query: response = "Query is missing" - Logger.add_request(request, 422, start_time, error=response) + background_tasks.add_task(Logger.add_request, request, 422, start_time, error=response) raise HTTPException(status_code=422, detail=response) if not qid: response = "QIDs are missing" - Logger.add_request(request, 422, start_time, error=response) + background_tasks.add_task(Logger.add_request, request, 422, start_time, error=response) raise HTTPException(status_code=422, detail=response) qids = [q.strip() for q in qid.split(",") if q.strip()] if not qids: response = "No valid QIDs provided" - Logger.add_request(request, 422, start_time, error=response) + background_tasks.add_task(Logger.add_request, request, 422, start_time, error=response) raise HTTPException(status_code=422, detail=response) MAX_NQIDs = 100 if len(qids) > MAX_NQIDs: response = "Too many QIDs provided. Please provide 100 or fewer QIDs." 
- Logger.add_request(request, 422, start_time, error=response) + background_tasks.add_task(Logger.add_request, request, 422, start_time, error=response) raise HTTPException(status_code=422, detail=response) try: @@ -135,6 +135,6 @@ async def similarity_score_route( return results except Exception as e: - Logger.add_request(request, 500, start_time, error=str(e)) + background_tasks.add_task(Logger.add_request, request, 500, start_time, error=str(e)) traceback.print_exc() raise HTTPException(status_code=500, detail="Internal Server Error") diff --git a/wikidatasearch/services/logger.py b/wikidatasearch/services/logger.py index 2d0736e..1203fe4 100644 --- a/wikidatasearch/services/logger.py +++ b/wikidatasearch/services/logger.py @@ -1,6 +1,5 @@ """Logging service for the FastAPI application.""" -import os import re import time import traceback @@ -21,15 +20,17 @@ from sqlalchemy.dialects.mysql import JSON from sqlalchemy.orm import declarative_base, sessionmaker +from ..config import settings + """ MySQL database setup for storing Wikidata labels in all languages. """ -DB_HOST = os.environ["DB_HOST"] -DB_NAME = os.environ["DB_NAME"] -DB_USER = os.environ["DB_USER"] -DB_PASS = os.environ["DB_PASS"] -DB_PORT = int(os.environ.get("DB_PORT", "3306")) +DB_HOST = settings.DB_HOST +DB_NAME = settings.DB_NAME +DB_USER = settings.DB_USER +DB_PASS = settings.DB_PASS +DB_PORT = settings.DB_PORT DATABASE_URL = f"mariadb+pymysql://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}?charset=utf8mb4" diff --git a/wikidatasearch/services/search/Search.py b/wikidatasearch/services/search/Search.py index faa012e..2fd81ca 100644 --- a/wikidatasearch/services/search/Search.py +++ b/wikidatasearch/services/search/Search.py @@ -1,6 +1,5 @@ """Abstract interfaces and shared helpers for search implementations.""" -import os from abc import ABC, abstractmethod import requests @@ -41,6 +40,9 @@ def get_text_by_ids( Returns: dict[str, str]: Mapping from entity ID to textual representation. 
""" + # Lazy import avoids circular dependency: config -> services.search -> Search. + from ...config import settings + if (not bool(lang)) or (lang == "all"): lang = "en" @@ -50,8 +52,8 @@ def get_text_by_ids( params = {"id": qid, "lang": lang, "external_ids": False, "format": format} headers = {"User-Agent": "Wikidata Vector Database (embedding@wikimedia.de)"} - url = os.environ.get("WD_TEXTIFIER_API", "https://wd-textify.wmcloud.org") - results = requests.get(url, params=params, headers=headers) + url_textifier = settings.WD_TEXTIFIER_API + results = requests.get(url_textifier, params=params, headers=headers) results.raise_for_status() text.update(results.json())