Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions wikidatasearch/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,17 @@ class Settings(BaseSettings):

JINA_API_KEY: str | None = None

WD_TEXTIFIER_API: str = "https://wd-textify.wmcloud.org"

ANALYTICS_API_SECRET: str | None = None

# Database settings for logging
DB_NAME: str = "logs"
DB_USER: str = ""
Comment thread
philippesaade-wmde marked this conversation as resolved.
DB_PASS: str = ""
DB_HOST: str = "requestsDB"
DB_PORT: int = 3306

model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")


Expand Down
9 changes: 4 additions & 5 deletions wikidatasearch/routes/item.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ async def item_query_route(
K: int = Query(
settings.MAX_VECTORDB_K,
ge=1,
Comment thread
philippesaade-wmde marked this conversation as resolved.
le=settings.MAX_VECTORDB_K,
description="Number of top results to return",
),
instanceof: Optional[str] = Query(
Expand Down Expand Up @@ -117,12 +116,12 @@ async def item_query_route(

if not query:
response = "Query is missing"
Logger.add_request(request, 422, start_time, error=response)
background_tasks.add_task(Logger.add_request, request, 422, start_time, error=response)
raise HTTPException(status_code=422, detail=response)

if K > settings.MAX_VECTORDB_K:
response = f"K must be less than {settings.MAX_VECTORDB_K}"
Logger.add_request(request, 422, start_time, error=response)
background_tasks.add_task(Logger.add_request, request, 422, start_time, error=response)
raise HTTPException(status_code=422, detail=response)

# Build filter
Expand All @@ -131,7 +130,7 @@ async def item_query_route(
qids = [qid.strip() for qid in instanceof.split(",") if qid.strip()]
if not qids:
response = "Invalid instanceof filter"
Logger.add_request(request, 422, start_time, error=response)
background_tasks.add_task(Logger.add_request, request, 422, start_time, error=response)
raise HTTPException(status_code=422, detail=response)
filt["metadata.InstanceOf"] = {"$in": qids}

Expand All @@ -152,6 +151,6 @@ async def item_query_route(
return results

except Exception as e:
Logger.add_request(request, 500, start_time, error=str(e))
background_tasks.add_task(Logger.add_request, request, 500, start_time, error=str(e))
traceback.print_exc()
raise HTTPException(status_code=500, detail="Internal Server Error")
9 changes: 4 additions & 5 deletions wikidatasearch/routes/property.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ async def property_query_route(
K: int = Query(
settings.MAX_VECTORDB_K,
ge=1,
le=settings.MAX_VECTORDB_K,
description="Number of top results to return",
),
instanceof: Optional[str] = Query(
Expand Down Expand Up @@ -118,20 +117,20 @@ async def property_query_route(

if not query:
response = "Query is missing"
Logger.add_request(request, 422, start_time, error=response)
background_tasks.add_task(Logger.add_request, request, 422, start_time, error=response)
raise HTTPException(status_code=422, detail=response)

if K > settings.MAX_VECTORDB_K:
response = f"K must be less than {settings.MAX_VECTORDB_K}"
Logger.add_request(request, 422, start_time, error=response)
background_tasks.add_task(Logger.add_request, request, 422, start_time, error=response)
raise HTTPException(status_code=422, detail=response)

filt = {"metadata.IsProperty": True}
if instanceof:
qids = [qid.strip() for qid in instanceof.split(",") if qid.strip()]
if not qids:
response = "Invalid instanceof filter"
Logger.add_request(request, 422, start_time, error=response)
background_tasks.add_task(Logger.add_request, request, 422, start_time, error=response)
raise HTTPException(status_code=422, detail=response)
filt["metadata.InstanceOf"] = {"$in": qids}

Expand All @@ -154,6 +153,6 @@ async def property_query_route(
return results

except Exception as e:
Logger.add_request(request, 500, start_time, error=str(e))
background_tasks.add_task(Logger.add_request, request, 500, start_time, error=str(e))
traceback.print_exc()
raise HTTPException(status_code=500, detail="Internal Server Error")
10 changes: 5 additions & 5 deletions wikidatasearch/routes/similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,24 +103,24 @@ async def similarity_score_route(

if not query:
response = "Query is missing"
Logger.add_request(request, 422, start_time, error=response)
background_tasks.add_task(Logger.add_request, request, 422, start_time, error=response)
raise HTTPException(status_code=422, detail=response)

if not qid:
response = "QIDs are missing"
Logger.add_request(request, 422, start_time, error=response)
background_tasks.add_task(Logger.add_request, request, 422, start_time, error=response)
raise HTTPException(status_code=422, detail=response)

qids = [q.strip() for q in qid.split(",") if q.strip()]
if not qids:
response = "No valid QIDs provided"
Logger.add_request(request, 422, start_time, error=response)
background_tasks.add_task(Logger.add_request, request, 422, start_time, error=response)
raise HTTPException(status_code=422, detail=response)

MAX_NQIDs = 100
if len(qids) > MAX_NQIDs:
response = "Too many QIDs provided. Please provide 100 or fewer QIDs."
Logger.add_request(request, 422, start_time, error=response)
background_tasks.add_task(Logger.add_request, request, 422, start_time, error=response)
raise HTTPException(status_code=422, detail=response)

try:
Expand All @@ -135,6 +135,6 @@ async def similarity_score_route(
return results

except Exception as e:
Logger.add_request(request, 500, start_time, error=str(e))
background_tasks.add_task(Logger.add_request, request, 500, start_time, error=str(e))
traceback.print_exc()
raise HTTPException(status_code=500, detail="Internal Server Error")
13 changes: 7 additions & 6 deletions wikidatasearch/services/logger.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Logging service for the FastAPI application."""

import os
import re
import time
import traceback
Expand All @@ -21,15 +20,17 @@
from sqlalchemy.dialects.mysql import JSON
from sqlalchemy.orm import declarative_base, sessionmaker

from ...config import settings

"""
MySQL database setup for storing API request logs.
"""

DB_HOST = os.environ["DB_HOST"]
DB_NAME = os.environ["DB_NAME"]
DB_USER = os.environ["DB_USER"]
DB_PASS = os.environ["DB_PASS"]
DB_PORT = int(os.environ.get("DB_PORT", "3306"))
DB_HOST = settings.DB_HOST
DB_NAME = settings.DB_NAME
DB_USER = settings.DB_USER
DB_PASS = settings.DB_PASS
DB_PORT = settings.DB_PORT

DATABASE_URL = f"mariadb+pymysql://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}?charset=utf8mb4"

Expand Down
8 changes: 5 additions & 3 deletions wikidatasearch/services/search/Search.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Abstract interfaces and shared helpers for search implementations."""

import os
from abc import ABC, abstractmethod

import requests
Expand Down Expand Up @@ -41,6 +40,9 @@ def get_text_by_ids(
Returns:
dict[str, str]: Mapping from entity ID to textual representation.
"""
# Lazy import avoids circular dependency: config -> services.search -> Search.
from ...config import settings

if (not bool(lang)) or (lang == "all"):
lang = "en"

Expand All @@ -50,8 +52,8 @@ def get_text_by_ids(
params = {"id": qid, "lang": lang, "external_ids": False, "format": format}
headers = {"User-Agent": "Wikidata Vector Database (embedding@wikimedia.de)"}

url = os.environ.get("WD_TEXTIFIER_API", "https://wd-textify.wmcloud.org")
results = requests.get(url, params=params, headers=headers)
url_textifier = settings.WD_TEXTIFIER_API
results = requests.get(url_textifier, params=params, headers=headers)
results.raise_for_status()
text.update(results.json())

Expand Down
Loading