diff --git a/README.md b/README.md index 44ac4c4..9cff18a 100644 --- a/README.md +++ b/README.md @@ -81,6 +81,28 @@ for result in client.search.semantic_iter("chronic kidney disease", page_size=50 print(f"{result['concept_id']}: {result['concept_name']}") ``` +### Bulk Search + +Search for multiple terms in a single API call — much faster than individual requests: + +```python +# Bulk lexical search (up to 50 queries) +results = client.search.bulk_basic([ + {"search_id": "q1", "query": "diabetes mellitus"}, + {"search_id": "q2", "query": "hypertension"}, + {"search_id": "q3", "query": "aspirin"}, +], defaults={"vocabulary_ids": ["SNOMED"], "page_size": 5}) + +for item in results["results"]: + print(f"{item['search_id']}: {len(item['results'])} results") + +# Bulk semantic search (up to 25 queries) +results = client.search.bulk_semantic([ + {"search_id": "s1", "query": "heart failure treatment options"}, + {"search_id": "s2", "query": "type 2 diabetes medication"}, +], defaults={"threshold": 0.5, "page_size": 10}) +``` + ### Similarity Search Find concepts similar to a known concept or natural language query: @@ -173,7 +195,7 @@ suggestions = client.concepts.suggest("diab", vocabulary_ids=["SNOMED"], page_si | Resource | Description | Key Methods | |----------|-------------|-------------| | `concepts` | Concept lookup and batch operations | `get()`, `get_by_code()`, `batch()`, `suggest()` | -| `search` | Full-text and semantic search | `basic()`, `advanced()`, `semantic()`, `semantic_iter()`, `similar()`, `fuzzy()` | +| `search` | Full-text and semantic search | `basic()`, `advanced()`, `semantic()`, `semantic_iter()`, `similar()`, `bulk_basic()`, `bulk_semantic()` | | `hierarchy` | Navigate concept relationships | `ancestors()`, `descendants()` | | `mappings` | Cross-vocabulary mappings | `get()`, `map()` | | `vocabularies` | Vocabulary metadata | `list()`, `get()`, `stats()` | diff --git a/src/omophub/resources/search.py b/src/omophub/resources/search.py index 2a98dda..3a03744 
def bulk_basic(
    self,
    searches: list[BulkSearchInput],
    *,
    defaults: BulkSearchDefaults | None = None,
) -> BulkSearchResponse:
    """Run a batch of lexical searches with one POST to ``/search/bulk``.

    The request body always carries the ``searches`` list; a ``defaults``
    entry is added only when a non-empty mapping is supplied.

    Args:
        searches: The individual queries. Every entry needs a unique
            ``search_id`` and a ``query``; the API accepts at most 50
            entries per call.
        defaults: Filters shared by every search in the batch. A value
            set on an individual search takes precedence over the
            matching default.

    Returns:
        The bulk response, with per-search status, results, and timing.

    Example::

        results = client.search.bulk_basic([
            {"search_id": "q1", "query": "diabetes"},
            {"search_id": "q2", "query": "hypertension"},
        ], defaults={"vocabulary_ids": ["SNOMED"], "page_size": 5})

        for item in results["results"]:
            print(item["search_id"], len(item["results"]))
    """
    # Falsy defaults (None or an empty dict) are left out of the payload.
    payload: dict[str, Any] = (
        {"searches": searches, "defaults": defaults}
        if defaults
        else {"searches": searches}
    )
    return self._request.post("/search/bulk", json_data=payload)
async def bulk_basic(
    self,
    searches: list[BulkSearchInput],
    *,
    defaults: BulkSearchDefaults | None = None,
) -> BulkSearchResponse:
    """Run a batch of lexical searches with one awaited POST to ``/search/bulk``.

    The request body always carries the ``searches`` list; a ``defaults``
    entry is added only when a non-empty mapping is supplied.

    Args:
        searches: The individual search inputs (max 50 per call).
        defaults: Default filters for all searches in the batch.

    Returns:
        Bulk results with per-search status and results.
    """
    # Falsy defaults (None or an empty dict) contribute nothing to the payload.
    shared: dict[str, Any] = {"defaults": defaults} if defaults else {}
    payload: dict[str, Any] = {"searches": searches, **shared}
    return await self._request.post("/search/bulk", json_data=payload)
+ """ + body: dict[str, Any] = {"searches": searches} + if defaults: + body["defaults"] = defaults + return await self._request.post("/search/semantic-bulk", json_data=body) + async def similar( self, *, diff --git a/src/omophub/types/__init__.py b/src/omophub/types/__init__.py index 33e4061..1ffe8ed 100644 --- a/src/omophub/types/__init__.py +++ b/src/omophub/types/__init__.py @@ -34,6 +34,15 @@ RelationshipType, ) from .search import ( + BulkSearchDefaults, + BulkSearchInput, + BulkSearchResponse, + BulkSearchResultItem, + BulkSemanticSearchDefaults, + BulkSemanticSearchInput, + BulkSemanticSearchResponse, + BulkSemanticSearchResultItem, + QueryEnhancement, SearchFacet, SearchFacets, SearchMetadata, @@ -53,16 +62,20 @@ ) __all__ = [ - # Common "APIResponse", - # Hierarchy "Ancestor", "BatchConceptResult", - # Concept + "BulkSearchDefaults", + "BulkSearchInput", + "BulkSearchResponse", + "BulkSearchResultItem", + "BulkSemanticSearchDefaults", + "BulkSemanticSearchInput", + "BulkSemanticSearchResponse", + "BulkSemanticSearchResultItem", "Concept", "ConceptSummary", "Descendant", - # Domain "Domain", "DomainCategory", "DomainStats", @@ -71,20 +84,18 @@ "ErrorResponse", "HierarchyPath", "HierarchySummary", - # Mapping "Mapping", "MappingContext", "MappingQuality", "MappingSummary", "PaginationMeta", "PaginationParams", + "QueryEnhancement", "RelatedConcept", - # Relationship "Relationship", "RelationshipSummary", "RelationshipType", "ResponseMeta", - # Search "SearchFacet", "SearchFacets", "SearchMetadata", @@ -96,7 +107,6 @@ "SimilarSearchResult", "Suggestion", "Synonym", - # Vocabulary "Vocabulary", "VocabularyDomain", "VocabularyStats", diff --git a/src/omophub/types/search.py b/src/omophub/types/search.py index 5898683..d761b08 100644 --- a/src/omophub/types/search.py +++ b/src/omophub/types/search.py @@ -4,7 +4,7 @@ from typing import TYPE_CHECKING, Any, TypedDict -from typing_extensions import NotRequired +from typing_extensions import NotRequired, Required if 
TYPE_CHECKING: from .concept import Concept @@ -77,6 +77,112 @@ class SimilarSearchResult(TypedDict): search_metadata: SimilarSearchMetadata +# --------------------------------------------------------------------------- +# Bulk search types +# --------------------------------------------------------------------------- + + +class BulkSearchInput(TypedDict, total=False): + """Input for a single query in a bulk lexical search.""" + + search_id: Required[str] + query: Required[str] + vocabulary_ids: list[str] + domain_ids: list[str] + concept_class_ids: list[str] + standard_concept: str + include_invalid: bool + page_size: int + + +class BulkSearchDefaults(TypedDict, total=False): + """Default filters applied to all searches in a bulk lexical request.""" + + vocabulary_ids: list[str] + domain_ids: list[str] + concept_class_ids: list[str] + standard_concept: str + include_invalid: bool + page_size: int + + +class BulkSearchResultItem(TypedDict): + """Result for a single query in a bulk lexical search.""" + + search_id: str + query: str + results: list[dict[str, Any]] + status: str # "completed" | "failed" + error: NotRequired[str] + duration: NotRequired[int] + + +class BulkSearchResponse(TypedDict): + """Response from bulk lexical search.""" + + results: list[BulkSearchResultItem] + total_searches: int + completed_searches: int + failed_searches: int + + +class BulkSemanticSearchInput(TypedDict, total=False): + """Input for a single query in a bulk semantic search.""" + + search_id: Required[str] + query: Required[str] # 1-500 characters + page_size: int + threshold: float + vocabulary_ids: list[str] + domain_ids: list[str] + standard_concept: str + concept_class_id: str + + +class BulkSemanticSearchDefaults(TypedDict, total=False): + """Default filters applied to all searches in a bulk semantic request.""" + + page_size: int + threshold: float + vocabulary_ids: list[str] + domain_ids: list[str] + standard_concept: str + concept_class_id: str + + +class 
QueryEnhancement(TypedDict, total=False): + """Query enhancement info from semantic search.""" + + original_query: str + enhanced_query: str + abbreviations_expanded: list[str] + misspellings_corrected: list[str] + + +class BulkSemanticSearchResultItem(TypedDict): + """Result for a single query in a bulk semantic search.""" + + search_id: str + query: str + results: list[dict[str, Any]] + status: str # "completed" | "failed" + error: NotRequired[str] + similarity_threshold: NotRequired[float] + result_count: NotRequired[int] + duration: NotRequired[int] + query_enhancement: NotRequired[QueryEnhancement] + + +class BulkSemanticSearchResponse(TypedDict): + """Response from bulk semantic search.""" + + results: list[BulkSemanticSearchResultItem] + total_searches: int + completed_count: int + failed_count: int + total_duration: NotRequired[int] + + class SearchFacet(TypedDict): """Search facet with count.""" diff --git a/tests/integration/test_search.py b/tests/integration/test_search.py index ad9e527..df4b311 100644 --- a/tests/integration/test_search.py +++ b/tests/integration/test_search.py @@ -298,3 +298,100 @@ def test_similar_with_vocabulary_filter(self, integration_client: OMOPHub) -> No # If results, all should be from SNOMED for concept in similar: assert concept.get("vocabulary_id") == "SNOMED" + + +@pytest.mark.integration +class TestBulkBasicSearchIntegration: + """Integration tests for bulk lexical search.""" + + def test_bulk_basic_multiple_queries(self, integration_client: OMOPHub) -> None: + """Test bulk basic search with multiple queries.""" + result = integration_client.search.bulk_basic([ + {"search_id": "q1", "query": "diabetes mellitus"}, + {"search_id": "q2", "query": "hypertension"}, + {"search_id": "q3", "query": "aspirin"}, + ], defaults={"page_size": 5}) + + results = extract_data(result, "results") + assert len(results) == 3 + + # Verify all 3 search IDs are present with results + returned_ids = {item["search_id"] for item in results} + 
@pytest.mark.integration
class TestBulkSemanticSearchIntegration:
    """Integration tests for bulk semantic search."""

    def test_bulk_semantic_multiple_queries(self, integration_client: OMOPHub) -> None:
        """Test bulk semantic search with multiple natural-language queries."""
        result = integration_client.search.bulk_semantic([
            {"search_id": "s1", "query": "heart failure treatment options"},
            {"search_id": "s2", "query": "type 2 diabetes medication"},
        ], defaults={"threshold": 0.5, "page_size": 5})

        results = extract_data(result, "results")
        assert len(results) == 2

        # Compare as a set: a per-item membership check would still pass if
        # the same search_id came back twice and the other one was missing.
        returned_ids = {item["search_id"] for item in results}
        assert returned_ids == {"s1", "s2"}
        for item in results:
            assert item["status"] == "completed"

    def test_bulk_semantic_with_filters(self, integration_client: OMOPHub) -> None:
        """Test bulk semantic search with vocabulary and domain filters."""
        result = integration_client.search.bulk_semantic([
            {
                "search_id": "filtered",
                "query": "pain relief medication",
                "vocabulary_ids": ["SNOMED"],
                "page_size": 3,
                "threshold": 0.5,
            },
        ])

        results = extract_data(result, "results")
        assert len(results) == 1
        assert results[0]["status"] == "completed"

        # Verify SNOMED vocabulary filter was applied (vacuously true when
        # the search returns no concepts).
        for concept in results[0]["results"]:
            assert concept.get("vocabulary_id") == "SNOMED"

    def test_bulk_semantic_single_query(self, integration_client: OMOPHub) -> None:
        """Test bulk semantic search with a single query."""
        result = integration_client.search.bulk_semantic([
            {"search_id": "one", "query": "elevated blood pressure", "threshold": 0.5},
        ])

        results = extract_data(result, "results")
        assert len(results) == 1
        assert results[0]["search_id"] == "one"
        # Check the status as well, as the lexical single-query test does, so
        # a failed search is not reported as a passing test.
        assert results[0]["status"] == "completed"
class TestBulkSemanticSearch:
    """Tests for bulk semantic search."""

    @respx.mock
    def test_bulk_semantic(self, sync_client: OMOPHub, base_url: str) -> None:
        """Bulk semantic search returns the data from the mocked response."""
        completed_item = {
            "search_id": "s1",
            "query": "heart failure treatment",
            "results": [{"concept_id": 316139, "similarity_score": 0.92}],
            "status": "completed",
            "result_count": 1,
            "duration": 45,
        }
        envelope = {
            "success": True,
            "data": {
                "results": [completed_item],
                "total_searches": 1,
                "completed_count": 1,
                "failed_count": 0,
                "total_duration": 45,
            },
        }
        route = respx.post(f"{base_url}/search/semantic-bulk")
        route.mock(return_value=Response(200, json=envelope))

        response = sync_client.search.bulk_semantic([
            {"search_id": "s1", "query": "heart failure treatment"},
        ])

        items = response["results"]
        assert len(items) == 1
        assert response["completed_count"] == 1
        assert items[0]["status"] == "completed"

    @respx.mock
    def test_bulk_semantic_with_defaults(self, sync_client: OMOPHub, base_url: str) -> None:
        """Shared defaults are forwarded in the JSON request body."""
        import json

        empty_batch = {
            "success": True,
            "data": {
                "results": [],
                "total_searches": 1,
                "completed_count": 1,
                "failed_count": 0,
            },
        }
        route = respx.post(f"{base_url}/search/semantic-bulk")
        route.mock(return_value=Response(200, json=empty_batch))

        shared_defaults = {"threshold": 0.8, "page_size": 10, "vocabulary_ids": ["SNOMED"]}
        sync_client.search.bulk_semantic(
            [{"search_id": "s1", "query": "diabetes medications"}],
            defaults=shared_defaults,
        )

        # Inspect what was actually sent over the wire, not what was passed in.
        sent_defaults = json.loads(respx.calls[0].request.content)["defaults"]
        assert sent_defaults["threshold"] == 0.8
        assert sent_defaults["page_size"] == 10
        assert sent_defaults["vocabulary_ids"] == ["SNOMED"]