Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions src/classifai/servers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
"""This module provides functionality for creating or extending a REST-API service.
"""This module provides functionality for building and running ClassifAI as a REST API service.

This allows a user to call the search methods of one or more `VectorStore` objects,
from an API endpoint.

These functions interact with the ClassifAI Indexer module's `VectorStore` objects,
such that their `embed`, `search` and `reverse_search` methods are exposed on
REST-API endpoints, via a FastAPI app.
such that their `embed`, `search` and `reverse_search` methods are exposed as
REST API endpoints, as a FastAPI service started with these functions.

Full API documentation for FastAPI endpoints and Pydantic Models
can be found in autogenerated app Swagger docs at `/docs`.
Expand Down
39 changes: 20 additions & 19 deletions src/classifai/servers/main.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
# pylint: disable=C0301
"""This module provides functionality for creating a start a restAPI service.
This allows a user to call the search methods of different VectorStore objects, from
an api-endpoint.
"""This module provides functionality for building and running ClassifAI as a REST API service.

These functions interact with the ClassifAI PackageIndexer modules
VectorStore objects, such that their embed and search methods are exposed on
restAPI endpoints, in a FastAPI restAPI service started with these functions.
This allows a user to call the search methods of one or more `VectorStore` objects,
from an API endpoint.

These functions interact with the ClassifAI Package's Indexer module's
`VectorStore` objects, such that their embed and search methods are exposed on
REST API endpoints, as a FastAPI service started with these functions.
"""

from __future__ import annotations
Expand Down Expand Up @@ -95,7 +96,7 @@ def get_router(vector_stores: list[VectorStore], endpoint_names: list[str]) -> A
make_endpoints(router, vector_stores_dict)

@router.get("/", description="UI accessibility", tags=["docs"])
def docs():
def docs() -> RedirectResponse:
"""Redirect users to the API documentation page.

Returns:
Expand Down Expand Up @@ -156,15 +157,15 @@ def run_server( # noqa: PLR0913
host_ip: str = "127.0.0.1",
log_level: str = "warning",
demo_mode: bool = False,
):
) -> None:
"""Create and run a `FastAPI` server with search endpoints.

Args:
vector_stores (list[VectorStore]): A list of `VectorStore` objects, each responsible for handling embedding and search operations for a specific endpoint.
endpoint_names (list[str]): A list of endpoint names corresponding to the `VectorStore`s to be exposed.
port (int): [optional] The port on which the API server will run. Defaults to 8000.
host_ip (str): [optional] The ip address that the api server runs on. Defaults to 127.0.0.1, note: default 127.0.0.1 exposes to connections from the same machine only, to expose for external connections use 0.0.0.0.
log_level ( str ): [optional] The level of logs for the uvicorn server, levels are ['critial', 'error', 'warning', 'log', 'debug'].
log_level (str): [optional] The level of logs for the uvicorn server, levels are ['critical', 'error', 'warning', 'log', 'debug'].
demo_mode (bool): [optional] Flag to show demo server info (Updates the openapi docs to show info indicating server is an api demo).

Raises:
Expand Down Expand Up @@ -198,7 +199,7 @@ def _set_demo_defaults(app: FastAPI):
app.description = "This is a demo of the ClassifAI server module"


def make_endpoints(main_router: APIRouter | FastAPI, vector_stores_dict: dict[str, VectorStore]):
def make_endpoints(main_router: APIRouter | FastAPI, vector_stores_dict: dict[str, VectorStore]) -> None:
"""Create and register the different endpoints to your app.

Args:
Expand All @@ -219,13 +220,13 @@ def make_endpoints(main_router: APIRouter | FastAPI, vector_stores_dict: dict[st
main_router.include_router(sub_router)


def _create_embedding_endpoint(router: APIRouter | FastAPI, endpoint_name: str, vector_store: VectorStore):
"""Create and register an embedding endpoint for a specific `VectorStore`.
def _create_embedding_endpoint(router: APIRouter | FastAPI, endpoint_name: str, vector_store: VectorStore) -> None:
"""Create and register an `embed` endpoint for a specific `VectorStore`.

Args:
router (APIRouter | FastAPI): The `FastAPI` application instance.
endpoint_name (str): The name of the endpoint to be created.
vector_store: The vector store object responsible for generating embeddings.
vector_store (VectorStore): The vector store object responsible for generating embeddings.

The created endpoint accepts POST requests with input data, generates embeddings
for the provided documents, and returns the results in a structured format.
Expand All @@ -250,13 +251,13 @@ async def embedding_endpoint(data: EmbedRequestSet) -> EmbedResponseBody:
return formatted_result


def _create_search_endpoint(router: APIRouter | FastAPI, endpoint_name: str, vector_store: VectorStore):
"""Create and register a search endpoint for a specific `VectorStore`.
def _create_search_endpoint(router: APIRouter | FastAPI, endpoint_name: str, vector_store: VectorStore) -> None:
"""Create and register a `search` endpoint for a specific `VectorStore`.

Args:
router (APIRouter | FastAPI): The `FastAPI` application instance.
endpoint_name (str): The name of the endpoint to be created.
vector_store: The `VectorStore` object responsible for performing search operations.
vector_store (VectorStore): The `VectorStore` object responsible for performing search operations.

The created endpoint accepts POST requests with input data and a query parameter
specifying the number of results to return. It performs a search operation using
Expand Down Expand Up @@ -295,13 +296,13 @@ async def search_endpoint(
return formatted_result


def _create_reverse_search_endpoint(router: APIRouter | FastAPI, endpoint_name: str, vector_store: VectorStore):
"""Create and register a reverse_search endpoint for a specific vector store.
def _create_reverse_search_endpoint(router: APIRouter | FastAPI, endpoint_name: str, vector_store: VectorStore) -> None:
"""Create and register a `reverse_search` endpoint for a specific vector store.

Args:
router (APIRouter | FastAPI): The `FastAPI` application instance.
endpoint_name (str): The name of the endpoint to be created.
vector_store: The `VectorStore` object responsible for performing search operations.
vector_store (VectorStore): The `VectorStore` object responsible for performing search operations.

The created endpoint accepts POST requests with input data and a query parameter
specifying the number of results to return. It performs a reverse search operation using
Expand Down
56 changes: 34 additions & 22 deletions src/classifai/servers/pydantic_models.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
# pylint: disable=C0301
"""Pydantic Classes to model request and response data for ClassifAI FastAPI RESTful API."""
"""Pydantic Classes to model request and response data for ClassifAI FastAPI REST API."""

import pandas as pd
from pydantic import BaseModel, Extra, Field


class SearchRequestEntry(BaseModel):
"""Atomic model for a single row of `VectorStore` search method input data (i.e. a single query input) , includes `id` and
`query`.
"""Atomic model for a single row of `VectorStore` search method input data (i.e. a single query input).

Includes `id` and `query`.
"""

id: str = Field(examples=["1"])
Expand All @@ -18,11 +19,12 @@ class SearchRequestEntry(BaseModel):


class SearchRequestSet(BaseModel):
"""Model for a list of many `SearchRequestEntry` Pydantic models, i.e. several queries to be searched
in the `VectorStore`.
"""Model for a list of many `SearchRequestEntry` Pydantic models.

i.e. several search query `entries` to be searched in the `VectorStore`.
"""

entries: list[SearchRequestEntry] = Field(description="array of search queries to be searched in the VectorStore.")
entries: list[SearchRequestEntry] = Field(description="Array of search queries to be searched in the VectorStore.")


class SearchResponseEntry(BaseModel):
Expand All @@ -38,7 +40,10 @@ class Config:


class SearchResponseSet(BaseModel):
"""Model for a list of many `SearchResponseEntry` Pydantic models, representing a ranked list of `VectorStore` search results for a provided query."""
"""Model for a list of many `SearchResponseEntry` Pydantic models.

Representing a ranked list of `VectorStore` search results for a provided query.
"""

query_id: str = Field(description="The id of the query input for which these are the search results.")
query_text: str = Field(description="The text of the query input for which these are the search results.")
Expand All @@ -48,8 +53,9 @@ class SearchResponseSet(BaseModel):


class SearchResponseBody(BaseModel):
"""Model for the overall search response body, which includes a list of `SearchResponseSet` objects,
representing the search results for each input query.
"""Model for the overall search response body, which includes a list of `SearchResponseSet` objects.

Representing the search results for each input query.
"""

data: list[SearchResponseSet]
Expand All @@ -66,16 +72,18 @@ class ReverseSearchRequestEntry(BaseModel):


class ReverseSearchRequestSet(BaseModel):
"""Model for a list of many `ReverseSearchRequestEntry` Pydantic models, i.e. several `VectorStore` row entry
labels to be looked up in the `VectorStore`.
"""Model for a list of many `ReverseSearchRequestEntry` Pydantic models.

i.e. several `VectorStore` row entry labels to be looked up in the `VectorStore`.
"""

entries: list[ReverseSearchRequestEntry] = Field(description="array of VectorStore row entry labels to look up.")


class ReverseSearchResponseEntry(BaseModel):
"""Atomic model for single reverse search result entry, includes retrieved `doc_label` and `doc_text` which
are expected as str types.
"""Atomic model for single reverse search result entry.

Includes retrieved `doc_label` and `doc_text` which are expected as str types.
"""

doc_label: str
Expand All @@ -86,8 +94,9 @@ class Config:


class ReverseSearchResponseSet(BaseModel):
"""Model for a list of many `ReverseSearchResponseEntry` pydnatic models, representing a list of `VectorStore`
entries found (partially) matching an input 'searched_doc_label' and corresponding input `id`.
"""Model for a list of many `ReverseSearchResponseEntry` Pydantic models.

Representing a list of `VectorStore` entries found (partially) matching an input 'searched_doc_label' and corresponding input `id`.
"""

input_id: str = Field(
Expand All @@ -102,8 +111,9 @@ class ReverseSearchResponseSet(BaseModel):


class ReverseSearchResponseBody(BaseModel):
"""Model for the overall reverse search response body, which includes a list of `ReverseSearchResponseSet`
objects, representing the reverse search results for each input `VectorStore` row entry `id`.
"""Model for the overall reverse search response body, which includes a list of `ReverseSearchResponseSet` objects.

Representing the reverse search results for each input `VectorStore` row entry `id`.
"""

data: list[ReverseSearchResponseSet]
Expand All @@ -119,7 +129,10 @@ class EmbedRequestEntry(BaseModel):


class EmbedRequestSet(BaseModel):
"""Model for a list of many `EmbedRequestEntry` Pydantic models, representing several text strings to be embedded with the `VectorStore` embed method."""
"""Model for a list of many `EmbedRequestEntry` Pydantic models.

Representing several text strings to be embedded with the `VectorStore` embed method.
"""

entries: list[EmbedRequestEntry] = Field(
description="array of text entries to be embedded, with their corresponding text and id"
Expand Down Expand Up @@ -150,8 +163,7 @@ class EmbedResponseBody(BaseModel):
def convert_reverse_search_dataframe_to_pydantic_response(
df: pd.DataFrame, meta_data: dict, original_input: list[dict]
) -> ReverseSearchResponseBody:
"""Convert a `VectorStoreReverseSearchOutput` DataFrame into a JSON object conforming to the `ReverseSearchResponseBody` Pydantic
model.
"""Convert a `VectorStoreReverseSearchOutput` DataFrame into a JSON object conforming to the `ReverseSearchResponseBody` Pydantic model.

Args:
df (pd.DataFrame): Pandas DataFrame containing reverse search results.
Expand Down Expand Up @@ -235,8 +247,7 @@ def convert_reverse_search_dataframe_to_pydantic_response(


def convert_search_dataframe_to_pydantic_response(df: pd.DataFrame, meta_data: dict) -> SearchResponseBody:
"""Convert a `VectorStoreSearchOutput` DataFrame into a JSON object conforming to the `SearchResponseBody` Pydantic
model.
"""Convert a `VectorStoreSearchOutput` DataFrame into a JSON object conforming to the `SearchResponseBody` Pydantic model.

Args:
df (pd.DataFrame): Pandas DataFrame containing search results.
Expand Down Expand Up @@ -307,6 +318,7 @@ def convert_search_dataframe_to_pydantic_response(df: pd.DataFrame, meta_data: d

def convert_embedding_dataframe_to_pydantic_response(df: pd.DataFrame) -> EmbedResponseBody:
"""Convert a `VectorStoreEmbedOutput` DataFrame into a JSON object conforming to the `EmbedResponseBody` Pydantic model.

Unlike the conversion functions for search and reverse search, this function does not take in a `meta_data` dictionary as an argument,
as meta data comes from the `VectorStore` which is not accessed during the embedding process, and thus there are no reserved meta data columns
to check for. Instead, this function identifies any extra columns in the DataFrame that are not `id`, `text` or `embedding` as "hook" columns,
Expand Down
Loading