From 2361971a3a23de896d556561b816b5b62f7db743 Mon Sep 17 00:00:00 2001 From: Christophe Lapointe Date: Wed, 13 May 2026 16:49:22 -0400 Subject: [PATCH 1/8] feat: add FastAPI microservice wrapper --- server.py | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 server.py diff --git a/server.py b/server.py new file mode 100644 index 000000000..ff0e8f100 --- /dev/null +++ b/server.py @@ -0,0 +1,114 @@ +import os +import tempfile +import anthropic +from fastapi import FastAPI, File, HTTPException, UploadFile +from fastapi.middleware.cors import CORSMiddleware +from pydantic import BaseModel + +from pageindex.client import PageIndexClient + +app = FastAPI(title="PageIndex API") + +app.add_middleware( + CORSMiddleware, + allow_origins=os.environ.get("ALLOWED_ORIGINS", "*").split(","), + allow_methods=["POST", "GET"], + allow_headers=["*"], +) + +WORKSPACE_DIR = os.environ.get("WORKSPACE_DIR", "./workspace") +os.makedirs(WORKSPACE_DIR, exist_ok=True) + +pi_client = PageIndexClient( + model=os.environ.get("PAGEINDEX_MODEL", "claude-sonnet-4-6"), + workspace=WORKSPACE_DIR, +) + +anthropic_client = anthropic.Anthropic() + + +@app.get("/health") +def health(): + return {"status": "ok"} + + +@app.post("/index") +async def index_document(file: UploadFile = File(...)): + if not file.filename.lower().endswith(".pdf"): + raise HTTPException(status_code=400, detail="Only PDF files are supported") + + with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp: + tmp.write(await file.read()) + tmp_path = tmp.name + + try: + doc_id = pi_client.index(tmp_path) + return {"doc_id": doc_id, "filename": file.filename} + finally: + os.unlink(tmp_path) + + +class QueryRequest(BaseModel): + doc_id: str + question: str + + +@app.post("/query") +def query_document(req: QueryRequest): + structure = pi_client.get_document_structure(req.doc_id) + + tools = [ + { + "name": "get_page_content", + "description": "Retrieve the text of specific pages from the document.", + "input_schema": { + "type": "object", + "properties": { + "pages": { + "type": "string", + "description": "Pages to fetch. Examples: '5-7', '3,8', '12'.", + } + }, + "required": ["pages"], + }, + } + ] + + messages = [ + { + "role": "user", + "content": f"Document structure:\n{structure}\n\nQuestion: {req.question}", + } + ] + + for _ in range(10): + response = anthropic_client.messages.create( + model="claude-sonnet-4-6", + max_tokens=2048, + system=( + "You are a document analysis assistant. " + "Use the document structure to navigate the document and answer the question. " + "Always cite the page numbers where you found the information." + ), + tools=tools, + messages=messages, + ) + + if response.stop_reason == "end_turn": + answer = next( + (b.text for b in response.content if hasattr(b, "text")), "" + ) + return {"answer": answer} + + if response.stop_reason == "tool_use": + tool_results = [] + for block in response.content: + if block.type == "tool_use" and block.name == "get_page_content": + content = pi_client.get_page_content(req.doc_id, block.input["pages"]) + tool_results.append( + {"type": "tool_result", "tool_use_id": block.id, "content": content} + ) + messages.append({"role": "assistant", "content": response.content}) + messages.append({"role": "user", "content": tool_results}) + + raise HTTPException(status_code=500, detail="Could not answer the question.") From 38d1e64e97c98b230d932e77d8950349630b590e Mon Sep 17 00:00:00 2001 From: Christophe Lapointe Date: Wed, 13 May 2026 16:49:23 -0400 Subject: [PATCH 2/8] feat: add FastAPI microservice wrapper --- requirements-server.txt | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 requirements-server.txt diff --git a/requirements-server.txt b/requirements-server.txt new file mode 100644 index 000000000..b987426d0 --- /dev/null +++ b/requirements-server.txt @@ -0,0 +1,4 @@ +fastapi==0.115.0 +uvicorn[standard]==0.30.6 +python-multipart==0.0.9 +anthropic>=0.40.0 From b927b05862b0eaee58ebbe85263a018662d62d4d Mon Sep 17 00:00:00 2001 From: Christophe Lapointe Date: Wed, 13 May 2026 16:49:23 -0400 Subject: [PATCH 3/8] feat: add FastAPI microservice wrapper --- Dockerfile | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000..01015057b --- /dev/null +++ b/Dockerfile @@ -0,0 +1,15 @@ +FROM python:3.11-slim + +WORKDIR /app + +COPY requirements.txt requirements-server.txt ./ +RUN pip install --no-cache-dir -r requirements.txt -r requirements-server.txt + +COPY . . + +ENV WORKSPACE_DIR=/data/workspace +RUN mkdir -p /data/workspace + +EXPOSE 8000 + +CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8000"] From cb7e0580b22c2e5353c2e0396c1097ff03597ad2 Mon Sep 17 00:00:00 2001 From: Christophe Lapointe Date: Thu, 14 May 2026 09:12:47 -0400 Subject: [PATCH 4/8] feat: add docker-compose for Azure deployment --- docker-compose.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 docker-compose.yml diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 000000000..19658490b --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,16 @@ +services: + pageindex: + build: . + restart: unless-stopped + ports: + - "8000:8000" + environment: + - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} + - WORKSPACE_DIR=/data/workspace + - ALLOWED_ORIGINS=${ALLOWED_ORIGINS:-*} + - API_SECRET=${API_SECRET:-} + volumes: + - pageindex_data:/data/workspace + +volumes: + pageindex_data: From 589f96cc705fc29c7c7c49f5617e4f4d01f8e6d4 Mon Sep 17 00:00:00 2001 From: Christophe Lapointe Date: Thu, 14 May 2026 09:12:56 -0400 Subject: [PATCH 5/8] feat: add Azure VM setup script --- setup.sh | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 setup.sh diff --git a/setup.sh b/setup.sh new file mode 100644 index 000000000..878779bd6 --- /dev/null +++ b/setup.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# PageIndex Azure VM setup script +# Run as root on a fresh Ubuntu 22.04 VM +set -e + +echo "=== Installing Docker ===" +apt-get update -qq +apt-get install -y docker.io docker-compose-plugin git curl + +systemctl enable docker +systemctl start docker + +echo "=== Cloning PageIndex ===" +git clone https://github.com/clapointe-carbonleo/PageIndex.git /opt/pageindex +cd /opt/pageindex +git checkout feat/fastapi-server + +echo "=== Creating .env file ===" +cat > /opt/pageindex/.env << 'ENVEOF' +# Required — Anthropic API key for Claude +ANTHROPIC_API_KEY=sk-ant-REPLACE_ME + +# Allowed origins (comma-separated) — set to your mike-legal Vercel URL +ALLOWED_ORIGINS=https://mike-legal-three.vercel.app + +# Optional — secret token to protect the API (add to PAGEINDEX_SECRET in Vercel) +API_SECRET=REPLACE_WITH_RANDOM_SECRET +ENVEOF + +echo "=== Building and starting service ===" +cd /opt/pageindex +docker compose up -d --build + +echo "" +echo "=== Done! ===" +echo "PageIndex service is running on port 8000." +echo "Edit /opt/pageindex/.env to set your real API keys, then run: docker compose restart" From 40070a06044a3df5d206cd8c5292dd334a19b8fc Mon Sep 17 00:00:00 2001 From: Christophe Lapointe Date: Thu, 14 May 2026 09:13:13 -0400 Subject: [PATCH 6/8] feat: add API secret header guard --- server.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/server.py b/server.py index ff0e8f100..a791e20b0 100644 --- a/server.py +++ b/server.py @@ -1,7 +1,7 @@ import os import tempfile import anthropic -from fastapi import FastAPI, File, HTTPException, UploadFile +from fastapi import FastAPI, File, HTTPException, Header, UploadFile from fastapi.middleware.cors import CORSMiddleware from pydantic import BaseModel @@ -17,6 +17,7 @@ ) WORKSPACE_DIR = os.environ.get("WORKSPACE_DIR", "./workspace") +API_SECRET = os.environ.get("API_SECRET", "") os.makedirs(WORKSPACE_DIR, exist_ok=True) pi_client = PageIndexClient( @@ -27,13 +28,22 @@ anthropic_client = anthropic.Anthropic() +def verify_secret(x_api_secret: str = Header(default="")): + if API_SECRET and x_api_secret != API_SECRET: + raise HTTPException(status_code=401, detail="Invalid API secret.") + + @app.get("/health") def health(): return {"status": "ok"} @app.post("/index") -async def index_document(file: UploadFile = File(...)): +async def index_document( + file: UploadFile = File(...), + x_api_secret: str = Header(default=""), +): + verify_secret(x_api_secret) if not file.filename.lower().endswith(".pdf"): raise HTTPException(status_code=400, detail="Only PDF files are supported") @@ -54,7 +64,11 @@ class QueryRequest(BaseModel): @app.post("/query") -def query_document(req: QueryRequest): +def query_document( + req: QueryRequest, + x_api_secret: str = Header(default=""), +): + verify_secret(x_api_secret) structure = pi_client.get_document_structure(req.doc_id) tools = [ @@ -77,7 +91,10 @@ def query_document(req: QueryRequest): messages = [ { "role": "user", - "content": f"Document structure:\n{structure}\n\nQuestion: {req.question}", + "content": f"Document structure: +{structure} + +Question: {req.question}", } ] From 1a3dce850dcedab50eca6cee43182b4b00187f12 Mon Sep 17 00:00:00 2001 From: Christophe Lapointe Date: Thu, 14 May 2026 09:48:50 -0400 Subject: [PATCH 7/8] fix: downgrade python-dotenv to 1.0.1 for litellm compatibility --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e6ad80531..04bf7eb69 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,5 +2,6 @@ litellm==1.83.7 # openai-agents # optional: required for examples/agentic_vectorless_rag_demo.py pymupdf==1.26.4 PyPDF2==3.0.1 -python-dotenv==1.2.2 +python-dotenv==1.0.1 pyyaml==6.0.2 + From 3edb57ad40c693f63c3db5fb099fa11432a42649 Mon Sep 17 00:00:00 2001 From: Christophe Lapointe Date: Thu, 14 May 2026 09:50:52 -0400 Subject: [PATCH 8/8] fix: unterminated string literal in f-string at line 94 --- server.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/server.py b/server.py index a791e20b0..853a244b3 100644 --- a/server.py +++ b/server.py @@ -91,10 +91,7 @@ def query_document( messages = [ { "role": "user", - "content": f"Document structure: -{structure} - -Question: {req.question}", + "content": f"Document structure:\n{structure}\n\nQuestion: {req.question}", } ]