Skip to content

Commit acba670

Browse files
authored
use litellm sdk (#424)
* use litellm sdk * address comments * another fix * unify packages and also set keys when initializing * add comment * update * nit * update * add * pass user * undo langfuse * missing
1 parent 1d07878 commit acba670

File tree

11 files changed

+219
-209
lines changed

11 files changed

+219
-209
lines changed

eval_protocol/adapters/fireworks_tracing.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def __call__(
4646
...
4747

4848

49-
def extract_openai_response(observations: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
49+
def extract_otel_attributes(observations: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
5050
"""Attempt to extract and parse attributes from raw_gen_ai_request observation. This only works when stored in OTEL format.
5151
5252
Args:
@@ -137,9 +137,14 @@ def convert_trace_dict_to_evaluation_row(
137137

138138
observations = trace.get("observations") or []
139139
# We can only extract when stored in OTEL format.
140-
openai_response = extract_openai_response(observations)
141-
if openai_response:
142-
choices = openai_response.get("llm.openai.choices")
140+
otel_attributes = extract_otel_attributes(observations)
141+
if otel_attributes:
142+
# Find choices from any provider (llm.*.choices pattern)
143+
choices = None
144+
for key, value in otel_attributes.items():
145+
if key.endswith(".choices") and isinstance(value, list):
146+
choices = value
147+
break
143148
if choices and len(choices) > 0:
144149
execution_metadata.finish_reason = choices[0].get("finish_reason")
145150

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,25 @@
1-
# Metadata Extraction Gateway - Sits in front of LiteLLM
1+
# Metadata Extraction Gateway - Uses LiteLLM SDK directly with Langfuse OTEL
22
FROM python:3.11-slim
33

44
WORKDIR /app
55

66
# Prevent Python from buffering stdout/stderr
77
ENV PYTHONUNBUFFERED=1
88

9-
# Copy requirements file
10-
COPY ./requirements.txt /app/requirements.txt
9+
# Copy the entire package for local install (context is repo root)
10+
COPY pyproject.toml /app/pyproject.toml
11+
COPY eval_protocol /app/eval_protocol
12+
COPY README.md /app/README.md
1113

12-
# Install dependencies
13-
RUN pip install --no-cache-dir -r requirements.txt
14+
# Install from local source with proxy extras
15+
RUN pip install --no-cache-dir ".[proxy]"
1416

15-
# Copy the proxy package
16-
COPY ./proxy_core /app/proxy_core
17+
# Copy the proxy package (local overrides for main.py, auth.py, etc.)
18+
COPY eval_protocol/proxy/proxy_core /app/proxy_core
1719

1820
# Expose port
1921
EXPOSE 4000
2022

2123
# Run the gateway as a module
22-
# LITELLM_URL will be set by environment (docker-compose or Cloud Run)
24+
# LANGFUSE_HOST and REDIS_HOST will be set by environment (docker-compose or Cloud Run)
2325
CMD ["python", "-m", "proxy_core.main"]

eval_protocol/proxy/README.md

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,9 @@ This enables distributed evaluation systems to track which LLM completions belon
5959
- Stores insertion IDs per rollout for completeness checking
6060
- Uses Redis Sets: `rollout_id -> {insertion_id_1, insertion_id_2, ...}`
6161

62-
#### 3. **LiteLLM Backend**
63-
- Standard LiteLLM proxy for routing to LLM providers
64-
- Configured with Langfuse callbacks for automatic tracing
62+
#### 3. **LiteLLM SDK (Direct)**
63+
- Uses LiteLLM SDK directly for LLM calls (no separate proxy server needed)
64+
- Integrated with Langfuse via `langfuse_otel` OpenTelemetry callback
6565

6666
## Key Features
6767

@@ -244,12 +244,11 @@ Forwards any other request to LiteLLM backend with API key injection.
244244

245245
| Variable | Required | Default | Description |
246246
|----------|----------|---------|-------------|
247-
| `LITELLM_URL` | Yes | - | URL of LiteLLM backend |
248247
| `REDIS_HOST` | Yes | - | Redis hostname |
249248
| `REDIS_PORT` | No | 6379 | Redis port |
250249
| `REDIS_PASSWORD` | No | - | Redis password |
251250
| `SECRETS_PATH` | No | `proxy_core/secrets.yaml` | Path to secrets file (YAML) |
252-
| `LANGFUSE_HOST` | No | `https://cloud.langfuse.com` | Langfuse base URL |
251+
| `LANGFUSE_HOST` | No | `https://cloud.langfuse.com` | Langfuse OTEL host for tracing (set to `https://us.cloud.langfuse.com` for the US region) |
253252
| `REQUEST_TIMEOUT` | No | 300.0 | Request timeout (LLM calls) in seconds |
254253
| `LOG_LEVEL` | No | INFO | Logging level |
255254
| `PORT` | No | 4000 | Gateway port |
@@ -272,25 +271,26 @@ default_project_id: project-1
272271

273272
### LiteLLM Configuration
274273

275-
The `config_no_cache.yaml` configures LiteLLM:
274+
The `config_no_cache.yaml` configures LiteLLM (only needed if running a standalone LiteLLM proxy):
276275
```yaml
277276
model_list:
278277
- model_name: "*"
279278
litellm_params:
280279
model: "*"
281280
litellm_settings:
282-
success_callback: ["langfuse"]
283-
failure_callback: ["langfuse"]
281+
callbacks: ["langfuse_otel"]
284282
drop_params: True
285283
general_settings:
286284
allow_client_side_credentials: true
287285
```
288286

289287
Key settings:
290288
- **Wildcard model support**: Route any model to any provider
291-
- **Langfuse callbacks**: Automatic tracing on success/failure
289+
- **Langfuse OTEL**: OpenTelemetry-based tracing via `langfuse_otel` callback
292290
- **Client-side credentials**: Accept API keys from request body
293291

292+
**Note:** The proxy now uses the LiteLLM SDK directly with `langfuse_otel` integration, so a separate LiteLLM proxy server is no longer required.
293+
294294
## Security Considerations
295295

296296
### Authentication

eval_protocol/proxy/config_no_cache.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@ model_list:
33
litellm_params:
44
model: "*"
55
litellm_settings:
6-
success_callback: ["langfuse"]
7-
failure_callback: ["langfuse"]
6+
callbacks: ["langfuse_otel"]
87
drop_params: True
98
general_settings:
109
allow_client_side_credentials: true

eval_protocol/proxy/docker-compose.yml

Lines changed: 6 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -7,41 +7,19 @@ services:
77
ports:
88
- "6379:6379" # Expose for debugging if needed
99
networks:
10-
- litellm-network
10+
- proxy-network
1111
restart: unless-stopped
1212
command: redis-server --appendonly yes
1313
volumes:
1414
- redis-data:/data
1515

16-
# LiteLLM Backend - Handles actual LLM proxying
17-
litellm-backend:
18-
image: litellm/litellm:v1.77.3-stable
19-
platform: linux/amd64
20-
container_name: litellm-backend
21-
command: ["--config", "/app/config.yaml", "--port", "4000", "--host", "0.0.0.0"]
22-
# If you want to be able to use other model providers like OpenAI, Anthropic, etc., you need to set keys in .env file.
23-
env_file:
24-
- .env # Load API keys from .env file
25-
environment:
26-
- LANGFUSE_PUBLIC_KEY=dummy # Set dummy public and private key so Langfuse instance initializes in LiteLLM, then real keys get sent in proxy
27-
- LANGFUSE_SECRET_KEY=dummy
28-
volumes:
29-
- ./config_no_cache.yaml:/app/config.yaml:ro
30-
ports:
31-
- "4001:4000" # Expose on 4001 for direct access if needed
32-
networks:
33-
- litellm-network
34-
restart: unless-stopped
35-
36-
# Metadata Gateway - Public-facing service that extracts metadata from URLs
16+
# Metadata Gateway - Handles LLM calls directly via LiteLLM SDK with Langfuse OTEL
3717
metadata-gateway:
3818
build:
39-
context: .
40-
dockerfile: Dockerfile.gateway
19+
context: ../..
20+
dockerfile: eval_protocol/proxy/Dockerfile.gateway
4121
container_name: metadata-gateway
4222
environment:
43-
# Point to the LiteLLM backend service
44-
- LITELLM_URL=http://litellm-backend:4000
4523
- PORT=4000
4624
# Redis configuration for assistant message counting
4725
- REDIS_HOST=redis
@@ -56,14 +34,13 @@ services:
5634
ports:
5735
- "4000:4000" # Main public-facing port
5836
networks:
59-
- litellm-network
37+
- proxy-network
6038
depends_on:
61-
- litellm-backend
6239
- redis
6340
restart: unless-stopped
6441

6542
networks:
66-
litellm-network:
43+
proxy-network:
6744
driver: bridge
6845

6946
volumes:

eval_protocol/proxy/proxy_core/app.py

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
from .models import ProxyConfig, LangfuseTracesResponse, TracesParams, ChatParams, ChatRequestHook, TracesRequestHook
1717
from .auth import AuthProvider, NoAuthProvider
18-
from .litellm import handle_chat_completion, proxy_to_litellm
18+
from .litellm import handle_chat_completion
1919
from .langfuse import fetch_langfuse_traces, pointwise_fetch_langfuse_trace
2020

2121
# Configure logging before any other imports (so all modules inherit this config)
@@ -35,10 +35,6 @@ def build_proxy_config(
3535
preprocess_traces_request: Optional[TracesRequestHook] = None,
3636
) -> ProxyConfig:
3737
"""Load environment and secrets, and build ProxyConfig"""
38-
# Env
39-
litellm_url = os.getenv("LITELLM_URL")
40-
if not litellm_url:
41-
raise ValueError("LITELLM_URL environment variable must be set")
4238
request_timeout = float(os.getenv("REQUEST_TIMEOUT", "300.0"))
4339
langfuse_host = os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com")
4440

@@ -66,7 +62,6 @@ def build_proxy_config(
6662
raise ValueError(f"Invalid format in secrets file {secrets_path.name}: {e}")
6763

6864
return ProxyConfig(
69-
litellm_url=litellm_url,
7065
request_timeout=request_timeout,
7166
langfuse_host=langfuse_host,
7267
langfuse_keys=langfuse_keys,
@@ -113,6 +108,16 @@ async def lifespan(app: FastAPI):
113108
app.state.config = build_proxy_config(preprocess_chat_request, preprocess_traces_request)
114109
app.state.redis = init_redis()
115110

111+
config = app.state.config
112+
default_keys = config.langfuse_keys[config.default_project_id]
113+
os.environ["LANGFUSE_PUBLIC_KEY"] = default_keys["public_key"]
114+
os.environ["LANGFUSE_SECRET_KEY"] = default_keys["secret_key"]
115+
os.environ.setdefault("LANGFUSE_HOST", config.langfuse_host)
116+
117+
import litellm
118+
119+
litellm.callbacks = ["langfuse_otel"]
120+
116121
try:
117122
yield
118123
finally:
@@ -297,13 +302,4 @@ async def pointwise_get_langfuse_trace(
297302
async def health():
298303
return {"status": "healthy", "service": "metadata-proxy"}
299304

300-
# Catch-all
301-
@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH"])
302-
async def catch_all_proxy(
303-
path: str,
304-
request: Request,
305-
config: ProxyConfig = Depends(get_config),
306-
):
307-
return await proxy_to_litellm(config, path, request)
308-
309305
return app

0 commit comments

Comments
 (0)