Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ dist-ssr
.venv
*.python-version
*.env
openshift-images/

# Python cache
__pycache__/
Expand Down
3 changes: 2 additions & 1 deletion agent/src/agent/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@ class AgentSettings(BaseSettings):
LLM_API_KEY: str = "ollama"
LLM_BASE_URL: str = "http://localhost:11434/v1"
LLM_MODEL: str = "gemma4:e4b"
EMBEDDER_URL: str = "http://localhost:11434"
EMBEDDER_URL: str = "http://localhost:11434/v1/embeddings"
EMBEDDER_MODEL: str = "nomic-embed-text:latest"
EMBEDDER_KEY: str = ""
Comment on lines +12 to +14

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

cat -n agent/src/agent/config.py

Repository: StavPonte11/text2sql-onboarding

Length of output: 1600


🏁 Script executed:

rg -A 5 -B 5 "EMBEDDER_URL" --type py

Repository: StavPonte11/text2sql-onboarding

Length of output: 4453


🏁 Script executed:

rg "from pydantic" agent/src/agent/config.py

Repository: StavPonte11/text2sql-onboarding

Length of output: 165


Constrain EMBEDDER_URL to HTTP(S) at settings load time.

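Line 12 keeps EMBEDDER_URL as an unconstrained str, allowing unsafe URL schemes (ftp, file, etc.) to be passed to embedder callers. The value is directly consumed by urllib.request.Request and httpx.post without scheme validation. Use Pydantic's AnyHttpUrl type to enforce HTTP/HTTPS schemes at configuration initialization and fail fast on misconfiguration. Note: if the project is on Pydantic v2, AnyHttpUrl is a URL object rather than a str subclass, so every call site that forwards settings.EMBEDDER_URL to an HTTP client must wrap it in str(...) — confirm the installed Pydantic version before applying.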

Suggested hardening
-from pydantic import Field
+from pydantic import AnyHttpUrl, Field

 class AgentSettings(BaseSettings):
     
-    EMBEDDER_URL: str = "http://localhost:11434/v1/embeddings"
+    EMBEDDER_URL: AnyHttpUrl = "http://localhost:11434/v1/embeddings"
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
EMBEDDER_URL: str = "http://localhost:11434/v1/embeddings"
EMBEDDER_MODEL: str = "nomic-embed-text:latest"
EMBEDDER_KEY: str = ""
EMBEDDER_URL: AnyHttpUrl = "http://localhost:11434/v1/embeddings"
EMBEDDER_MODEL: str = "nomic-embed-text:latest"
EMBEDDER_KEY: str = ""
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@agent/src/agent/config.py` around lines 12 - 14, The EMBEDDER_URL field on
line 12 is currently typed as a plain str, which allows unsafe URL schemes like
ftp or file to be passed through. Change the EMBEDDER_URL field type from str to
Pydantic's AnyHttpUrl type to enforce that only HTTP and HTTPS schemes are
allowed. This will validate the URL scheme at configuration initialization time
and fail fast if an invalid scheme is provided.

Source: Linters/SAST tools

HYBRID_SEARCH_MAX_TABLES: int = 10
MAX_PROFILES_TO_FETCH: int = 3

Expand Down
5 changes: 1 addition & 4 deletions agent/src/agent/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,4 @@
# Set up logging with correlation ID
setup_logging()

# FastMCP SSE app is a full Starlette app.
# We expose it directly so its lifespan is triggered properly by Uvicorn.
# It exposes the endpoint at /sse.
app = mcp.sse_app()
app = mcp.streamable_http_app()
20 changes: 11 additions & 9 deletions agent/src/agent/nodes/schema_explorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from sqlmodel import Session, select
from agent.config import settings
from agent.langfuse_client import langfuse_client
from core.embeddings import get_embedding

# Initialize LLM
llm = ChatOpenAI(model=settings.LLM_MODEL, base_url=settings.LLM_BASE_URL, api_key=settings.LLM_API_KEY, temperature=0)
Expand All @@ -39,18 +40,19 @@ class SchemaExplorerOutput(BaseModel):
description="List of strings (table names or options) for the user to choose from. Must be empty if ambiguity_detected is false."
)


def get_query_embedding(text: str) -> list[float]:
"""Generate 768-dimensional embedding from nomic-embed-text."""
# TODO: support secret
url = f"{settings.EMBEDDER_URL}"
data = json.dumps({"model": settings.EMBEDDER_MODEL, "prompt": text}).encode()
req = urllib.request.Request(url, data=data, headers={"Content-Type": "application/json"})
try:
with urllib.request.urlopen(req, timeout=10) as res:
return json.loads(res.read().decode())["embedding"]
except Exception as e:
print(f"Error getting query embedding: {e}")
emb = get_embedding(
text=text,
embedder_url=settings.EMBEDDER_URL,
embedder_model=settings.EMBEDDER_MODEL,
embedder_key=settings.EMBEDDER_KEY
)
if emb is None:
print("Error getting query embedding for text")
return [0.0] * 768
return emb

def hybrid_search_tables(query: str, query_embedding: list[float], session: Session, allowed_tables: list[str] | None = None, allowed_statuses: list[str] | None = None) -> list[Table]:
"""Hybrid search combining pgvector cosine distance and keyword matching."""
Expand Down
5 changes: 3 additions & 2 deletions backend/app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class Settings(BaseSettings):

# Agent MCP service URL (internal service-to-service)
AGENT_URL: str = "http://localhost:8001"

EVALUATION_SERVICE_URL: str = "http://localhost:8001"
OPENMETADATA_TOKEN: str = ""

APP_ENV: str = "development"
Expand Down Expand Up @@ -52,8 +52,9 @@ class Settings(BaseSettings):
JWT_EXPIRE_HOURS: int = 8

# Embedder Config
EMBEDDER_URL: str = "http://host.docker.internal:11434/api/embeddings"
EMBEDDER_URL: str = "http://host.docker.internal:11434/v1/embeddings"
EMBEDDER_MODEL: str = "nomic-embed-text:latest"
EMBEDDER_KEY: str = ""
Comment on lines +55 to +57

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

cat -n backend/app/config.py | head -100

Repository: StavPonte11/text2sql-onboarding

Length of output: 2645


🏁 Script executed:

cat -n backend/app/config.py | tail -20

Repository: StavPonte11/text2sql-onboarding

Length of output: 902


🏁 Script executed:

wc -l backend/app/config.py

Repository: StavPonte11/text2sql-onboarding

Length of output: 100


🏁 Script executed:

rg "EMBEDDER_URL" -A 3 -B 3

Repository: StavPonte11/text2sql-onboarding

Length of output: 3384


🏁 Script executed:

cat -n backend/app/config.py && echo "---" && cat -n agent/src/agent/config.py

Repository: StavPonte11/text2sql-onboarding

Length of output: 4174


🏁 Script executed:

rg "field_validator|Field\(" backend/app/config.py

Repository: StavPonte11/text2sql-onboarding

Length of output: 57


🏁 Script executed:

rg "field_validator|@validator" backend/app/ && echo "---FOUND---" || echo "---NO VALIDATORS FOUND---"

Repository: StavPonte11/text2sql-onboarding

Length of output: 101


🏁 Script executed:

cat -n backend/app/seed.py | grep -A 10 "def get_embedding"

Repository: StavPonte11/text2sql-onboarding

Length of output: 711


🏁 Script executed:

rg "http|https|scheme" backend/app/ -i | grep -i "valid\|check\|assert" | head -20

Repository: StavPonte11/text2sql-onboarding

Length of output: 57


🏁 Script executed:

rg "urllib.*parse|urlparse|HttpUrl|AnyUrl" backend/app/

Repository: StavPonte11/text2sql-onboarding

Length of output: 57


🏁 Script executed:

rg "from pydantic" backend/app/config.py

Repository: StavPonte11/text2sql-onboarding

Length of output: 138


🏁 Script executed:

rg "HttpUrl|AnyUrl|field_validator" --type python

Repository: StavPonte11/text2sql-onboarding

Length of output: 106


🏁 Script executed:

rg "HttpUrl|AnyUrl|field_validator" -t py

Repository: StavPonte11/text2sql-onboarding

Length of output: 57


🏁 Script executed:

git log --oneline --all -S "EMBEDDER_URL" | head -5

Repository: StavPonte11/text2sql-onboarding

Length of output: 202


🏁 Script executed:

cat -n backend/app/routers/tables.py | grep -A 5 -B 5 "EMBEDDER_URL"

Repository: StavPonte11/text2sql-onboarding

Length of output: 1010


🏁 Script executed:

git log --all --oneline | head -20

Repository: StavPonte11/text2sql-onboarding

Length of output: 1031


🏁 Script executed:

git log -p --all -S "field_validator" --reverse | head -50

Repository: StavPonte11/text2sql-onboarding

Length of output: 57


🏁 Script executed:

git show 5e7aea0 --stat

Repository: StavPonte11/text2sql-onboarding

Length of output: 456


🏁 Script executed:

git show 462eb08

Repository: StavPonte11/text2sql-onboarding

Length of output: 958


🏁 Script executed:

rg "http" backend/app/config.py

Repository: StavPonte11/text2sql-onboarding

Length of output: 450


🏁 Script executed:

rg "startswith.*http\|http.*startswith\|scheme" backend/app/ agent/src/

Repository: StavPonte11/text2sql-onboarding

Length of output: 57


🏁 Script executed:

rg "ValueError|raise.*Error" backend/app/config.py agent/src/agent/config.py

Repository: StavPonte11/text2sql-onboarding

Length of output: 57


🏁 Script executed:

rg "model_validate\|field_validator\|validator" -t py | head -20

Repository: StavPonte11/text2sql-onboarding

Length of output: 57


Add HTTP(S) scheme validation to EMBEDDER_URL in the Settings class.

The EMBEDDER_URL field at line 55 is a plain str with no validation. Invalid or unsafe URL schemes (e.g., ftp://, file://, data://) can be accepted at startup and passed directly to HTTP clients (httpx.post, urllib.request) without safeguards. Add a field validator to reject non-HTTP(S) values and fail fast at application startup.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@backend/app/config.py` around lines 55 - 57, The EMBEDDER_URL field in the
Settings class lacks validation for URL schemes, which allows unsafe or
unsupported schemes like ftp://, file://, or data:// to pass through and
potentially cause security or functionality issues. Add a Pydantic field
validator (using `@field_validator` or `@validator` decorator) to the EMBEDDER_URL
field that parses the URL and validates that the scheme is either http or https,
raising a clear validation error if any other scheme is provided. This will
ensure the validation happens at application startup and prevents invalid URLs
from being used downstream.



settings = Settings()
12 changes: 6 additions & 6 deletions backend/app/routers/admin_approval.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
import logging
from datetime import datetime

from fastapi import APIRouter, Depends, Header, HTTPException
from pydantic import BaseModel
from sqlmodel import Session, desc, select

from core.db.engine import get_session
from core.models.models import (
EvalRun,
Expand All @@ -14,6 +10,10 @@
Table,
TableStatus,
)
from fastapi import APIRouter, Depends, Header, HTTPException
from pydantic import BaseModel
from sqlmodel import Session, desc, select

from app.services.auth import require_admin
from app.services.langfuse_client import langfuse_client

Expand Down Expand Up @@ -178,7 +178,7 @@ def approve_table(

# 1. Promote status
table.status = TableStatus.production
table.updated_at = datetime.utcnow()
table.updated_at = datetime.now()
session.add(table)
session.commit()

Expand Down Expand Up @@ -216,7 +216,7 @@ def reject_table(
)

table.status = TableStatus.sandbox
table.updated_at = datetime.utcnow()
table.updated_at = datetime.now()
session.add(table)
session.commit()

Expand Down
10 changes: 5 additions & 5 deletions backend/app/routers/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from fastapi import APIRouter, HTTPException
from mcp.client.session import ClientSession
from mcp.client.sse import sse_client
from mcp.client.streamable_http import streamablehttp_client

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Check the mcp version declared in backend dependencies
fd -i "requirements" --exec cat {} \; 2>/dev/null
fd -i "pyproject.toml" --exec cat {} \; 2>/dev/null

Repository: StavPonte11/text2sql-onboarding

Length of output: 4198


🌐 Web query:

What version of the mcp Python package introduced streamablehttp_client?

💡 Result:

The streamablehttp_client function was an early API provided by the mcp Python SDK for handling StreamableHTTP transport connections [1][2]. It was later superseded by the streamable_http_client function (note the added underscore), which improved control over the httpx.AsyncClient lifecycle [1][3]. The newer streamable_http_client was introduced in version 1.24.0 of the mcp Python SDK [3]. Following this introduction, the original streamablehttp_client was marked as deprecated and eventually removed from the SDK in subsequent releases [4][3].

Citations:


🌐 Web query:

mcp Python SDK streamablehttp_client availability version history release notes

💡 Result:

The function streamablehttp_client (without the underscore) has been deprecated and subsequently removed from the Model Context Protocol (MCP) Python SDK [1][2]. Availability and Version History: - The function was replaced by streamable_http_client (with an underscore) to accommodate changes in how the SDK handles httpx.AsyncClient instances [3]. - streamablehttp_client was marked as deprecated while streamable_http_client was introduced as the recommended replacement [1][2]. - The removal of the deprecated streamablehttp_client was finalized in early 2026 [1]. Current Status: - As of June 2026, streamable_http_client is the standard, active function used for streamable HTTP transport in the SDK [4][5]. - Developers should update their code to import streamable_http_client from mcp.client.streamable_http and pass a configured httpx.AsyncClient instead of using the older function signatures [2][3]. For further implementation details, the official migration documentation provides comprehensive guides on transitioning from the legacy streamablehttp_client to the current streamable_http_client API [2].

Citations:


🏁 Script executed:

# Search for any documentation or changelog that might specify when streamablehttp_client was available
cd backend && git log --oneline --all --grep="streamable" 2>/dev/null | head -20 || echo "Git not available"

Repository: StavPonte11/text2sql-onboarding

Length of output: 57


Use streamable_http_client instead of the deprecated streamablehttp_client.

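The function streamablehttp_client was deprecated and removed from the mcp SDK in early 2026. As of June 2026, the correct name is streamable_http_client (with underscores). Rename the matching streamablehttp_client(url) call inside _call_agent_mcp as well, checking the new function's signature and yielded values against the installed SDK version, and update the import to: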

from mcp.client.streamable_http import streamable_http_client
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@backend/app/routers/agent.py` at line 16, Update the import statement from
the deprecated streamablehttp_client to the current streamable_http_client
function. In the import statement on line 16 that imports from
mcp.client.streamable_http, replace the function name streamablehttp_client (no
underscore) with streamable_http_client (with underscores). This ensures
compatibility with the current version of the mcp SDK as of June 2026.

from pydantic import BaseModel

from app.config import settings
Expand Down Expand Up @@ -60,15 +60,15 @@ class ChatResponse(BaseModel):

async def _call_agent_mcp(tool_arguments: dict) -> dict:
"""
Connects to the agent MCP server over SSE, initializes the session,
Connects to the agent MCP server over Streamable HTTP, initializes the session,
calls the 'chat_with_agent' tool, and returns the parsed result.
"""
url = f"{settings.AGENT_URL}/sse"
url = f"{settings.AGENT_URL}/mcp"
logger.debug("Connecting to agent MCP: %s args=%s", url, tool_arguments)

try:
async with sse_client(url) as streams:
async with ClientSession(*streams) as session:
async with streamablehttp_client(url) as (read_stream, write_stream, _):
async with ClientSession(read_stream, write_stream) as session:
await session.initialize()

# Call the tool using the MCP client session
Expand Down
7 changes: 3 additions & 4 deletions backend/app/routers/enrichment.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
import logging
from datetime import datetime

from fastapi import APIRouter, Depends, HTTPException
from sqlmodel import Session, col, select

from core.db.engine import get_session
from core.models.models import (
EnrichmentCreate,
Expand All @@ -12,6 +9,8 @@
Table,
TableStatus,
)
from fastapi import APIRouter, Depends, HTTPException
from sqlmodel import Session, col, select

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -61,7 +60,7 @@ def create_enrichment(
f"({', '.join(changed_keys)}) → degraded."
)
table.status = TableStatus.degraded
table.updated_at = datetime.utcnow()
table.updated_at = datetime.now()
session.add(table)

ev = EnrichmentVersion(
Expand Down
Loading