Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions backend/hf_api_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@
# Audio Classification Model
AUDIO_CLASS_API_URL = "https://router.huggingface.co/models/MIT/ast-finetuned-audioset-10-10-0.4593"

# Audio Transcription Model
WHISPER_API_URL = "https://router.huggingface.co/models/openai/whisper-large-v3-turbo"

async def _make_request(client, url, payload):
try:
response = await client.post(url, headers=headers, json=payload, timeout=20.0)
Expand Down Expand Up @@ -166,6 +169,33 @@ async def do_post(c):
logger.error(f"Audio Detection Error: {e}")
return []

async def transcribe_audio(audio_bytes: bytes, client: httpx.AsyncClient = None):
    """
    Transcribes audio using OpenAI Whisper via Hugging Face API.

    The raw audio bytes are POSTed as the request body to ``WHISPER_API_URL``.
    An ``Authorization`` bearer header is attached only when the module-level
    ``token`` is truthy.

    Args:
        audio_bytes: Raw audio payload, sent unmodified as the request content.
        client: Optional shared ``httpx.AsyncClient``. When omitted, a
            temporary client is created for this call and closed afterwards.

    Returns:
        The transcript taken from the ``"text"`` field of the JSON response,
        or ``""`` when the API answers with a non-200 status, the payload has
        no usable string ``"text"`` value, or any exception occurs. Failures
        are logged and never raised to the caller.
    """
    try:
        headers_bin = {"Authorization": f"Bearer {token}"} if token else {}

        async def do_post(c):
            return await c.post(WHISPER_API_URL, headers=headers_bin, content=audio_bytes, timeout=30.0)

        if client is not None:
            response = await do_post(client)
        else:
            async with httpx.AsyncClient() as new_client:
                response = await do_post(new_client)

        if response.status_code != 200:
            logger.error(f"Whisper API Error: {response.status_code} - {response.text}")
            return ""

        # Result is usually {"text": "..."}; guard against any other shape so
        # an unexpected payload is reported as such instead of surfacing as an
        # AttributeError from .get().
        data = response.json()
        if not isinstance(data, dict):
            logger.error(f"Whisper API Error: unexpected response payload type {type(data).__name__}")
            return ""

        text = data.get("text", "")
        # A null or non-string "text" must not leak out; callers expect a string.
        return text if isinstance(text, str) else ""
    except Exception as e:
        # exc_info keeps the traceback in the log; the message alone hides
        # where a network or decoding failure originated.
        logger.error(f"Audio Transcription Error: {e}", exc_info=True)
        return ""
Comment on lines +172 to +197
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Propagate transcription failures instead of returning empty text.

On non‑200 or exceptions, returning "" makes the endpoint respond 200 with an empty transcript, so the UI can’t distinguish silence from failure. Prefer raising (or returning explicit error details) so callers can surface a real error.

🔧 Proposed fix (propagate failures)
-        if response.status_code == 200:
-            # Result is usually {"text": "..."}
-            data = response.json()
-            return data.get("text", "")
-        else:
-            logger.error(f"Whisper API Error: {response.status_code} - {response.text}")
-            return ""
-    except Exception as e:
-        logger.error(f"Audio Transcription Error: {e}")
-        return ""
+        if response.status_code == 200:
+            # Result is usually {"text": "..."}
+            data = response.json()
+            return data.get("text", "")
+        logger.error(f"Whisper API Error: {response.status_code} - {response.text}")
+        raise RuntimeError(f"Whisper API error: {response.status_code}")
+    except Exception as e:
+        logger.error(f"Audio Transcription Error: {e}")
+        raise
🧰 Tools
🪛 Ruff (0.14.14)

[warning] 195-195: Do not catch blind exception: Exception

(BLE001)


[warning] 196-196: Use logging.exception instead of logging.error

Replace with exception

(TRY400)

🤖 Prompt for AI Agents
In `@backend/hf_api_service.py` around lines 172 - 197, The transcribe_audio
function currently swallows failures by returning "" on non‑200 responses or
exceptions; change it to propagate errors instead: when response.status_code !=
200, log as now but raise an exception (include status code and response.text)
instead of returning an empty string, and in the except block re‑raise the
caught exception (or wrap it in a descriptive RuntimeError) after logging; keep
the existing helper do_post and logging via logger, and include WHISPER_API_URL
and whether a token is configured (never the token value itself) in the raised
error message so callers can surface real failures.


async def detect_severity_clip(image: Union[Image.Image, bytes], client: httpx.AsyncClient = None):
"""
Returns a severity object: {level: 'High', confidence: 0.9, raw_label: 'critical...'}
Expand Down
28 changes: 25 additions & 3 deletions backend/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
from fastapi.middleware.gzip import GZipMiddleware
from fastapi.concurrency import run_in_threadpool
from sqlalchemy import func
-from sqlalchemy.orm import Session, defer
+from sqlalchemy.orm import Session, defer, joinedload
from pydantic import BaseModel
from contextlib import asynccontextmanager
from functools import lru_cache
-from typing import List, Union, Any, Dict
+from typing import List, Union, Any, Dict, Optional
from datetime import datetime, timedelta, timezone
from PIL import Image

Expand Down Expand Up @@ -79,7 +79,8 @@
detect_water_leak_clip,
detect_accessibility_issue_clip,
detect_crowd_density_clip,
-detect_audio_event
+detect_audio_event,
+transcribe_audio
)

# Configure structured logging
Expand Down Expand Up @@ -1409,6 +1410,27 @@ async def detect_audio_endpoint(request: Request, file: UploadFile = File(...)):
raise HTTPException(status_code=500, detail="Internal server error")


@app.post("/api/transcribe-audio")
async def transcribe_audio_endpoint(request: Request, file: UploadFile = File(...)):
    """
    Transcribe an uploaded audio file and return ``{"text": <transcript>}``.

    The upload is read fully into memory and passed to ``transcribe_audio``
    together with the shared HTTP client stored on ``request.app.state``.

    Raises:
        HTTPException: 413 when the upload exceeds 25MB, 400 when the upload
            cannot be read, 500 when the transcription call raises.
    """
    max_audio_bytes = 25 * 1024 * 1024

    # Basic audio validation: cheap early rejection when the size is reported.
    if hasattr(file, 'size') and file.size and file.size > max_audio_bytes:
        raise HTTPException(status_code=413, detail="Audio file too large (max 25MB)")

    try:
        audio_bytes = await file.read()
    except Exception as e:
        logger.error(f"Invalid audio file: {e}", exc_info=True)
        raise HTTPException(status_code=400, detail="Invalid audio file") from e

    # Enforce the limit on the bytes actually read, in case file.size is
    # missing or inaccurate. Kept outside the try above so this 413 is not
    # caught by the broad except and rewritten as a 400.
    if len(audio_bytes) > max_audio_bytes:
        raise HTTPException(status_code=413, detail="Audio file too large (max 25MB)")

    try:
        client = request.app.state.http_client
        text = await transcribe_audio(audio_bytes, client=client)
    except Exception as e:
        logger.error(f"Transcription error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail="Internal server error") from e

    return {"text": text}

Comment on lines +1413 to +1432
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Enforce size limit based on actual bytes, not just file.size.

UploadFile.size can be unset, so large uploads can bypass the 25MB gate. Add a post‑read length check (and optionally reject empty audio) to prevent oversized uploads from slipping through.

🛡️ Proposed fix (post‑read size validation)
-    try:
-        audio_bytes = await file.read()
-    except Exception as e:
-        logger.error(f"Invalid audio file: {e}", exc_info=True)
-        raise HTTPException(status_code=400, detail="Invalid audio file")
+    try:
+        audio_bytes = await file.read()
+    except Exception as e:
+        logger.error(f"Invalid audio file: {e}", exc_info=True)
+        raise HTTPException(status_code=400, detail="Invalid audio file")
+
+    if len(audio_bytes) > 25 * 1024 * 1024:
+        raise HTTPException(status_code=413, detail="Audio file too large (max 25MB)")
+    if len(audio_bytes) == 0:
+        raise HTTPException(status_code=400, detail="Empty audio file")
🧰 Tools
🪛 Ruff (0.14.14)

[warning] 1414-1414: Do not perform function call File in argument defaults; instead, perform the call within the function, or read the default from a module-level singleton variable

(B008)


[warning] 1423-1423: Within an except clause, raise exceptions with raise ... from err or raise ... from None to distinguish them from errors in exception handling

(B904)


[warning] 1428-1428: Consider moving this statement to an else block

(TRY300)


[warning] 1431-1431: Within an except clause, raise exceptions with raise ... from err or raise ... from None to distinguish them from errors in exception handling

(B904)

🤖 Prompt for AI Agents
In `@backend/main.py` around lines 1413 - 1432, transcribe_audio_endpoint
currently trusts UploadFile.size which may be unset; fix by validating the
actual byte length after reading: in transcribe_audio_endpoint read audio_bytes
= await file.read(), then if not audio_bytes raise HTTPException(400, "Empty
audio file") and if len(audio_bytes) > 25 * 1024 * 1024 raise HTTPException(413,
"Audio file too large (max 25MB)"); proceed to call
transcribe_audio(client=client, audio_bytes=audio_bytes) only after these checks
and keep existing exception logging for read/transcription errors.


async def get_cached_or_compute(cache_key: str, compute_func, *args, **kwargs):
"""Get cached result or compute and cache it."""
global _cache_cleanup_counter
Expand Down
2 changes: 1 addition & 1 deletion frontend/eslint.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import reactRefresh from 'eslint-plugin-react-refresh'
import { defineConfig, globalIgnores } from 'eslint/config'

export default defineConfig([
-globalIgnores(['dist']),
+globalIgnores(['dist', '**/__tests__/**', '**/__mocks__/**', '**/*.test.js', 'src/setupTests.js']),
{
files: ['**/*.{js,jsx}'],
extends: [
Expand Down
Loading