diff --git a/CodeTranslation/.env.example b/CodeTranslation/.env.example new file mode 100644 index 0000000000..048c7a2a47 --- /dev/null +++ b/CodeTranslation/.env.example @@ -0,0 +1,22 @@ +# Backend API Configuration +BACKEND_PORT=5001 + +# Keycloak Authentication +BASE_URL=https://your-enterprise-api.com +KEYCLOAK_CLIENT_ID=api +KEYCLOAK_CLIENT_SECRET=your-client-secret + +# Model Configuration - CodeLlama-34b-instruct +INFERENCE_MODEL_ENDPOINT=CodeLlama-34b-Instruct-hf +INFERENCE_MODEL_NAME=codellama/CodeLlama-34b-Instruct-hf + +# LLM Settings +LLM_TEMPERATURE=0.2 +LLM_MAX_TOKENS=4096 + +# Code Translation Settings +MAX_CODE_LENGTH=10000 +MAX_FILE_SIZE=10485760 + +# CORS Configuration +CORS_ALLOW_ORIGINS=["http://localhost:5173", "http://localhost:3000"] diff --git a/CodeTranslation/.gitignore b/CodeTranslation/.gitignore new file mode 100644 index 0000000000..7499b4114d --- /dev/null +++ b/CodeTranslation/.gitignore @@ -0,0 +1,60 @@ +# Environment variables +.env +.env.local +.env.production +.env.*.local + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST +venv/ +env/ +ENV/ +.venv + +# Node +node_modules/ +npm-debug.log* +yarn-debug.log* +yarn-error.log* +pnpm-debug.log* +lerna-debug.log* +.npm +.yarn +package-lock.json + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ +.DS_Store + +# Build outputs +dist/ +*.log + +# Temporary files +*.tmp +tmp/ +temp/ diff --git a/CodeTranslation/README.md b/CodeTranslation/README.md new file mode 100644 index 0000000000..a6105a6920 --- /dev/null +++ b/CodeTranslation/README.md @@ -0,0 +1,294 @@ +## Code Translation + +A full-stack code translation application that converts code between programming languages using AI. 
+The system integrates a FastAPI backend powered by CodeLlama-34b-instruct, alongside a modern React + Vite + Tailwind CSS frontend for an intuitive translation experience. + +## Table of Contents + +- [Project Overview](#project-overview) +- [Features](#features) +- [Architecture](#architecture) +- [Prerequisites](#prerequisites) +- [Quick Start Deployment](#quick-start-deployment) +- [User Interface](#user-interface) +- [Troubleshooting](#troubleshooting) + +--- + +## Project Overview + +The **Code Translation** application demonstrates how large language models can be used to translate code between different programming languages. It accepts source code in one language, processes it through CodeLlama-34b-instruct, and returns translated code in the target language. This project integrates seamlessly with cloud-hosted APIs or local model endpoints, offering flexibility for research, enterprise, or educational use. + +--- + +## Features + +**Backend** + +- Code translation between 6 languages (Java, C, C++, Python, Rust, Go) +- PDF code extraction with pattern recognition +- CodeLlama-34b-instruct for accurate translations +- Enterprise inference endpoints +- Keycloak authentication for secure API access +- Comprehensive error handling and logging +- File validation and size limits +- CORS enabled for web integration +- Health check endpoints +- Modular architecture (config + models + services) + +**Frontend** + +- Side-by-side code comparison interface +- Language selection dropdowns (6 languages) +- PDF file upload with drag-and-drop support +- Real-time character counter with limits +- Modern, responsive design with Tailwind CSS +- Built with Vite for fast development +- Live status updates +- Copy to clipboard functionality +- Mobile-friendly + +--- + +## Architecture + +The architecture consists of a server that waits for code input or PDF uploads. 
Once code is provided, the server calls the CodeLlama model to translate the code to the target language. + +```mermaid + graph TB + subgraph "User Interface" + A[React Frontend
Port 3000] + A1[Code Input] + A2[PDF Upload] + A3[Language Selection] + end + + subgraph "FastAPI Backend" + B[API Server
Port 5001] + C[PDF Service] + D[API Client] + end + + subgraph "External Services" + E[Keycloak Auth] + F[CodeLlama-34b Model] + end + + A1 --> B + A2 --> B + A3 --> B + B --> C + C -->|Extracted Code| B + B --> D + D -->|Get Token| E + E -->|Access Token| D + D -->|Translate Code + Token| F + F -->|Translated Code| D + D --> B + B --> A + + style A fill:#e1f5ff + style B fill:#fff4e1 + style F fill:#e1ffe1 +``` + +This application is built with enterprise inference capabilities using Keycloak for authentication and CodeLlama-34b-instruct for code translation. + +**Service Components:** + +1. **React Web UI (Port 3000)** - Provides a side-by-side code comparison interface with language selection, PDF upload, and real-time translation results + +2. **FastAPI Backend (Port 5001)** - Handles code validation, PDF extraction, Keycloak authentication, and orchestrates code translation through the CodeLlama model + +**Typical Flow:** + +1. The user enters code or uploads a PDF through the web UI. +2. The backend validates the input and extracts code if needed. +3. The backend authenticates with Keycloak and calls the CodeLlama model. +4. The model translates the code to the target language. +5. The translated code is returned and displayed to the user. +6. The user can copy the translated code with one click. 
+ +--- + +## Prerequisites + +### System Requirements + +Before you begin, ensure you have the following installed: + +- **Docker and Docker Compose** +- **Enterprise inference endpoint access** (Keycloak authentication) + +### Verify Docker Installation + +```bash +# Check Docker version +docker --version + +# Check Docker Compose version +docker compose version + +# Verify Docker is running +docker ps +``` +--- + +## Quick Start Deployment + +### Clone the Repository + +```bash +git clone https://github.com/opea-project/GenAIExamples.git +cd GenAIExamples/CodeTranslation +``` + +### Set up the Environment + +This application requires an `.env` file in the root directory for proper configuration. Create it with the commands below: + +```bash +# Create the .env file +cat > .env << EOF +# Backend API Configuration +BACKEND_PORT=5001 + +# Required - Enterprise/Keycloak Configuration +BASE_URL=https://api.example.com +KEYCLOAK_CLIENT_ID=api +KEYCLOAK_CLIENT_SECRET=your_client_secret + +# Required - Model Configuration +INFERENCE_MODEL_ENDPOINT=CodeLlama-34b-Instruct-hf +INFERENCE_MODEL_NAME=codellama/CodeLlama-34b-Instruct-hf + +# LLM Settings +LLM_TEMPERATURE=0.2 +LLM_MAX_TOKENS=4096 + +# Code Translation Settings +MAX_CODE_LENGTH=10000 +MAX_FILE_SIZE=10485760 + +# CORS Configuration +CORS_ALLOW_ORIGINS=["http://localhost:5173", "http://localhost:3000"] +EOF +``` + +Or manually create `.env` with: + +```bash +# Backend API Configuration +BACKEND_PORT=5001 + +# Required - Enterprise/Keycloak Configuration +BASE_URL=https://api.example.com +KEYCLOAK_CLIENT_ID=api +KEYCLOAK_CLIENT_SECRET=your_client_secret + +# Required - Model Configuration +INFERENCE_MODEL_ENDPOINT=CodeLlama-34b-Instruct-hf +INFERENCE_MODEL_NAME=codellama/CodeLlama-34b-Instruct-hf + +# LLM Settings +LLM_TEMPERATURE=0.2 +LLM_MAX_TOKENS=4096 + +# Code Translation Settings +MAX_CODE_LENGTH=10000 +MAX_FILE_SIZE=10485760 + +# CORS Configuration +CORS_ALLOW_ORIGINS=["http://localhost:5173", 
"http://localhost:3000"] +``` + +**Note**: The docker-compose.yaml file automatically loads environment variables from `.env` for the backend service. + +### Running the Application + +Start both API and UI services together with Docker Compose: + +```bash +# From the CodeTranslation directory +docker compose up --build + +# Or run in detached mode (background) +docker compose up -d --build +``` + +The API will be available at: `http://localhost:5001` +The UI will be available at: `http://localhost:3000` + +**View logs**: + +```bash +# All services +docker compose logs -f + +# Backend only +docker compose logs -f backend + +# Frontend only +docker compose logs -f frontend +``` + +**Verify the services are running**: + +```bash +# Check API health +curl http://localhost:5001/health + +# Check if containers are running +docker compose ps +``` + +## User Interface + +**Using the Application** + +Open `http://localhost:3000` in your browser. + +The main page gives access to every feature. + +![User Interface](images/ui.png) + +The interface provides: + +Translate code: + +- Select source language from dropdown (Java, C, C++, Python, Rust, Go) +- Select target language from dropdown +- Enter or paste your code in the left textarea +- Click the "Translate Code" button +- View translated code in the right textarea +- Click "Copy" to copy the result + +Upload a PDF: + +- Scroll to the "Alternative: Upload PDF" section +- Drag and drop a PDF file, or +- Click "browse" to select a file +- Wait for code extraction to complete +- Extracted code appears in the source code box + +**UI Configuration** + +When running with Docker Compose, the UI automatically connects to the backend API. The frontend is available at `http://localhost:3000` and the API at `http://localhost:5001`. + + +For production deployments, you may want to configure a reverse proxy or update the API URL in the frontend configuration. 
+ +### Stopping the Application + + +```bash +docker compose down +``` + +--- + +## Troubleshooting + +For comprehensive troubleshooting guidance, common issues, and solutions, refer to: + +[Troubleshooting Guide - TROUBLESHOOTING.md](./TROUBLESHOOTING.md) diff --git a/CodeTranslation/TROUBLESHOOTING.md b/CodeTranslation/TROUBLESHOOTING.md new file mode 100644 index 0000000000..7f80ab539c --- /dev/null +++ b/CodeTranslation/TROUBLESHOOTING.md @@ -0,0 +1,128 @@ +# Troubleshooting Guide + +This document contains all common issues encountered during development and their solutions. + +## Table of Contents + +- [API Common Issues](#api-common-issues) +- [UI Common Issues](#ui-common-issues) + +### API Common Issues + +#### "API client not initialized. Check Keycloak configuration." + +**Solution**: + +1. Create a `.env` file in the root directory +2. Add your Keycloak credentials: + ``` + BASE_URL=https://api.example.com + KEYCLOAK_CLIENT_ID=api + KEYCLOAK_CLIENT_SECRET=your_client_secret + ``` +3. Restart the server + +#### "Code too long. Maximum length is 10000 characters" + +**Solution**: + +- The limit exists due to model context window constraints +- Break your code into smaller modules +- Translate one class or function at a time +- Or adjust `MAX_CODE_LENGTH` in `.env` if needed + +#### "Source language not supported" + +**Solution**: + +- Only 6 languages are supported: Java, C, C++, Python, Rust, Go +- Check the `/languages` endpoint for the current list +- Ensure language names are lowercase (e.g., "python" not "Python") + +#### Import errors + +**Solution**: + +1. Ensure all dependencies are installed: `pip install -r requirements.txt` +2. Verify you're using Python 3.10 or higher: `python --version` +3. Activate your virtual environment if using one + +#### Server won't start + +**Solution**: + +1. Check if port 5001 is already in use: `lsof -i :5001` (Unix) or `netstat -ano | findstr :5001` (Windows) +2. 
Use a different port by updating `BACKEND_PORT` in `.env` +3. Check the logs for specific error messages + +#### PDF upload fails + +**Solution**: + +1. Verify the file is a valid PDF +2. Check file size (must be under 10MB by default) +3. Ensure the PDF contains extractable text (not just images) +4. Check server logs for detailed error messages + +#### Translation returns empty result + +**Solution**: + +1. Verify Keycloak authentication is working (check `/health` endpoint) +2. Check if the model endpoint is accessible +3. Try with simpler code first +4. Check server logs for API errors + +#### "No module named 'pypdf'" + +**Solution**: + +```bash +pip install pypdf +``` + +## UI Common Issues + +### API Connection Issues + +**Problem**: "Failed to translate" or "Failed to upload PDF" + +**Solution**: + +1. Ensure the API server is running on `http://localhost:5001` +2. Check browser console for detailed errors +3. Verify CORS is enabled in the API +4. Test API directly: `curl http://localhost:5001/health` + +### Build Issues + +**Problem**: Build fails with dependency errors + +**Solution**: + +```bash +# Clear node_modules and reinstall +rm -rf node_modules package-lock.json +npm install +``` + +### Styling Issues + +**Problem**: Styles not applying + +**Solution**: + +```bash +# Rebuild Tailwind CSS +npm run dev +``` + +### Character Counter Not Updating + +**Problem**: Character counter shows 0 / 10,000 even with code + +**Solution**: + +1. Clear browser cache +2. Hard refresh (Ctrl+Shift+R or Cmd+Shift+R) +3. 
Restart the dev server diff --git a/CodeTranslation/api/.dockerignore b/CodeTranslation/api/.dockerignore new file mode 100644 index 0000000000..bd6b932c3e --- /dev/null +++ b/CodeTranslation/api/.dockerignore @@ -0,0 +1,29 @@ +__pycache__ +*.pyc +*.pyo +*.pyd +.Python +env/ +venv/ +.venv/ +pip-log.txt +pip-delete-this-directory.txt +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.log +.git +.gitignore +.mypy_cache +.pytest_cache +.hypothesis +*.swp +*.swo +*~ +.DS_Store +.env +.env.local diff --git a/CodeTranslation/api/Dockerfile b/CodeTranslation/api/Dockerfile new file mode 100644 index 0000000000..c5028908c5 --- /dev/null +++ b/CodeTranslation/api/Dockerfile @@ -0,0 +1,18 @@ +FROM python:3.11-slim + +WORKDIR /app + +# Copy requirements first for better caching +COPY requirements.txt . + +# Install dependencies +RUN pip install --no-cache-dir -r requirements.txt + +# Copy application code +COPY . . + +# Expose port +EXPOSE 5001 + +# Run the application +CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "5001"] diff --git a/CodeTranslation/api/config.py b/CodeTranslation/api/config.py new file mode 100644 index 0000000000..e4e7fc57e2 --- /dev/null +++ b/CodeTranslation/api/config.py @@ -0,0 +1,44 @@ +""" +Configuration settings for Code Translation API +""" + +import os +from dotenv import load_dotenv + +# Load environment variables from .env file +load_dotenv() + +# Custom API Configuration for Keycloak +BASE_URL = os.getenv("BASE_URL", "https://api.example.com") +KEYCLOAK_REALM = os.getenv("KEYCLOAK_REALM", "master") +KEYCLOAK_CLIENT_ID = os.getenv("KEYCLOAK_CLIENT_ID", "api") +KEYCLOAK_CLIENT_SECRET = os.getenv("KEYCLOAK_CLIENT_SECRET") + +# Model Configuration for CodeLlama-34b-instruct +INFERENCE_MODEL_ENDPOINT = os.getenv("INFERENCE_MODEL_ENDPOINT", "CodeLlama-34b-Instruct") +INFERENCE_MODEL_NAME = os.getenv("INFERENCE_MODEL_NAME", "codellama/CodeLlama-34b-Instruct-hf") + +# Validate required configuration +if not 
KEYCLOAK_CLIENT_SECRET: + raise ValueError("KEYCLOAK_CLIENT_SECRET must be set in environment variables") + +# Application Settings +APP_TITLE = "Code Translation API" +APP_DESCRIPTION = "AI-powered code translation service using CodeLlama-34b-instruct" +APP_VERSION = "1.0.0" + +# File Upload Settings +MAX_FILE_SIZE = 10 * 1024 * 1024 # 10MB +ALLOWED_EXTENSIONS = {".pdf"} + +# Code Translation Settings +SUPPORTED_LANGUAGES = ["java", "c", "cpp", "python", "rust", "go"] +MAX_CODE_LENGTH = 10000 # characters +LLM_TEMPERATURE = 0.2 # Lower temperature for more deterministic code generation +LLM_MAX_TOKENS = 4096 + +# CORS Settings +CORS_ALLOW_ORIGINS = ["*"] # Update with specific origins in production +CORS_ALLOW_CREDENTIALS = True +CORS_ALLOW_METHODS = ["*"] +CORS_ALLOW_HEADERS = ["*"] diff --git a/CodeTranslation/api/models.py b/CodeTranslation/api/models.py new file mode 100644 index 0000000000..fd5386b52d --- /dev/null +++ b/CodeTranslation/api/models.py @@ -0,0 +1,68 @@ +""" +Pydantic models for request/response validation +""" + +from pydantic import BaseModel, Field +from typing import Optional + + +class TranslateRequest(BaseModel): + """Request model for code translation""" + source_code: str = Field(..., min_length=1, description="Source code to translate") + source_language: str = Field(..., description="Source programming language") + target_language: str = Field(..., description="Target programming language") + + class Config: + json_schema_extra = { + "example": { + "source_code": "def hello():\n print('Hello World')", + "source_language": "python", + "target_language": "java" + } + } + + +class TranslateResponse(BaseModel): + """Response model for code translation""" + translated_code: str = Field(..., description="Translated code") + source_language: str = Field(..., description="Source language") + target_language: str = Field(..., description="Target language") + original_code: str = Field(..., description="Original source code") + + class Config: + 
json_schema_extra = { + "example": { + "translated_code": "public class Main {\n public static void main(String[] args) {\n System.out.println(\"Hello World\");\n }\n}", + "source_language": "python", + "target_language": "java", + "original_code": "def hello():\n print('Hello World')" + } + } + + +class UploadPdfResponse(BaseModel): + """Response model for PDF upload""" + message: str = Field(..., description="Success message") + extracted_code: str = Field(..., description="Extracted code from PDF") + status: str = Field(..., description="Operation status") + + class Config: + json_schema_extra = { + "example": { + "message": "Successfully extracted code from 'code.pdf'", + "extracted_code": "def hello():\n print('Hello World')", + "status": "success" + } + } + + +class HealthResponse(BaseModel): + """Response model for health check""" + status: str = Field(..., description="Health status") + model_configured: bool = Field(..., description="Whether model is configured") + keycloak_authenticated: bool = Field(..., description="Whether Keycloak auth is successful") + + +class SupportedLanguagesResponse(BaseModel): + """Response model for supported languages""" + languages: list[str] = Field(..., description="List of supported programming languages") diff --git a/CodeTranslation/api/requirements.txt b/CodeTranslation/api/requirements.txt new file mode 100644 index 0000000000..b4622e7c38 --- /dev/null +++ b/CodeTranslation/api/requirements.txt @@ -0,0 +1,9 @@ +fastapi==0.115.5 +uvicorn==0.32.1 +pydantic==2.10.3 +pydantic-settings==2.6.1 +python-multipart==0.0.17 +requests==2.32.3 +httpx==0.28.1 +openai==1.57.2 +pypdf==6.1.1 diff --git a/CodeTranslation/api/server.py b/CodeTranslation/api/server.py new file mode 100644 index 0000000000..f8347fde8a --- /dev/null +++ b/CodeTranslation/api/server.py @@ -0,0 +1,233 @@ +""" +FastAPI server with routes for Code Translation API +""" + +import os +import tempfile +import logging +from contextlib import asynccontextmanager 
+from fastapi import FastAPI, File, UploadFile, HTTPException, status +from fastapi.middleware.cors import CORSMiddleware + +import config +from models import ( + TranslateRequest, TranslateResponse, UploadPdfResponse, + HealthResponse, SupportedLanguagesResponse +) +from services import ( + get_api_client, extract_code_from_pdf, validate_pdf_file +) + +# Configure logging at INFO level; each record carries timestamp, logger name, level, and message +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' +) +logger = logging.getLogger(__name__) + + +@asynccontextmanager +async def lifespan(app: FastAPI): + """Lifespan context manager for the FastAPI app: on startup, obtain the API client via get_api_client() and store it on app.state.api_client (None if that call raises); on shutdown, log a message.""" + # Startup: an initialization failure is logged rather than re-raised, so the app still starts with api_client set to None + try: + api_client = get_api_client() + app.state.api_client = api_client + logger.info("✓ API client initialized with Keycloak authentication") + except Exception as e: + logger.error(f"Failed to initialize API client: {str(e)}") + app.state.api_client = None + + yield + + # Shutdown: only logs; nothing is released here + logger.info("Shutting down Code Translation API") + + +# Initialize FastAPI app with title, description, and version taken from config +app = FastAPI( + title=config.APP_TITLE, + description=config.APP_DESCRIPTION, + version=config.APP_VERSION, + lifespan=lifespan +) + +# Add CORS middleware; every setting is read from config.CORS_ALLOW_* +app.add_middleware( + CORSMiddleware, + allow_origins=config.CORS_ALLOW_ORIGINS, + allow_credentials=config.CORS_ALLOW_CREDENTIALS, + allow_methods=config.CORS_ALLOW_METHODS, + allow_headers=config.CORS_ALLOW_HEADERS, +) + + +# ==================== Routes ==================== + +@app.get("/") +def root(): + """Root endpoint: returns a running message, the app version, a fixed 'healthy' status, and whether app.state.api_client is set.""" + return { + "message": "Code Translation API is running", + "version": config.APP_VERSION, + "status": "healthy", + "api_client_authenticated": app.state.api_client is not None + } + + +@app.get("/health", response_model=HealthResponse) +def health_check(): + """Detailed health check: status is always 'healthy'; model_configured reflects whether config.INFERENCE_MODEL_NAME is non-empty; keycloak_authenticated requires that the API client exists and that its is_authenticated() check passes.""" + return HealthResponse( + status="healthy", + model_configured=bool(config.INFERENCE_MODEL_NAME), + keycloak_authenticated=app.state.api_client is not None and app.state.api_client.is_authenticated() + ) +
+ +@app.get("/languages", response_model=SupportedLanguagesResponse) +def get_supported_languages(): + """Get list of supported programming languages""" + return SupportedLanguagesResponse( + languages=config.SUPPORTED_LANGUAGES + ) + + +@app.post("/translate", response_model=TranslateResponse) +def translate_code_endpoint(request: TranslateRequest): + """ + Translate code from one language to another + + - **source_code**: Code to translate + - **source_language**: Source programming language (java, c, cpp, python, rust, go) + - **target_language**: Target programming language (java, c, cpp, python, rust, go) + """ + if not app.state.api_client: + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail="API client not initialized. Check Keycloak configuration." + ) + + # Validate languages + if request.source_language.lower() not in config.SUPPORTED_LANGUAGES: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Source language '{request.source_language}' not supported. Supported: {', '.join(config.SUPPORTED_LANGUAGES)}" + ) + + if request.target_language.lower() not in config.SUPPORTED_LANGUAGES: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Target language '{request.target_language}' not supported. Supported: {', '.join(config.SUPPORTED_LANGUAGES)}" + ) + + # Check code length + if len(request.source_code) > config.MAX_CODE_LENGTH: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Code too long. 
Maximum length is {config.MAX_CODE_LENGTH} characters" + ) + + try: + logger.info(f"Translating code from {request.source_language} to {request.target_language}") + + # Translate code using API client + translated_code = app.state.api_client.translate_code( + source_code=request.source_code, + source_lang=request.source_language, + target_lang=request.target_language + ) + + if not translated_code: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Translation failed. No output received from model." + ) + + logger.info(f"✓ Successfully translated code") + + return TranslateResponse( + translated_code=translated_code, + source_language=request.source_language, + target_language=request.target_language, + original_code=request.source_code + ) + + except HTTPException: + raise + except Exception as e: + logger.error(f"Error translating code: {str(e)}", exc_info=True) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Error translating code: {str(e)}" + ) + + +@app.post("/upload-pdf", response_model=UploadPdfResponse) +async def upload_pdf(file: UploadFile = File(...)): + """ + Upload a PDF file and extract code from it + + - **file**: PDF file containing code (max 10MB) + """ + tmp_path = None + try: + # Read file content + content = await file.read() + file_size = len(content) + + # Validate file + validate_pdf_file(file.filename, file_size, config.MAX_FILE_SIZE) + + logger.info(f"Processing PDF: {file.filename} ({file_size / 1024:.2f} KB)") + + # Save to temporary file + with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp: + tmp.write(content) + tmp_path = tmp.name + logger.info(f"Saved to temporary path: {tmp_path}") + + # Extract code from PDF + extracted_code = extract_code_from_pdf(tmp_path) + + if not extracted_code.strip(): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="No code content could be extracted from the PDF" + ) + + logger.info(f"✓ 
Successfully extracted code from PDF: {file.filename}") + + return UploadPdfResponse( + message=f"Successfully extracted code from '{file.filename}'", + extracted_code=extracted_code, + status="success" + ) + + except HTTPException: + raise + except ValueError as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=str(e) + ) + except Exception as e: + logger.error(f"Error processing PDF: {str(e)}", exc_info=True) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Error processing PDF: {str(e)}" + ) + finally: + # Clean up temporary file + if tmp_path and os.path.exists(tmp_path): + try: + os.remove(tmp_path) + logger.info(f"Cleaned up temporary file: {tmp_path}") + except Exception as e: + logger.warning(f"Could not remove temporary file: {str(e)}") + + +# Entry point for running with uvicorn +if __name__ == "__main__": + import uvicorn + uvicorn.run(app, host="0.0.0.0", port=5001) diff --git a/CodeTranslation/api/services/__init__.py b/CodeTranslation/api/services/__init__.py new file mode 100644 index 0000000000..223ab23723 --- /dev/null +++ b/CodeTranslation/api/services/__init__.py @@ -0,0 +1,13 @@ +""" +Services module exports +""" + +from .api_client import get_api_client, APIClient +from .pdf_service import extract_code_from_pdf, validate_pdf_file + +__all__ = [ + 'get_api_client', + 'APIClient', + 'extract_code_from_pdf', + 'validate_pdf_file' +] diff --git a/CodeTranslation/api/services/api_client.py b/CodeTranslation/api/services/api_client.py new file mode 100644 index 0000000000..ee41827973 --- /dev/null +++ b/CodeTranslation/api/services/api_client.py @@ -0,0 +1,150 @@ +""" +API Client for Keycloak authentication and API calls +""" + +import logging +import requests +import httpx +from typing import Optional +import config + +logger = logging.getLogger(__name__) + + +class APIClient: + """ + Client for handling Keycloak authentication and API calls + """ + + def __init__(self): + 
self.base_url = config.BASE_URL + self.token = None + self.http_client = None + self._authenticate() + + def _authenticate(self) -> None: + """ + Authenticate and obtain access token from Keycloak + """ + token_url = f"{self.base_url}/token" + payload = { + "grant_type": "client_credentials", + "client_id": config.KEYCLOAK_CLIENT_ID, + "client_secret": config.KEYCLOAK_CLIENT_SECRET, + } + + try: + response = requests.post(token_url, data=payload, verify=False) + + if response.status_code == 200: + self.token = response.json().get("access_token") + logger.info(f"✓ Access token obtained: {self.token[:20]}..." if self.token else "Failed to get token") + + # Create httpx client with SSL verification disabled + self.http_client = httpx.Client(verify=False) + + else: + logger.error(f"Error obtaining token: {response.status_code} - {response.text}") + raise Exception(f"Authentication failed: {response.status_code}") + + except Exception as e: + logger.error(f"Error during authentication: {str(e)}") + raise + + def get_inference_client(self): + """ + Get OpenAI-style client for code generation inference + Uses CodeLlama-34b-instruct endpoint + """ + from openai import OpenAI + + return OpenAI( + api_key=self.token, + base_url=f"{self.base_url}/{config.INFERENCE_MODEL_ENDPOINT}/v1", + http_client=self.http_client + ) + + def translate_code(self, source_code: str, source_lang: str, target_lang: str) -> str: + """ + Translate code from one language to another using CodeLlama-34b-instruct + + Args: + source_code: Code to translate + source_lang: Source programming language + target_lang: Target programming language + + Returns: + Translated code + """ + try: + client = self.get_inference_client() + + # Create prompt for code translation + prompt = f"""Translate the following {source_lang} code to {target_lang}. +Only output the translated code without any explanations or markdown formatting. 
+ +{source_lang} code: +``` +{source_code} +``` + +{target_lang} code: +```""" + + logger.info(f"Translating code from {source_lang} to {target_lang}") + + # Use completions endpoint for CodeLlama + response = client.completions.create( + model=config.INFERENCE_MODEL_NAME, + prompt=prompt, + max_tokens=config.LLM_MAX_TOKENS, + temperature=config.LLM_TEMPERATURE, + stop=["```"] # Stop at closing code block + ) + + # Handle response structure + if hasattr(response, 'choices') and len(response.choices) > 0: + choice = response.choices[0] + if hasattr(choice, 'text'): + translated_code = choice.text.strip() + logger.info(f"Successfully translated code ({len(translated_code)} characters)") + return translated_code + else: + logger.error(f"Unexpected response structure: {type(choice)}, {choice}") + return "" + else: + logger.error(f"Unexpected response: {type(response)}, {response}") + return "" + except Exception as e: + logger.error(f"Error translating code: {str(e)}", exc_info=True) + raise + + def is_authenticated(self) -> bool: + """ + Check if client is authenticated + """ + return self.token is not None + + def __del__(self): + """ + Cleanup: close httpx client + """ + if self.http_client: + self.http_client.close() + + +# Global API client instance +_api_client: Optional[APIClient] = None + + +def get_api_client() -> APIClient: + """ + Get or create the global API client instance + + Returns: + APIClient instance + """ + global _api_client + if _api_client is None: + _api_client = APIClient() + return _api_client diff --git a/CodeTranslation/api/services/pdf_service.py b/CodeTranslation/api/services/pdf_service.py new file mode 100644 index 0000000000..abf857e9f5 --- /dev/null +++ b/CodeTranslation/api/services/pdf_service.py @@ -0,0 +1,128 @@ +""" +PDF Code Extraction Service +Extracts code snippets from PDF documents +""" + +import logging +import re +from pathlib import Path +from typing import List +from pypdf import PdfReader + +logger = 
# Python function header (optionally with a "-> type" annotation) followed by
# its body, which runs until the next line that starts in column 0 or the end
# of the text.
_PYTHON_DEF_PATTERN = r'\bdef\s+\w+\s*\(.*?\)\s*(?:->[^:\n]+)?:.*?(?=\n(?!\s)|\Z)'

# Headers of brace-delimited definitions. Every pattern ends on the opening
# "{" so the matching "}" can be located by counting braces.
_BRACE_HEADER_PATTERNS = (
    r'\b(?:(?:public|private|protected)\s+)?class\s+\w+[^{};]*\{',  # Java/C++ classes
    r'\bfunction\s+\w+\s*\(.*?\)\s*\{',                             # JavaScript functions
    r'\bfn\s+\w+(?:<[^>]*>)?\s*\(.*?\)[^{};]*\{',                   # Rust functions
    r'\bfunc\s+\w+\s*\(.*?\)[^{};]*\{',                             # Go functions
)


def extract_code_from_pdf(pdf_path: str) -> str:
    """
    Extract code content from a PDF file

    The text of every page is concatenated and passed through
    extract_code_patterns(); if that yields only whitespace, the raw page
    text is used instead.

    Args:
        pdf_path: Path to the PDF file

    Returns:
        Extracted code as string (leading/trailing whitespace stripped)

    Raises:
        Exception if PDF cannot be processed; the underlying error is
        chained as ``__cause__``
    """
    try:
        logger.info(f"Extracting code from PDF: {pdf_path}")

        with open(pdf_path, 'rb') as file:
            pdf_reader = PdfReader(file)
            num_pages = len(pdf_reader.pages)

            logger.info(f"PDF has {num_pages} pages")

            # Extract inside the `with` block so the reader still has an open
            # file. `or ""` guards against a page that yields no text.
            page_texts = [page.extract_text() or "" for page in pdf_reader.pages]

        all_text = "".join(f"{page_text}\n" for page_text in page_texts)

        logger.info(f"Extracted {len(all_text)} characters from PDF")

        # Try to identify and extract code blocks
        code_content = extract_code_patterns(all_text)

        if not code_content.strip():
            # If no code patterns found, return all text
            code_content = all_text

        logger.info(f"Extracted code content: {len(code_content)} characters")

        return code_content.strip()

    except Exception as e:
        logger.error(f"Error extracting code from PDF: {str(e)}", exc_info=True)
        raise Exception(f"Failed to extract code from PDF: {str(e)}") from e


def _find_block_end(text: str, open_idx: int) -> int:
    """Return the offset just past the '}' closing the '{' at open_idx.

    Braces are counted textually (string literals and comments are not
    special-cased). If the braces never balance, the first '}' after
    open_idx is used; -1 is returned when there is no '}' at all.
    """
    depth = 0
    for idx in range(open_idx, len(text)):
        char = text[idx]
        if char == '{':
            depth += 1
        elif char == '}':
            depth -= 1
            if depth == 0:
                return idx + 1
    first_close = text.find('}', open_idx)
    return first_close + 1 if first_close != -1 else -1


def _merge_spans(spans):
    """Sort (start, end) spans and fuse any that overlap or nest."""
    merged = []
    for start, end in sorted(spans):
        if merged and start < merged[-1][1]:
            merged[-1][1] = max(merged[-1][1], end)
        else:
            merged.append([start, end])
    return merged


def extract_code_patterns(text: str) -> str:
    """
    Extract code patterns from text

    Candidate regions are located with several heuristics (markdown fences,
    runs of indented lines, Python ``def`` blocks and brace-delimited
    class/function definitions). Overlapping or nested regions are merged so
    that no code is emitted twice, and the surviving snippets are returned in
    the order they appear in the text.

    Args:
        text: Text content to search

    Returns:
        Extracted code snippets joined by blank lines, or the original text
        unchanged when no pattern matches
    """
    spans = []

    # Pattern 1: Code between ``` markers (fence lines themselves excluded)
    for match in re.finditer(r'```[^\n`]*\n(.*?)\n```', text, re.DOTALL):
        spans.append(match.span(1))

    # Pattern 2: Runs of consecutive indented lines (4 spaces or a tab).
    # The newline is consumed inside the group so adjacent lines form one run.
    for match in re.finditer(r'(?:^(?: {4}|\t).+(?:\n|\Z))+', text, re.MULTILINE):
        spans.append(match.span())

    # Pattern 3a: Python functions
    for match in re.finditer(_PYTHON_DEF_PATTERN, text, re.DOTALL):
        spans.append(match.span())

    # Pattern 3b: brace-delimited definitions, extended to the balancing "}"
    # so nested blocks are not cut off at the first closing brace.
    for header_pattern in _BRACE_HEADER_PATTERNS:
        for match in re.finditer(header_pattern, text, re.DOTALL):
            block_end = _find_block_end(text, match.end() - 1)
            if block_end != -1:
                spans.append((match.start(), block_end))

    snippets = []
    for start, end in _merge_spans(spans):
        # Keep leading indentation; drop only surrounding blank lines.
        snippet = text[start:end].strip('\n').rstrip()
        if snippet.strip():
            snippets.append(snippet)

    if snippets:
        return '\n\n'.join(snippets)

    # If no patterns match, return original text
    return text


def validate_pdf_file(filename: str, file_size: int, max_size: int) -> None:
    """
    Validate uploaded PDF file

    Args:
        filename: Name of the file (a missing/empty name is rejected)
        file_size: Size of the file in bytes
        max_size: Maximum allowed file size in bytes

    Raises:
        ValueError if validation fails
    """
    # Check file extension; a missing name must fail validation with
    # ValueError rather than crash with AttributeError on .lower()
    if not filename or not filename.lower().endswith('.pdf'):
        raise ValueError("Only PDF files are allowed")

    # Check file size
    if file_size > max_size:
        max_size_mb = max_size / (1024 * 1024)
        raise ValueError(f"File too large. Maximum size is {max_size_mb}MB")

    if file_size == 0:
        raise ValueError("Empty file uploaded")

    logger.info(f"PDF file validation passed: {filename} ({file_size / 1024:.2f} KB)")
# Build stage: install dependencies and produce the static bundle in /app/dist
FROM node:18-alpine AS build

WORKDIR /app

# Copy package files
COPY package*.json ./

# Install dependencies
RUN npm install

# Copy application code
COPY . .

# Build the application
RUN npm run build

# Production stage: serve the bundle with nginx
FROM nginx:alpine

# Copy built assets from build stage
COPY --from=build /app/dist /usr/share/nginx/html

# Copy nginx configuration
COPY nginx.conf /etc/nginx/conf.d/default.conf

EXPOSE 80

CMD ["nginx", "-g", "daemon off;"]
# Serves the built single-page app and proxies /api/* to the backend container.
server {
    listen 80;
    server_name localhost;
    root /usr/share/nginx/html;
    index index.html;

    # Unknown paths fall back to index.html so the client-side app can load.
    location / {
        try_files $uri $uri/ /index.html;
    }

    location /api/ {
        # nginx rejects request bodies larger than 1m by default (HTTP 413),
        # far below the backend's 10 MiB MAX_FILE_SIZE. Allow a little headroom
        # for the multipart envelope so the backend's own size check decides.
        client_max_body_size 11m;

        # Strip the /api prefix before forwarding to the backend.
        rewrite ^/api/(.*)$ /$1 break;
        proxy_pass http://backend:5001;
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection 'upgrade';
        proxy_set_header Host $host;
        proxy_cache_bypass $http_upgrade;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Model inference may take longer than the 60s default read timeout.
        proxy_read_timeout 300s;
    }
}
--ext js,jsx --report-unused-disable-directives --max-warnings 0" + }, + "dependencies": { + "react": "^18.2.0", + "react-dom": "^18.2.0", + "axios": "^1.6.0", + "lucide-react": "^0.294.0" + }, + "devDependencies": { + "@types/react": "^18.2.43", + "@types/react-dom": "^18.2.17", + "@vitejs/plugin-react": "^4.2.1", + "autoprefixer": "^10.4.16", + "eslint": "^8.55.0", + "eslint-plugin-react": "^7.33.2", + "eslint-plugin-react-hooks": "^4.6.0", + "eslint-plugin-react-refresh": "^0.4.5", + "postcss": "^8.4.32", + "tailwindcss": "^3.3.6", + "vite": "^5.0.8" + } +} diff --git a/CodeTranslation/ui/postcss.config.js b/CodeTranslation/ui/postcss.config.js new file mode 100644 index 0000000000..2e7af2b7f1 --- /dev/null +++ b/CodeTranslation/ui/postcss.config.js @@ -0,0 +1,6 @@ +export default { + plugins: { + tailwindcss: {}, + autoprefixer: {}, + }, +} diff --git a/CodeTranslation/ui/src/App.jsx b/CodeTranslation/ui/src/App.jsx new file mode 100644 index 0000000000..185fdf9fbc --- /dev/null +++ b/CodeTranslation/ui/src/App.jsx @@ -0,0 +1,76 @@ +import { useState } from 'react' +import CodeTranslator from './components/CodeTranslator' +import PDFUploader from './components/PDFUploader' +import Header from './components/Header' +import StatusBar from './components/StatusBar' + +function App() { + const [translationStatus, setTranslationStatus] = useState('idle') // idle, translating, success, error + const [sourceLanguage, setSourceLanguage] = useState('python') + const [targetLanguage, setTargetLanguage] = useState('java') + const [pdfExtractedCode, setPdfExtractedCode] = useState('') + const [isUploading, setIsUploading] = useState(false) + + const handleTranslationStart = () => { + setTranslationStatus('translating') + } + + const handleTranslationSuccess = () => { + setTranslationStatus('success') + setTimeout(() => setTranslationStatus('idle'), 3000) + } + + const handleTranslationError = () => { + setTranslationStatus('error') + setTimeout(() => 
setTranslationStatus('idle'), 3000) + } + + const handlePDFUploadSuccess = (extractedCode) => { + setPdfExtractedCode(extractedCode) + setIsUploading(false) + } + + const handlePDFUploadStart = () => { + setIsUploading(true) + } + + return ( +
+
+ +
+ {/* Status Bar */} + + + {/* Main Code Translator - Side by Side */} +
+ +
+ + {/* PDF Uploader at Bottom */} +
+ +
+
+
+ ) +} + +export default App diff --git a/CodeTranslation/ui/src/components/CodeTranslator.jsx b/CodeTranslation/ui/src/components/CodeTranslator.jsx new file mode 100644 index 0000000000..b646d6f734 --- /dev/null +++ b/CodeTranslation/ui/src/components/CodeTranslator.jsx @@ -0,0 +1,212 @@ +import { useState, useEffect } from 'react' +import { ArrowRight, Code, Copy, Check } from 'lucide-react' +import axios from 'axios' + +const LANGUAGES = ['java', 'c', 'cpp', 'python', 'rust', 'go'] + +const LANGUAGE_LABELS = { + 'java': 'JAVA', + 'c': 'C', + 'cpp': 'C++', + 'python': 'PYTHON', + 'rust': 'RUST', + 'go': 'GO' +} + +const API_URL = import.meta.env.VITE_API_URL || '/api' + +export default function CodeTranslator({ + onTranslationStart, + onTranslationSuccess, + onTranslationError, + pdfExtractedCode, + sourceLanguage, + targetLanguage, + onSourceLanguageChange, + onTargetLanguageChange +}) { + const [sourceCode, setSourceCode] = useState('') + const [translatedCode, setTranslatedCode] = useState('') + const [isTranslating, setIsTranslating] = useState(false) + const [copied, setCopied] = useState(false) + + // When PDF code is extracted, set it as source code + useEffect(() => { + if (pdfExtractedCode) { + setSourceCode(pdfExtractedCode) + } + }, [pdfExtractedCode]) + + const handleTranslate = async () => { + if (!sourceCode.trim()) { + alert('Please enter code to translate') + return + } + + if (sourceLanguage === targetLanguage) { + alert('Source and target languages must be different') + return + } + + setIsTranslating(true) + onTranslationStart() + + try { + const response = await axios.post(`${API_URL}/translate`, { + source_code: sourceCode, + source_language: sourceLanguage, + target_language: targetLanguage + }) + + setTranslatedCode(response.data.translated_code) + onTranslationSuccess() + } catch (error) { + console.error('Translation error:', error) + onTranslationError() + alert(error.response?.data?.detail || 'Translation failed') + } finally { + 
setIsTranslating(false) + } + } + + const handleCopy = () => { + navigator.clipboard.writeText(translatedCode) + setCopied(true) + setTimeout(() => setCopied(false), 2000) + } + + return ( +
+
+ +

Code Translator

+
+ + {/* Language Selection */} +
+
+ + +
+ +
+ +
+ +
+ + +
+
+ + {/* Side by Side Code Boxes */} +
+ {/* Source Code Input */} +
+
+ + 10000 ? 'text-red-600 font-semibold' : 'text-gray-500'}`}> + {sourceCode.length.toLocaleString()} / 10,000 characters + +
+