Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
142 changes: 142 additions & 0 deletions ai/agent_compiler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
"""
Compiler Agent — ai/agent_compiler.py

Agent 0: Traduce promptul natural în JSON (Reguli).
Uses PydanticOutputParser to extract CompiledRule from user natural language prompts.
"""

from __future__ import annotations

import logging
from typing import Any

from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate
from pydantic import BaseModel, Field

from ai.llm_config import get_llm, MODEL_CLASSIFIER

logger = logging.getLogger(__name__)

# =====================================================================
# Pydantic schema — formatul regulii
# =====================================================================


class CompiledRule(BaseModel):
"""Regulă de organizare fișiere, compilată dintr-un prompt natural."""

category: str = Field(
...,
description="Categoria sau tipul fișierului (e.g., 'factura', 'curs', 'contract').",
)
folder_structure: str = Field(
...,
description="Structura de directoare unde va fi salvat fișierul (e.g., 'Facturi', 'Facultate/Materie').",
)
naming_convention: str = Field(
default="{original_filename}",
description="Convenția de denumire a fișierului. Dacă utilizatorul nu specifică, folosește un format implicit sau '{original_filename}'.",
)


# =====================================================================
# Prompt template
# =====================================================================

_SYSTEM_PROMPT_TEMPLATE = """\
You are an expert rule translator for the ClutterKill system.
Your job is to translate a user's natural language instruction about where and how to save files into a structured JSON rule.

User instruction: "{user_prompt}"

Extract the category, folder structure, and naming convention.
If the naming convention is not explicitly stated, use a default placeholder like "{{original_filename}}" or infer a sensible one if the context implies it.

IMPORTANT: You must return ONLY the raw JSON object containing the actual values. Do NOT return a JSON schema, and do NOT wrap your answer in "properties".

Example of valid output:
{{
"category": "factura",
"folder_structure": "Facturi",
"naming_convention": "factura_data.pdf"
}}

{format_instructions}
"""


# =====================================================================
# CompilerAgent
# =====================================================================


class CompilerAgent:
"""Agent that translates natural language prompts into structured JSON rules.

Usage::

agent = CompilerAgent()
rule = agent.compile("Cursurile merg în folderul Facultate/Materie")
print(rule.model_dump_json())
"""

def __init__(self, llm: Any | None = None) -> None:
self._llm = llm or get_llm(model=MODEL_CLASSIFIER)
self._parser = PydanticOutputParser(pydantic_object=CompiledRule)

self._prompt = PromptTemplate(
template=_SYSTEM_PROMPT_TEMPLATE,
input_variables=["user_prompt"],
partial_variables={
"format_instructions": self._parser.get_format_instructions()
},
)

self._chain = self._prompt | self._llm | self._parser

# ── public API ───────────────────────────────────────────────────

def compile(self, user_prompt: str) -> CompiledRule:
"""Translate a natural language prompt into a CompiledRule.

Parameters
----------
user_prompt : str
Natural language instruction from the user (e.g., "facturile în folderul X").

Returns
-------
CompiledRule
Validated Pydantic model with category, folder_structure, and naming_convention.
"""
logger.info("CompilerAgent: compiling prompt: '%s'", user_prompt)
try:
# invoke the chain which formats the prompt, calls LLM, and parses output
result = self._chain.invoke({"user_prompt": user_prompt})
return result
except Exception as e:
logger.error("Failed to compile rule: %s", e)
raise


# ─── Quick self-test ─────────────────────────────────────────────────
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)

agent = CompilerAgent()

test_prompts = [
"Cursurile merg în folderul Facultate/Materie",
"facturile în folderul Facturi_Curente și le denumești factura_data.pdf",
]

for p in test_prompts:
print(f"\n{'=' * 60}")
print(f"Input: {p}")
try:
res = agent.compile(p)
print(f"Output JSON: {res.model_dump_json(indent=2)}")
except Exception as e:
print(f"Error: {e}")
print(f"{'=' * 60}")
219 changes: 219 additions & 0 deletions ai/agent_decider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
"""
Decider Agent — ai/agent_decider.py

Agent 2: Primește rezumatul (A1) și regula (A0) și decide ce face cu fișierul.
Uses PydanticOutputParser with the class ActionDecision(status, suggested_name, suggested_folder).
"""

from __future__ import annotations

import logging
import re
from typing import Any, Literal

from langchain_core.output_parsers import PydanticOutputParser, StrOutputParser
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from pydantic import BaseModel, Field, field_validator

from ai.llm_config import get_llm, MODEL_CLASSIFIER
from ai.agent_compiler import CompiledRule

logger = logging.getLogger(__name__)

_MAX_RETRIES = 2

# =====================================================================
# Pydantic schema — formatul deciziei
# =====================================================================


class ActionDecision(BaseModel):
"""Decizia finală luată de Agent 2."""

status: Literal["move", "quarantine"] = Field(
...,
description="Statusul deciziei: 'move' dacă fișierul corespunde regulii, 'quarantine' dacă nu sau dacă informațiile lipsesc.",
)
suggested_name: str = Field(
...,
description="Numele sugerat pentru fișier (conform naming_convention din regulă). Dacă e carantină, se păstrează numele original.",
)
suggested_folder: str = Field(
...,
description="Folderul de destinație. Dacă e 'quarantine', valoarea va fi 'Quarantine'.",
)

@field_validator("suggested_name")
@classmethod
def sanitize_filename(cls, v: str) -> str:
"""Asigură-te că numele fișierului nu conține caractere invalide."""
# Îndepărtăm caracterele care ar putea cauza erori de filepath
sanitized = re.sub(r'[<>:"/\\|?*]', "_", v)
return sanitized


# =====================================================================
# Prompt template
# =====================================================================

_SYSTEM_PROMPT_TEMPLATE = """\
You are an expert decision-making agent for the ClutterKill system.
Your job is to analyze a document summary and a set of organization rules, and decide if the document should be moved to the correct folder or placed in quarantine.

Rule Category: {rule_category}
Target Folder: {rule_folder}
Naming Convention: {rule_naming}

Document Summary:
{document_summary}

Original Filename: {original_filename}

Instructions:
1. If the Document Summary MATCHES the Rule Category, your status must be "move".
2. If it DOES NOT match, or if you are unsure, your status must be "quarantine".
3. Calculate the new filename based on the Naming Convention. If the naming convention includes {{original_filename}}, replace it with the actual original filename.
4. If the status is "quarantine", the folder must be "Quarantine".
5. If the status is "quarantine", the suggested_name MUST be exactly the Original Filename.

IMPORTANT: You must return ONLY the raw JSON object containing the actual values. Do NOT return a JSON schema, and do NOT wrap your answer in markdown fences (like ```json).

{format_instructions}
"""

_REPAIR_PROMPT = ChatPromptTemplate.from_messages(
[
(
"system",
"You previously attempted to output a JSON decision but your JSON was invalid or did not match the schema. "
"Fix the JSON below so it matches the required schema exactly. "
"Output ONLY valid JSON, nothing else.",
),
("human", "Broken output:\n{broken_json}\n\nValidation error:\n{error}"),
]
)

# =====================================================================
# DeciderAgent
# =====================================================================


class DeciderAgent:
"""Agent that decides whether a file matches a rule based on its summary.

Usage::

agent = DeciderAgent()
decision = agent.decide(summary, original_filename, rule)
print(decision.model_dump_json())
"""

def __init__(self, llm: Any | None = None) -> None:
self._llm = llm or get_llm(model=MODEL_CLASSIFIER)
self._parser = PydanticOutputParser(pydantic_object=ActionDecision)

self._prompt = PromptTemplate(
template=_SYSTEM_PROMPT_TEMPLATE,
input_variables=[
"rule_category",
"rule_folder",
"rule_naming",
"document_summary",
"original_filename",
],
partial_variables={
"format_instructions": self._parser.get_format_instructions()
},
)

self._chain = self._prompt | self._llm | StrOutputParser()
self._repair_chain = _REPAIR_PROMPT | self._llm | StrOutputParser()

# ── public API ───────────────────────────────────────────────────

def decide(
self, summary: str, original_filename: str, rule: CompiledRule
) -> ActionDecision:
"""Decide the fate of a document.

Parameters
----------
summary : str
Technical summary extracted from the document.
original_filename : str
The original name of the file.
rule : CompiledRule
The compiled rule containing category, folder, and naming.

Returns
-------
ActionDecision
Validated Pydantic model with status, suggested_name, and suggested_folder.
"""
logger.info(
"DeciderAgent: Evaluăm documentul '%s' față de regula '%s'",
original_filename,
rule.category,
)

raw_output = self._chain.invoke(
{
"rule_category": rule.category,
"rule_folder": rule.folder_structure,
"rule_naming": rule.naming_convention,
"document_summary": summary,
"original_filename": original_filename,
}
)

last_error: Exception | None = None
current_output = raw_output

for attempt in range(_MAX_RETRIES + 1):
try:
# Funcția self._parser.parse suportă markdown fences fallback din LangChain
return self._parser.parse(current_output)
except Exception as exc:
last_error = exc
logger.warning(
"Parse attempt %d/%d failed: %s",
attempt + 1,
_MAX_RETRIES + 1,
exc,
)
if attempt < _MAX_RETRIES:
current_output = self._repair_chain.invoke(
{"broken_json": current_output, "error": str(exc)}
)

logger.error("Eroare în timpul deciziei (după retry-uri): %s", last_error)
raise ValueError(
f"Failed to parse decision after retries: {last_error}"
) from last_error


# ─── Quick self-test ─────────────────────────────────────────────────
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)

agent = DeciderAgent()

test_rule = CompiledRule(
category="factură",
folder_structure="Facturi_Luna_Curenta",
naming_convention="factura_enel_10/20.pdf", # intenționat cu caractere invalide pentru test
)

test_summary_match = "Emitent: ENEL SA, Dată: 12.05.2023, Sumă: 150 RON, Tip: Factură energie electrică."

test_filename = "doc_scanned_123.pdf"

print(f"\n{'=' * 60}")
print("TEST 1: Sanitizare și Retry")
try:
decision1 = agent.decide(test_summary_match, test_filename, test_rule)
print("Output JSON (observă cum / a fost înlocuit):")
print(decision1.model_dump_json(indent=2))
except Exception as e:
print(f"Error: {e}")
print(f"{'=' * 60}\n")
29 changes: 29 additions & 0 deletions tests/test_agent_decider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""
Tests for Agent 2 - Decider
"""

import pytest
from pydantic import ValidationError

from ai.agent_decider import ActionDecision


def test_sanitize_filename():
"""Testează dacă validatorul Pydantic curăță caracterele ilegale."""
decision = ActionDecision(
status="move",
suggested_name="factura/enel:2023.pdf",
suggested_folder="Facturi",
)
# slash-ul și colon-ul trebuie înlocuite cu underscore
assert decision.suggested_name == "factura_enel_2023.pdf"


def test_invalid_status_raises_error():
"""Testează dacă Pydantic blochează statusuri inventate."""
with pytest.raises(ValidationError):
ActionDecision(
status="delete", # Invalid status, doar move sau quarantine permise
suggested_name="doc.pdf",
suggested_folder="Trash",
)
Loading