diff --git a/CLAUDE.md b/CLAUDE.md index 339ded4..00a6653 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -30,6 +30,10 @@ aidocs mcp docs/ # Start MCP server for docs directory # PDF export aidocs export-pdf docs/page.md + +# Watch mode (auto-sync on file changes) +aidocs watch # Watch docs/ and auto-chunk on changes +aidocs watch --with-vectors # Also generate embeddings ``` ## Architecture @@ -37,13 +41,14 @@ aidocs export-pdf docs/page.md ``` src/aidocs_cli/ ├── __init__.py # Version and entry point -├── cli.py # Typer CLI commands (init, check, serve, rag-*, export-pdf) +├── cli.py # Typer CLI commands (init, check, serve, rag-*, export-pdf, watch) ├── installer.py # Copies templates to target project (.claude/commands/, .claude/workflows/) ├── chunker.py # Splits markdown at ## headings for RAG ├── embeddings.py # OpenAI embeddings + SQL generation for pgvector ├── server.py # MkDocs config generation and nav discovery ├── pdf_exporter.py # Markdown→HTML→PDF with Chrome/Playwright ├── mcp_server.py # MCP server exposing docs via tools (list, search, read) +├── watcher.py # File system watcher for auto-sync (watchdog + Rich Live) └── templates/ ├── commands/docs/ # Slash command definitions (*.md) └── workflows/ # Workflow implementations per command @@ -78,7 +83,7 @@ Version is defined in two places (keep in sync): ## Dependencies -Core: typer, rich, httpx, mkdocs, mkdocs-material, pyyaml, mcp +Core: typer, rich, httpx, mkdocs, mkdocs-material, pyyaml, mcp, watchdog, python-dotenv Python 3.11+ required. Build system uses hatchling. diff --git a/README.md b/README.md index d60a4ea..f7ec0ac 100644 --- a/README.md +++ b/README.md @@ -489,6 +489,65 @@ Once configured, Claude Code can use these tools automatically. 
 You can prompt:
 | Empty search results | Ensure docs directory has `.md` files |
 | Slow searches | Run `aidocs rag` to pre-chunk files |
+### `aidocs watch`
+
+Watch documentation directory for changes and automatically re-chunk files and regenerate embeddings.
+
+```bash
+aidocs watch                 # Watch docs/, chunk only
+aidocs watch --with-vectors  # Also generate embeddings
+aidocs watch --debounce 5    # Wait 5 seconds before processing
+aidocs watch docs/users      # Watch specific subdirectory
+```
+
+**Options:**
+| Option | Description |
+|--------|-------------|
+| `--with-vectors` | Include embedding generation |
+| `--debounce, -d` | Seconds to wait after last change (default: 10) |
+| `--table, -t` | Target table name for embeddings (default: `doc_embeddings`) |
+
+**What it does:**
+1. Monitors the docs directory for `.md` file changes
+2. Debounces rapid changes (waits 10 seconds after last edit by default)
+3. Re-chunks modified files automatically
+4. Generates embeddings when `--with-vectors` is passed and `OPENAI_API_KEY` is set
+5. Updates manifest and sync state
+
+**Real-time display:**
+```
+╭─────────────────────────────────────────╮
+│ Watching docs/                          │
+│                                         │
+│ Last update: 14:32:05                   │
+│ Files: 12 | Chunks: 45 | Embeddings: 45 │
+│                                         │
+│ Embeddings: enabled                     │
+│                                         │
+│ Recent:                                 │
+│ ✓ users/index.md (3 chunks)             │
+│ ✓ api/auth.md (5 chunks)                │
+│                                         │
+│ Press Ctrl+C to stop                    │
+╰─────────────────────────────────────────╯
+```
+
+**Use cases:**
+- Keep chunks updated while editing documentation
+- Auto-sync embeddings during documentation sprints
+- Run alongside `aidocs serve` for a complete dev workflow
+
+**Example workflow:**
+```bash
+# Terminal 1: Watch for changes
+aidocs watch
+
+# Terminal 2: Serve documentation
+aidocs serve
+
+# Edit docs in your editor - changes auto-sync!
+```
+
 ## Slash Commands
 
 After running `aidocs init`, these commands are available in Claude Code:
diff --git a/pyproject.toml b/pyproject.toml
index ea290b5..e4fa8ed 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "aidocs"
-version = "0.16.0"
+version = "0.17.0"
 description = "AI-powered documentation generator for web applications. Install docs commands into your Claude Code project."
 readme = "README.md"
 license = { text = "MIT" }
@@ -26,6 +26,8 @@ dependencies = [
     "mkdocs-material>=9.5.0",
     "pyyaml>=6.0",
     "mcp>=1.0.0",
+    "watchdog>=4.0.0",
+    "python-dotenv>=1.0.0",
 ]
 
 [project.scripts]
diff --git a/src/aidocs_cli/__init__.py b/src/aidocs_cli/__init__.py
index 9f43bde..f7d2b3f 100644
--- a/src/aidocs_cli/__init__.py
+++ b/src/aidocs_cli/__init__.py
@@ -1,6 +1,6 @@
 """AI-powered documentation generator CLI for Claude Code projects."""
 
-__version__ = "0.16.0"
+__version__ = "0.17.0"
 
 from .cli import app
diff --git a/src/aidocs_cli/cli.py b/src/aidocs_cli/cli.py
index fc4c111..38dec26 100644
--- a/src/aidocs_cli/cli.py
+++ b/src/aidocs_cli/cli.py
@@ -843,5 +843,68 @@ def mcp_command(
     asyncio.run(run_server(target_dir))
 
 
+@app.command("watch")
+def watch(
+    docs_dir: Optional[str] = typer.Argument(
+        "docs",
+        help="Directory containing documentation to watch.",
+    ),
+    with_vectors: bool = typer.Option(
+        False,
+        "--with-vectors",
+        help="Enable embedding generation (requires OPENAI_API_KEY).",
+    ),
+    debounce: float = typer.Option(
+        10.0,
+        "--debounce",
+        "-d",
+        help="Seconds to wait after last change before processing.",
+    ),
+    table: str = typer.Option(
+        "doc_embeddings",
+        "--table",
+        "-t",
+        help="Target table name for embeddings.",
+    ),
+) -> None:
+    """Watch documentation directory and auto-sync on changes.
+
+    Monitors the docs directory for markdown file changes and automatically:
+    - Re-chunks modified files
+    - Generates embeddings (if --with-vectors and OPENAI_API_KEY is set)
+    - Updates manifest and sync state
+
+    Uses debouncing to batch rapid changes (default: 10 seconds).
+
+    Examples:
+        aidocs watch                     # Watch docs/, chunk only
+        aidocs watch --with-vectors      # Also generate embeddings
+        aidocs watch --debounce 5        # Wait 5 seconds before processing
+        aidocs watch docs/users          # Watch specific subdirectory
+    """
+    from .watcher import watch_docs
+
+    target_dir = Path(docs_dir)
+
+    if not target_dir.exists():
+        console.print(f"[red]Error: Directory not found: {docs_dir}[/red]")
+        raise typer.Exit(1)
+
+    if not target_dir.is_dir():
+        console.print(f"[red]Error: Not a directory: {docs_dir}[/red]")
+        raise typer.Exit(1)
+
+    try:
+        watch_docs(
+            target_dir,
+            with_vectors=with_vectors,
+            debounce_seconds=debounce,
+            table_name=table,
+        )
+    except Exception as e:
+        console.print(f"[red]Error: {e}[/red]")
+        raise typer.Exit(1)
+
+
 if __name__ == "__main__":
     app()
diff --git a/src/aidocs_cli/watcher.py b/src/aidocs_cli/watcher.py
new file mode 100644
index 0000000..75de503
--- /dev/null
+++ b/src/aidocs_cli/watcher.py
@@ -0,0 +1,341 @@
+"""File system watcher for auto-syncing documentation."""
+
+import threading
+import time  # NOTE(review): unused in this module — confirm before removing
+from datetime import datetime
+from pathlib import Path
+from typing import Optional
+
+from rich.console import Console
+from rich.live import Live
+from rich.panel import Panel
+from rich.text import Text  # NOTE(review): unused import
+from watchdog.events import FileSystemEvent, FileSystemEventHandler
+from watchdog.observers import Observer
+
+from .chunker import (
+    calculate_file_hash,
+    chunk_file,
+    get_chunks_path,  # NOTE(review): unused import
+    load_manifest,
+    save_chunks,
+    save_manifest,
+)
+from .embeddings import generate_embedding, get_openai_api_key, load_last_sync, save_last_sync
+
+console = Console()
+
+
+class WatchState:
+    """Shared state for the watcher."""
+
+    def __init__(self, docs_dir: Path):
+        self.docs_dir = docs_dir
+        self.pending_files: set[Path] = set()
+        self.lock = threading.Lock()
+        self.last_process_time: Optional[datetime] = None
+        self.total_files = 0
+        self.total_chunks = 0
+        self.total_embeddings = 0
+        self.recent_files: list[tuple[str, int, str]] = []  # (path, chunks, status)
+        self.api_key: Optional[str] = None
+        self.embeddings_enabled = False
+
+
+class MarkdownEventHandler(FileSystemEventHandler):
+    """Handle file system events for markdown files."""
+
+    def __init__(
+        self,
+        state: WatchState,
+        debounce_seconds: float,
+        process_callback: callable,  # NOTE(review): prefer typing.Callable[[], None]
+    ):
+        super().__init__()
+        self.state = state
+        self.debounce_seconds = debounce_seconds
+        self.process_callback = process_callback
+        self.timer: Optional[threading.Timer] = None
+        self.timer_lock = threading.Lock()
+
+    def _should_process(self, path: Path) -> bool:
+        """Check if a file should be processed."""
+        # Only process .md files
+        if path.suffix.lower() != ".md":
+            return False
+
+        # Skip hidden directories and files
+        try:
+            rel_path = path.relative_to(self.state.docs_dir)
+            parts = rel_path.parts
+            if any(p.startswith(".") for p in parts):
+                return False
+        except ValueError:
+            return False
+
+        # Skip .chunks.json files
+        if ".chunks" in path.name:
+            return False
+
+        return True
+
+    def _schedule_processing(self):
+        """Schedule file processing after debounce delay."""
+        with self.timer_lock:
+            if self.timer:
+                self.timer.cancel()
+
+            self.timer = threading.Timer(
+                self.debounce_seconds,
+                self.process_callback,
+            )
+            self.timer.start()
+
+    def on_modified(self, event: FileSystemEvent) -> None:
+        if event.is_directory:
+            return
+
+        path = Path(event.src_path)
+        if self._should_process(path):
+            with self.state.lock:
+                self.state.pending_files.add(path)
+            self._schedule_processing()
+
+    def on_created(self, event: FileSystemEvent) -> None:
+        if event.is_directory:
+            return
+
+        path = Path(event.src_path)
+        if self._should_process(path):
+            with self.state.lock:
+                self.state.pending_files.add(path)
+            self._schedule_processing()
+
+    def on_deleted(self, event: FileSystemEvent) -> None:
+        # For deleted files, we just remove from pending
+        # The manifest will be updated on next full sync
+        if event.is_directory:
+            return
+
+        path = Path(event.src_path)
+        with self.state.lock:
+            self.state.pending_files.discard(path)
+
+
+def process_pending_files(
+    state: WatchState,
+    with_vectors: bool,
+    table_name: str,  # NOTE(review): unused — embeddings are generated but never written to this table
+    on_update: Optional[callable] = None,
+) -> None:
+    """Process all pending files."""
+    with state.lock:
+        files_to_process = list(state.pending_files)
+        state.pending_files.clear()
+
+    if not files_to_process:
+        return
+
+    # Load manifest
+    manifest = load_manifest(state.docs_dir)
+    last_sync = load_last_sync(state.docs_dir) if state.embeddings_enabled else None
+
+    processed_count = 0
+    chunks_count = 0
+    embeddings_count = 0
+    recent = []
+
+    for file_path in files_to_process:
+        if not file_path.exists():
+            continue
+
+        # Chunk the file
+        chunks_data = chunk_file(file_path)
+        if not chunks_data:
+            continue
+
+        # Save chunks
+        chunks_path = save_chunks(chunks_data)
+        processed_count += 1
+        file_chunks = chunks_data["total_chunks"]
+        chunks_count += file_chunks
+
+        # Update manifest
+        rel_path = str(file_path.relative_to(state.docs_dir.parent) if state.docs_dir.parent != file_path.parent else file_path)  # NOTE(review): key scheme looks suspect — verify against chunker's manifest keys
+        file_hash = calculate_file_hash(file_path)
+
+        manifest["files"][rel_path] = {
+            "hash": file_hash,
+            "chunks_file": str(chunks_path),
+            "chunk_count": file_chunks,
+            "modified_at": datetime.now().isoformat(),
+        }
+
+        status = "chunked"
+
+        # Generate embeddings if enabled
+        if state.embeddings_enabled and with_vectors and state.api_key:
+            for chunk in chunks_data["chunks"]:
+                embedding = generate_embedding(chunk["content"], state.api_key)
+                if embedding:
+                    embeddings_count += 1
+
+            # Update sync state
+            if last_sync:
+                last_sync["files"][rel_path] = {
+                    "hash": file_hash,
+                    "chunk_count": file_chunks,
+                    "synced_at": datetime.now().isoformat(),
+                }
+                status = "synced"
+
+        # Track recent file
+        try:
+            display_path = str(file_path.relative_to(state.docs_dir))
+        except ValueError:
+            display_path = file_path.name
+        recent.append((display_path, file_chunks, status))
+
+    # Save manifest
+    manifest["last_run"] = datetime.now().isoformat()
+    save_manifest(state.docs_dir, manifest)
+
+    # Save sync state if embeddings were generated
+    if last_sync and embeddings_count > 0:
+        last_sync["synced_at"] = datetime.now().isoformat()
+        save_last_sync(state.docs_dir, last_sync)
+
+    # Update state
+    with state.lock:
+        state.last_process_time = datetime.now()
+        state.total_files += processed_count
+        state.total_chunks += chunks_count
+        state.total_embeddings += embeddings_count
+
+        # Keep only last 5 recent files
+        state.recent_files = (recent + state.recent_files)[:5]
+
+    if on_update:
+        on_update()
+
+
+def create_status_panel(state: WatchState, with_vectors: bool) -> Panel:
+    """Create a Rich panel showing watcher status."""
+    lines = []
+
+    # Watching status
+    lines.append(f"[bold cyan]Watching[/bold cyan] {state.docs_dir}/")
+    lines.append("")
+
+    # Last update time
+    if state.last_process_time:
+        time_str = state.last_process_time.strftime("%H:%M:%S")
+        lines.append(f"[dim]Last update:[/dim] {time_str}")
+    else:
+        lines.append("[dim]Waiting for changes...[/dim]")
+
+    # Stats
+    stats_parts = [f"Files: {state.total_files}"]
+    stats_parts.append(f"Chunks: {state.total_chunks}")
+    if state.embeddings_enabled and with_vectors:
+        stats_parts.append(f"Embeddings: {state.total_embeddings}")
+    lines.append(" | ".join(stats_parts))
+    lines.append("")
+
+    # Embeddings status
+    if not with_vectors:
+        lines.append("[yellow]Embeddings: disabled (use --with-vectors to enable)[/yellow]")
+    elif state.embeddings_enabled:
+        lines.append("[green]Embeddings: enabled[/green]")
+    else:
+        lines.append("[yellow]Embeddings: disabled (no API key)[/yellow]")
+    lines.append("")
+
+    # Recent files
+    if state.recent_files:
+        lines.append("[bold]Recent:[/bold]")
+        for path, chunks, status in state.recent_files:
+            if status == "synced":
+                icon = "[green]✓[/green]"
+            else:
+                icon = "[blue]○[/blue]"
+            lines.append(f"  {icon} {path} ({chunks} chunks)")
+    else:
+        lines.append("[dim]No files processed yet[/dim]")
+
+    lines.append("")
+    lines.append("[dim]Press Ctrl+C to stop[/dim]")
+
+    content = "\n".join(lines)
+    return Panel(
+        content,
+        title="[bold]aidocs watch[/bold]",
+        border_style="blue",
+    )
+
+
+def watch_docs(
+    docs_dir: Path,
+    with_vectors: bool = False,
+    debounce_seconds: float = 10.0,  # was 2.0 — aligned with the CLI option default and README ("default: 10")
+    table_name: str = "doc_embeddings",
+) -> None:
+    """Watch a documentation directory for changes and auto-sync.
+
+    Args:
+        docs_dir: Directory to watch
+        with_vectors: Enable embedding generation
+        debounce_seconds: Wait time after last change before processing
+        table_name: PostgreSQL table name for embeddings
+    """
+    state = WatchState(docs_dir)
+
+    # Check for API key
+    api_key = get_openai_api_key()
+    if api_key and with_vectors:
+        state.api_key = api_key
+        state.embeddings_enabled = True
+
+    # Count existing files and chunks
+    manifest = load_manifest(docs_dir)
+    state.total_files = len(manifest.get("files", {}))
+    state.total_chunks = sum(
+        f.get("chunk_count", 0) for f in manifest.get("files", {}).values()
+    )
+
+    # Create Live display
+    live_update_event = threading.Event()
+
+    def on_update():
+        live_update_event.set()
+
+    def do_process():
+        process_pending_files(state, with_vectors, table_name, on_update)
+
+    # Set up file watcher
+    event_handler = MarkdownEventHandler(state, debounce_seconds, do_process)
+    observer = Observer()
+    observer.schedule(event_handler, str(docs_dir), recursive=True)
+    observer.start()
+
+    try:
+        with Live(
+            create_status_panel(state, with_vectors),
+            console=console,
+            refresh_per_second=1,
+        ) as live:
+            while True:
+                # Wait for update or timeout
+                live_update_event.wait(timeout=1.0)
+                live_update_event.clear()
+
+                # Update display
+                live.update(create_status_panel(state, with_vectors))
+
+    except KeyboardInterrupt:
+        console.print("\n[yellow]Stopping watcher...[/yellow]")
+    finally:
+        observer.stop()
+        observer.join()
+
+    console.print("[green]Watcher stopped.[/green]")