diff --git a/.claude/commands/init-project.md b/.claude/commands/init-project.md new file mode 100644 index 0000000..996e6b1 --- /dev/null +++ b/.claude/commands/init-project.md @@ -0,0 +1,713 @@ +--- +description: Initialize new project with comprehensive documentation (overview, specs, tech stack, architecture) +argument-hint: "[project-description] [--file FILE] [--minimal] [--no-sync] [--no-arch]" +allowed-tools: Read, Write, Bash, Skill, Glob +model: claude-sonnet-4-5-20250929 +--- + +# Init Project: Bootstrap Project Foundation + +## Introduction + +Transform a project idea into complete foundational documentation including project overview, technical specifications, technology stack selection, and system architecture design. + +**Purpose**: Create the documentation foundation before writing any code - ensures alignment, reduces rework, and provides clear technical direction. + +**Output Structure:** +``` +./project-management/ +├── PROJECT-OVERVIEW.md # Vision, goals, features, success criteria +├── SPECIFICATIONS.md # Functional/non-functional requirements, API contracts, data models +├── TECH-STACK.md # Technology selections with rationale and trade-offs +├── ARCHITECTURE.md # System design with mermaid diagrams +└── .meta/ + └── last-sync.json # Tracking metadata for document sync +``` + +**Integration**: Generated documents serve as input for `/lazy plan` when creating user stories. 
+ +--- + +## When to Use + +**Use `/lazy init-project` when:** +- Starting a brand new greenfield project +- Need structured project documentation before coding +- Want technology stack guidance and architecture design +- Transitioning from idea to implementation +- Building POC/MVP and need technical foundation + +**Skip this command when:** +- Project already has established documentation +- Working on existing codebase +- Only need single user story (use `/lazy plan` directly) +- Quick prototype without formal planning + +--- + +## Usage Examples + +```bash +# From project description +/lazy init-project "Build a real-time task management platform with AI prioritization" + +# From enhanced prompt file (recommended) +/lazy init-project --file enhanced_prompt.md + +# Minimal mode (skip architecture, faster) +/lazy init-project "E-commerce marketplace" --minimal + +# Skip architecture generation +/lazy init-project "API service" --no-arch + +# Disable auto-sync tracking +/lazy init-project "Chat app" --no-sync +``` + +--- + +## Requirements + +### Prerequisites +- Working directory is project root +- Git repository initialized (recommended) +- PROJECT-OVERVIEW.md should not already exist (will be overwritten) + +### Input Requirements +- **Project description** (required): Either inline text or file path via `--file` +- **Sufficient detail**: Mention key features, tech preferences, scale expectations +- **Clear goals**: What problem does this solve? 
+ +### Optional Flags +- `--file FILE`: Read project description from file (STT enhanced prompt recommended) +- `--minimal`: Generate only PROJECT-OVERVIEW.md and SPECIFICATIONS.md (skip tech stack and architecture) +- `--no-arch`: Generate overview, specs, and tech stack but skip architecture diagrams +- `--no-sync`: Skip creating `.meta/last-sync.json` tracking file + +--- + +## Execution + +### Step 1: Parse Arguments and Load Project Description + +**Parse flags:** +```python +args = parse_arguments("$ARGUMENTS") + +# Extract flags +file_path = args.get("--file") +minimal_mode = "--minimal" in args +skip_arch = "--no-arch" in args +disable_sync = "--no-sync" in args + +# Get project description +if file_path: + # Read from file + project_description = read_file(file_path) + if not project_description: + return error(f"File not found or empty: {file_path}") +else: + # Use inline description + project_description = remove_flags(args) + if not project_description.strip(): + return error("No project description provided. Use inline text or --file FILE") +``` + +**Validation:** +- Project description must be non-empty +- If `--file` used, file must exist and be readable +- Minimum 50 characters for meaningful planning (warn if less) + +--- + +### Step 2: Create Project Management Directory + +**Setup directory structure:** +```bash +# Create base directory +mkdir -p ./project-management/.meta + +# Check if PROJECT-OVERVIEW.md exists +if [ -f "./project-management/PROJECT-OVERVIEW.md" ]; then + echo "Warning: PROJECT-OVERVIEW.md already exists and will be overwritten" +fi +``` + +**Output location**: Always `./project-management/` relative to current working directory. 
+ +--- + +### Step 3: Invoke Project Planner Skill + +**Generate overview and specifications:** + +```python +# Invoke project-planner skill +result = Skill( + command="project-planner", + context={ + "description": project_description, + "output_dir": "./project-management/" + } +) + +# Skill generates: +# - PROJECT-OVERVIEW.md (vision, goals, features, constraints) +# - SPECIFICATIONS.md (requirements, API contracts, data models) + +# Verify both files were created +assert exists("./project-management/PROJECT-OVERVIEW.md"), "PROJECT-OVERVIEW.md not created" +assert exists("./project-management/SPECIFICATIONS.md"), "SPECIFICATIONS.md not created" +``` + +**What project-planner does:** +1. Extracts project context (name, features, goals, constraints) +2. Generates PROJECT-OVERVIEW.md with vision and high-level features +3. Generates SPECIFICATIONS.md with detailed technical requirements +4. Validates completeness of both documents + +**Expected output:** +- `PROJECT-OVERVIEW.md`: 2-3KB, executive summary format +- `SPECIFICATIONS.md`: 8-15KB, comprehensive technical details + +--- + +### Step 4: Invoke Tech Stack Architect Skill (unless --minimal) + +**Generate technology stack selection:** + +```python +# Skip only in minimal mode; with --no-arch the skill still runs but skips ARCHITECTURE.md +if not minimal_mode: + # Read PROJECT-OVERVIEW.md for context + overview_content = read_file("./project-management/PROJECT-OVERVIEW.md") + + # Invoke tech-stack-architect skill + result = Skill( + command="tech-stack-architect", + context={ + "project_overview": overview_content, + "specifications": read_file("./project-management/SPECIFICATIONS.md"), + "output_dir": "./project-management/", + "skip_architecture": skip_arch # Only generate TECH-STACK.md if true + } + ) + + # Skill generates: + # - TECH-STACK.md (frontend, backend, database, DevOps choices with rationale) + # - ARCHITECTURE.md (system design with mermaid diagrams) [unless skip_arch] + + # Verify tech stack file created + assert 
exists("./project-management/TECH-STACK.md"), "TECH-STACK.md not created" + + if not skip_arch: + assert exists("./project-management/ARCHITECTURE.md"), "ARCHITECTURE.md not created" +``` + +**What tech-stack-architect does:** +1. Reads PROJECT-OVERVIEW.md for requirements and constraints +2. Analyzes technology needs across 4 categories: Frontend, Backend, Database, DevOps +3. Generates TECH-STACK.md with choices, rationale, alternatives, trade-offs +4. Designs system architecture with component diagrams +5. Generates ARCHITECTURE.md with mermaid diagrams for structure, data flow, deployment + +**Expected output:** +- `TECH-STACK.md`: 5-8KB, table-based technology selections +- `ARCHITECTURE.md`: 10-15KB, system design with 3-5 mermaid diagrams + +--- + +### Step 5: Create Tracking Metadata (unless --no-sync) + +**Generate sync tracking file:** + +```python +if not disable_sync: + metadata = { + "initialized_at": datetime.now().isoformat(), + "documents": { + "PROJECT-OVERVIEW.md": { + "created": datetime.now().isoformat(), + "size_bytes": file_size("./project-management/PROJECT-OVERVIEW.md"), + "checksum": sha256("./project-management/PROJECT-OVERVIEW.md") + }, + "SPECIFICATIONS.md": { + "created": datetime.now().isoformat(), + "size_bytes": file_size("./project-management/SPECIFICATIONS.md"), + "checksum": sha256("./project-management/SPECIFICATIONS.md") + }, + "TECH-STACK.md": { + "created": datetime.now().isoformat(), + "size_bytes": file_size("./project-management/TECH-STACK.md"), + "checksum": sha256("./project-management/TECH-STACK.md") + } if not minimal_mode else None, + "ARCHITECTURE.md": { + "created": datetime.now().isoformat(), + "size_bytes": file_size("./project-management/ARCHITECTURE.md"), + "checksum": sha256("./project-management/ARCHITECTURE.md") + } if not minimal_mode and not skip_arch else None + }, + "flags": { + "minimal": minimal_mode, + "skip_architecture": skip_arch + } + } + + # Write metadata + 
write_json("./project-management/.meta/last-sync.json", metadata) +``` + +**Purpose of tracking:** +- Detect manual changes to generated files +- Support future re-sync or update operations +- Track generation history + +--- + +### Step 6: Git Add (if in repository) + +**Stage generated files:** + +```bash +# Check if in git repo +if git rev-parse --git-dir > /dev/null 2>&1; then + # Add all generated files + git add ./project-management/PROJECT-OVERVIEW.md + git add ./project-management/SPECIFICATIONS.md + + if [ "$minimal_mode" = false ]; then + git add ./project-management/TECH-STACK.md + [ "$skip_arch" = false ] && git add ./project-management/ARCHITECTURE.md + fi + + [ "$disable_sync" = false ] && git add ./project-management/.meta/last-sync.json + + echo "✓ Files staged for commit (git add)" + echo "Note: Review files before committing" +else + echo "Not a git repository - skipping git add" +fi +``` + +**Important**: Files are staged but NOT committed. User should review before committing. + +--- + +### Step 7: Output Summary + +**Display comprehensive summary:** + +```markdown +## Project Initialization Complete + +**Project Name**: {extracted from PROJECT-OVERVIEW.md} + +**Documents Generated**: + +1. ✅ **PROJECT-OVERVIEW.md** ({size}KB) + - Vision and goals defined + - {N} key features identified + - {N} success criteria established + - Constraints and scope documented + +2. ✅ **SPECIFICATIONS.md** ({size}KB) + - {N} functional requirements detailed + - Non-functional requirements defined + - {N} API endpoints documented (if applicable) + - {N} data models specified + - Development phases outlined + +{if not minimal_mode:} +3. ✅ **TECH-STACK.md** ({size}KB) + - Frontend stack selected: {tech} + - Backend stack selected: {tech} + - Database choices: {tech} + - DevOps infrastructure: {tech} + - Trade-offs and migration path documented + +{if not skip_arch:} +4. 
✅ **ARCHITECTURE.md** ({size}KB) + - System architecture designed + - {N} component diagrams included + - Data flow documented + - Security architecture defined + - Scalability strategy outlined + +{if not disable_sync:} +5. ✅ **Tracking metadata** (.meta/last-sync.json) + - Document checksums recorded + - Sync tracking enabled + +**Location**: `./project-management/` + +**Next Steps**: + +1. **Review Documentation** (~15-20 minutes) + - Read PROJECT-OVERVIEW.md for accuracy + - Verify SPECIFICATIONS.md completeness + - Check TECH-STACK.md technology choices + - Review ARCHITECTURE.md diagrams + +2. **Customize** (Optional) + - Refine goals and success criteria + - Add missing requirements + - Adjust technology choices if needed + - Enhance architecture diagrams + +3. **Commit Initial Docs** + ```bash + git commit -m "docs: initialize project documentation + + - Add PROJECT-OVERVIEW.md with vision and goals + - Add SPECIFICATIONS.md with technical requirements + - Add TECH-STACK.md with technology selections + - Add ARCHITECTURE.md with system design + + 🤖 Generated with [Claude Code](https://claude.com/claude-code) + + Co-Authored-By: Claude <noreply@anthropic.com>" + ``` + +4. **Start Planning User Stories** + ```bash + # Create first user story from specifications + /lazy plan "Implement user authentication system" + + # Or plan from specific requirement + /lazy plan --file ./project-management/SPECIFICATIONS.md --section "Authentication" + ``` + +5. 
**Begin Implementation** + ```bash + # After creating user story + /lazy code @US-1.1.md + ``` + +**Estimated Time to Review/Customize**: 15-30 minutes + +**Documentation Size**: {total}KB across {N} files + +--- + +## Tips for Success + +**Review Phase:** +- Don't skip the review - these docs guide all future development +- Check if technology choices match team skills +- Verify success criteria are measurable +- Ensure API contracts match business requirements + +**Customization:** +- Feel free to edit generated docs manually +- Add project-specific constraints or requirements +- Refine architecture based on team preferences +- Update specs as you learn more + +**Next Phase:** +- Use generated docs as input to `/lazy plan` +- Reference TECH-STACK.md during implementation +- Keep ARCHITECTURE.md updated as system evolves +- Revisit SUCCESS CRITERIA monthly +``` + +--- + +## Validation + +### Success Criteria + +**Documents Generated:** +- ✅ PROJECT-OVERVIEW.md exists and is non-empty (>1KB) +- ✅ SPECIFICATIONS.md exists and is comprehensive (>5KB) +- ✅ TECH-STACK.md exists (unless --minimal) and has 4 categories +- ✅ ARCHITECTURE.md exists (unless --minimal or --no-arch) and has mermaid diagrams +- ✅ .meta/last-sync.json exists (unless --no-sync) with checksums + +**Content Quality:** +- ✅ PROJECT-OVERVIEW.md has vision, goals, features, success criteria, constraints +- ✅ SPECIFICATIONS.md has functional requirements, API contracts, data models +- ✅ TECH-STACK.md has rationale and alternatives for each technology +- ✅ ARCHITECTURE.md has C4 diagram, component details, data flow diagrams + +**Git Integration:** +- ✅ Files staged for commit (if in git repo) +- ✅ No automatic commit created (user reviews first) + +### Error Conditions + +**Handle gracefully:** +- Empty or insufficient project description → Return error with guidance +- File not found (--file flag) → Clear error message with path +- PROJECT-OVERVIEW.md already exists → Warn but continue (overwrite) +- 
Skill execution failure → Display error and suggest manual creation +- Not in git repo → Skip git operations, warn user + +--- + +## Examples in Action + +### Example 1: Full Initialization (Recommended) + +```bash +$ /lazy init-project "Build a real-time task management platform with AI-powered prioritization, team collaboration, and GitHub integration. Target 1000 users, 99.9% uptime. Python backend, React frontend." + +Initializing project... + +Step 1/5: Generating project overview and specifications... +✓ PROJECT-OVERVIEW.md created (2.8KB) +✓ SPECIFICATIONS.md created (11.4KB) + +Step 2/5: Designing technology stack... +✓ TECH-STACK.md created (6.2KB) + - Frontend: React 18 + Zustand + Tailwind + - Backend: FastAPI + SQLAlchemy + - Database: PostgreSQL + Redis + - DevOps: AWS ECS + GitHub Actions + +Step 3/5: Architecting system design... +✓ ARCHITECTURE.md created (13.7KB) + - Component architecture with mermaid diagrams + - Authentication flow documented + - Scalability strategy defined + +Step 4/5: Creating tracking metadata... +✓ .meta/last-sync.json created + +Step 5/5: Staging files for git... +✓ 5 files staged (git add) + +## Project Initialization Complete + +Project: TaskFlow Pro - Modern task management with AI + +Documents Generated: +1. ✅ PROJECT-OVERVIEW.md (2.8KB) +2. ✅ SPECIFICATIONS.md (11.4KB) - 12 API endpoints, 6 data models +3. ✅ TECH-STACK.md (6.2KB) - Full stack defined +4. ✅ ARCHITECTURE.md (13.7KB) - 5 mermaid diagrams + +Next Steps: +1. Review docs (15-20 min) +2. Commit: git commit -m "docs: initialize project" +3. Create first story: /lazy plan "User authentication" + +Complete! Ready for user story planning. +``` + +### Example 2: Minimal Mode (Fast) + +```bash +$ /lazy init-project "E-commerce marketplace with product catalog and checkout" --minimal + +Initializing project (minimal mode)... + +Step 1/2: Generating project overview and specifications... 
+✓ PROJECT-OVERVIEW.md created (1.9KB) +✓ SPECIFICATIONS.md created (8.3KB) + +Step 2/2: Staging files... +✓ 2 files staged (git add) + +## Project Initialization Complete (Minimal) + +Project: E-Commerce Marketplace + +Documents Generated: +1. ✅ PROJECT-OVERVIEW.md (1.9KB) +2. ✅ SPECIFICATIONS.md (8.3KB) + +Skipped (minimal mode): +- TECH-STACK.md (technology selection) +- ARCHITECTURE.md (system design) + +Note: Use full mode if you need tech stack guidance and architecture diagrams. + +Next Steps: +1. Review specs +2. Manually define tech stack (or re-run /lazy init-project without --minimal) +3. Create stories: /lazy plan "Product catalog" +``` + +### Example 3: From Enhanced Prompt File + +```bash +$ /lazy init-project --file enhanced_prompt.md + +Reading project description from: enhanced_prompt.md + +Initializing project... + +Step 1/5: Generating project overview and specifications... +✓ PROJECT-OVERVIEW.md created (3.2KB) +✓ SPECIFICATIONS.md created (14.8KB) + - Extracted 15 functional requirements + - Defined 8 API contracts + - Specified 9 data models + +Step 2/5: Designing technology stack... +✓ TECH-STACK.md created (7.1KB) + +Step 3/5: Architecting system design... +✓ ARCHITECTURE.md created (16.4KB) + +... + +Complete! High-quality docs generated from enhanced prompt. 
+``` + +--- + +## Integration with Other Commands + +### With `/lazy plan` + +```bash +# Initialize project foundation +/lazy init-project "Project description" + +# Create first user story (references SPECIFICATIONS.md automatically) +/lazy plan "Implement authentication" +# → project-manager uses SPECIFICATIONS.md for context +# → Generates US-1.1.md aligned with project specs +``` + +### With `/lazy code` + +```bash +# During implementation +/lazy code @US-1.1.md +# → context-packer loads TECH-STACK.md and ARCHITECTURE.md +# → Implementation follows defined architecture patterns +# → Technology choices match TECH-STACK.md +``` + +### With `/lazy review` + +```bash +# During story review +/lazy review US-1.1 +# → reviewer-story agent checks alignment with SPECIFICATIONS.md +# → Validates implementation matches ARCHITECTURE.md +# → Ensures success criteria from PROJECT-OVERVIEW.md are met +``` + +--- + +## Environment Variables + +```bash +# Skip architecture generation by default +export LAZYDEV_INIT_SKIP_ARCH=1 + +# Minimal mode by default +export LAZYDEV_INIT_MINIMAL=1 + +# Disable sync tracking +export LAZYDEV_INIT_NO_SYNC=1 + +# Custom output directory +export LAZYDEV_PROJECT_DIR="./docs/project" +``` + +--- + +## Troubleshooting + +### Issue: "Insufficient project description" + +**Problem**: Description too vague or short. + +**Solution**: +```bash +# Provide more detail +/lazy init-project "Build task manager with: +- Real-time collaboration +- AI prioritization +- GitHub/Jira integration +- Target: 10k users, 99.9% uptime +- Stack preference: Python + React" + +# Or use enhanced prompt file +/lazy init-project --file enhanced_prompt.md +``` + +### Issue: "PROJECT-OVERVIEW.md already exists" + +**Problem**: Running init-project in directory that's already initialized. 
+ +**Solution**: +```bash +# Review existing docs first +ls -la ./project-management/ + +# If you want to regenerate (will overwrite) +/lazy init-project "New description" + +# Or work with existing docs +/lazy plan "First feature" +``` + +### Issue: "Skill execution failed" + +**Problem**: project-planner or tech-stack-architect skill error. + +**Solution**: +```bash +# Check skill files exist +ls .claude/skills/project-planner/SKILL.md +ls .claude/skills/tech-stack-architect/SKILL.md + +# Try minimal mode (skips tech-stack-architect) +/lazy init-project "Description" --minimal + +# Manual fallback: create docs manually using templates +# See .claude/skills/project-planner/SKILL.md for templates +``` + +### Issue: "No technology preferences detected" + +**Problem**: TECH-STACK.md has generic choices that don't match needs. + +**Solution**: +```bash +# Be specific about tech preferences in description +/lazy init-project "API service using FastAPI, PostgreSQL, deployed on AWS ECS with GitHub Actions CI/CD" + +# Or edit TECH-STACK.md manually after generation +# File is meant to be customized +``` + +--- + +## Key Principles + +1. **Documentation-First**: Create foundation before writing code +2. **Smart Defaults**: Skills generate opinionated but reasonable choices +3. **Customizable**: All generated docs are meant to be refined +4. **Integration**: Docs feed into planning and implementation commands +5. **Version Control**: Track docs alongside code +6. **Living Documents**: Update as project evolves +7. 
**No Lock-In**: Skip sections with flags, edit freely + +--- + +## Related Commands + +- `/lazy plan` - Create user stories from initialized project +- `/lazy code` - Implement features following architecture +- `/lazy review` - Validate against project specifications +- `/lazy docs` - Generate additional documentation + +--- + +## Skills Used + +- `project-planner` - Generates PROJECT-OVERVIEW.md and SPECIFICATIONS.md +- `tech-stack-architect` - Generates TECH-STACK.md and ARCHITECTURE.md +- `output-style-selector` (automatic) - Formats output optimally + +--- + +**Version:** 1.0.0 +**Status:** Production-Ready +**Philosophy:** Document first, build second. Clear foundation, faster development. diff --git a/.claude/commands/question.md b/.claude/commands/question.md new file mode 100644 index 0000000..7e46f97 --- /dev/null +++ b/.claude/commands/question.md @@ -0,0 +1,563 @@ +--- +description: Answer questions about code or technical topics without creating artifacts +argument-hint: "" +allowed-tools: Read, Glob, Grep, Bash, Task +--- + +# Question Command: Intelligent Q&A System + +Answer questions about your codebase or general technical topics with zero artifacts. + +## Core Philosophy + +**Ask anything, get answers, create nothing.** + +This command is for Q&A ONLY - no file creation, no documentation generation, no code changes. + +## Usage Examples + +```bash +# Codebase questions +/lazy question "where is user authentication handled?" +/lazy question "how does the payment processor work?" +/lazy question "what files implement the REST API?" + +# General technical questions +/lazy question "what is the difference between REST and GraphQL?" +/lazy question "how to implement OAuth2 in Python?" +/lazy question "best practices for API versioning?" 
+``` + +## When to Use + +**Use this command when:** +- You need to understand how something works in the codebase +- You want to locate specific functionality +- You have general technical questions +- You need quick documentation lookups + +**Do NOT use for:** +- Creating documentation files +- Modifying code +- Generating new files +- Planning features (use `/lazy plan` instead) + +## Requirements + +**Input:** +- Single question string (clear and specific) +- Can be about codebase OR general knowledge + +**Critical:** +- **NO file creation** - answers only +- **NO .md files** - inline responses only +- **NO code generation** - explanation only +- **NO documentation updates** - read-only operation + +## Question Type Detection + +### Decision Logic + +```python +def should_use_codebase(question: str) -> bool: + """Decide if question is about codebase or general knowledge.""" + + codebase_indicators = [ + "in this", "this codebase", "this project", "this repo", + "where is", "how does", "why does", "what does", + "in our", "our codebase", "our project", + "file", "function", "class", "module", + "implemented", "defined", "located", + "show me", "find", "which file" + ] + + question_lower = question.lower() + + # If question mentions codebase-specific terms → use codebase + if any(ind in question_lower for ind in codebase_indicators): + return True + + # If question is general knowledge → use research agent + general_indicators = [ + "what is", "how to", "difference between", + "best practice", "tutorial", "documentation", + "learn", "explain", "guide", "introduction" + ] + + if any(ind in question_lower for ind in general_indicators): + return False + + # Default: assume codebase question + return True +``` + +### Examples by Type + +**Codebase Questions (searches project):** +- "where is user authentication handled?" +- "how does this project structure payments?" +- "what files implement the API endpoints?" +- "in our codebase, how is logging configured?" 
+- "show me where database migrations are defined" +- "which function handles token validation?" + +**General Questions (uses research agent):** +- "what is the difference between JWT and session tokens?" +- "how to implement OAuth2 in Python?" +- "best practices for API versioning?" +- "explain what GraphQL is" +- "tutorial on writing pytest fixtures" + +## Execution Workflow + +### Phase 1: Analyze Question + +```python +question = "$ARGUMENTS".strip() + +# Determine question type +is_codebase_question = should_use_codebase(question) + +if is_codebase_question: + approach = "codebase_search" + tools = ["Grep", "Glob", "Read"] +else: + approach = "research_agent" + tools = ["Task (research agent)"] +``` + +### Phase 2a: Codebase Question Path + +**If question is about the codebase:** + +```python +# 1. Extract search terms from question +search_terms = extract_keywords(question) +# Example: "where is authentication handled?" → ["authentication", "auth", "login"] + +# 2. Search codebase with Grep +for term in search_terms: + # Search for term in code + matches = grep(pattern=term, output_mode="files_with_matches") + + # Search for term in comments/docstrings + doc_matches = grep(pattern=f"(#|//|\"\"\"|\"\"\").*{term}", output_mode="content", -n=True) + +# 3. Prioritize results +relevant_files = prioritize_by_relevance(matches, question) +# Priority: src/ > tests/ > docs/ + +# 4. Read top relevant files +for file in relevant_files[:5]: # Top 5 most relevant + content = Read(file_path=file) + # Extract relevant sections based on search terms + +# 5. 
Analyze and answer +answer = """ +Based on codebase analysis: + +{synthesized answer from code} + +**References:** +- {file1}:{line1} +- {file2}:{line2} +""" +``` + +**Search Strategy:** + +```python +# Identify search terms based on question type +if "where" in question or "which file" in question: + # Location question - find files + search_mode = "files_with_matches" + search_scope = "filenames and content" + +elif "how does" in question or "how is" in question: + # Implementation question - show code + search_mode = "content" + search_scope = "function definitions and logic" + context_lines = 10 # Use -C flag + +elif "what is" in question and is_codebase_question: + # Definition question - find docstrings/comments + search_mode = "content" + search_scope = "docstrings, comments, README" +``` + +### Phase 2b: General Question Path + +**If question is general knowledge:** + +```python +Task( + prompt=f""" +You are the Research Agent for LAZY-DEV-FRAMEWORK. + +## Question to Answer + +{question} + +## Instructions + +1. This is a GENERAL technical question (not codebase-specific) +2. Answer based on: + - Your training knowledge + - Industry best practices + - Official documentation (if available) + - Common patterns and conventions + +3. Provide a clear, concise answer with: + - Direct answer to the question + - Key concepts explained + - Code examples if relevant (generic, not project-specific) + - Links to official docs/resources + +4. 
Structure answer for readability: + - Use bullet points for lists + - Use code blocks for examples + - Use clear section headers + +## Output Format + +**Answer:** +{direct answer} + +**Key Concepts:** +- {concept 1} +- {concept 2} + +**Example (if applicable):** +```language +{generic code example} +``` + +**Further Reading:** +- {resource 1} +- {resource 2} + +**Critical Reminder:** +- Do NOT create any files +- Do NOT search the codebase +- Do NOT reference project-specific code +- This is general knowledge only +""" +) +``` + +## Output Format + +### For Codebase Questions + +```markdown +## Answer + +Based on codebase analysis: + +{synthesized answer from code analysis} + +**Key Files:** +- `{file1}` - {purpose} +- `{file2}` - {purpose} + +**Code References:** +- {file1}:{line1} - {what happens here} +- {file2}:{line2} - {what happens here} + +**Flow:** +{if applicable, explain the flow through multiple files} +``` + +### For General Questions + +```markdown +## Answer + +{direct answer to question} + +**Key Concepts:** +- {concept 1 with explanation} +- {concept 2 with explanation} + +**Example:** +```language +{generic code example if relevant} +``` + +**Further Reading:** +- {official documentation link} +- {tutorial or guide link} +``` + +## Validation + +**Success criteria:** +- Question answered clearly and accurately +- References provided (for codebase questions) +- No files created or modified +- No commits made +- Response is conversational, not a file + +**Anti-patterns (MUST AVOID):** +- Creating documentation files +- Generating .md files +- Writing code to files +- Running git commands +- Creating pull requests + +## Examples in Action + +### Example 1: Codebase Location Question + +```bash +$ /lazy question "where is user authentication handled?" 
+ +## Answer + +Based on codebase analysis: + +User authentication is handled primarily in the `src/auth/` module: + +**Key Files:** +- `src/auth/login.py` - Main login endpoint and session management +- `src/auth/middleware.py` - Authentication middleware for protected routes +- `src/auth/jwt_handler.py` - JWT token generation and validation + +**Code References:** +- src/auth/login.py:45 - `authenticate_user()` validates credentials +- src/auth/middleware.py:23 - `require_auth()` decorator protects endpoints +- src/auth/jwt_handler.py:67 - `generate_token()` creates JWT tokens + +**Authentication Flow:** +1. User submits credentials → `login.py:authenticate_user()` +2. Credentials validated against database +3. JWT token generated → `jwt_handler.py:generate_token()` +4. Token returned to client +5. Protected routes checked by → `middleware.py:require_auth()` +``` + +### Example 2: Codebase Implementation Question + +```bash +$ /lazy question "how does the payment processor work?" + +## Answer + +Based on codebase analysis: + +The payment processor uses a strategy pattern with multiple payment providers: + +**Key Files:** +- `src/payment/processor.py` - Main processor coordinator +- `src/payment/providers/stripe.py` - Stripe integration +- `src/payment/providers/paypal.py` - PayPal integration + +**Processing Flow:** +1. Client initiates payment → `processor.py:process_payment()` +2. Processor selects provider based on payment method +3. Provider-specific handler called (Stripe/PayPal) +4. Payment validated and processed +5. 
Result logged and webhook sent + +**Code References:** +- src/payment/processor.py:89 - Main `process_payment()` entry point +- src/payment/processor.py:145 - Provider selection logic +- src/payment/providers/stripe.py:34 - Stripe charge creation +- src/payment/webhook.py:23 - Payment confirmation webhook + +**Error Handling:** +- Retries configured in processor.py:67 (3 attempts, exponential backoff) +- Failed payments logged to `payment_failures` table +- Customer notified via email on failure +``` + +### Example 3: General Technical Question + +```bash +$ /lazy question "what is the difference between REST and GraphQL?" + +## Answer + +REST and GraphQL are both API architectural patterns, but they differ in how clients request and receive data: + +**Key Differences:** + +1. **Data Fetching:** + - REST: Multiple endpoints, each returns fixed data structure + - GraphQL: Single endpoint, client specifies exact data needed + +2. **Over/Under-fetching:** + - REST: Often returns too much or too little data + - GraphQL: Client gets exactly what they request + +3. **Versioning:** + - REST: Typically requires version in URL (v1, v2) + - GraphQL: Schema evolution without versions + +4. **Request Structure:** + - REST: HTTP methods (GET, POST, PUT, DELETE) + - GraphQL: All requests are POST with query body + +**Example:** + +REST - Multiple requests needed: +```bash +GET /api/users/123 # Get user +GET /api/users/123/posts # Get user's posts +GET /api/posts/456/comments # Get post comments +``` + +GraphQL - Single request: +```graphql +query { + user(id: 123) { + name + email + posts { + title + comments { + text + author + } + } + } +} +``` + +**Further Reading:** +- [GraphQL Official Docs](https://graphql.org/learn/) +- [REST API Best Practices](https://restfulapi.net/) +``` + +### Example 4: Edge Case - Ambiguous Question + +```bash +$ /lazy question "how does authentication work?" 
+ +# Decision: Too ambiguous - clarify first + +## Clarification Needed + +Your question "how does authentication work?" could mean: + +1. **Codebase-specific:** "How does authentication work in THIS project?" + - Run: `/lazy question "how does authentication work in this codebase?"` + +2. **General concept:** "How does authentication work as a concept?" + - Run: `/lazy question "explain authentication concepts and best practices"` + +Please rephrase your question to be more specific. +``` + +## Decision Tree + +``` +Parse question + ↓ +Contains codebase indicators? ──Yes──→ Codebase Search Path + │ ↓ + No Extract keywords + ↓ ↓ +Contains general indicators? ──Yes──→ Grep/Glob codebase + │ ↓ + No Read relevant files + ↓ ↓ + Ambiguous Synthesize answer + ↓ ↓ +Ask for clarification Format with references + ↓ + Return answer + (NO FILES CREATED) + +Research Agent Path: + ↓ +Delegate to research agent + ↓ +Agent uses training knowledge + ↓ +Format answer with examples + ↓ +Return answer +(NO FILES CREATED) +``` + +## Key Principles + +1. **Read-Only Operation**: Never create, modify, or delete files +2. **Zero Artifacts**: No .md files, no commits, no PRs +3. **Smart Detection**: Auto-determine codebase vs general question +4. **Cite Sources**: Always reference file:line for codebase answers +5. **Conversational**: Return inline answers, not documentation +6. **Focused Search**: Top 5 most relevant files only +7. **Context-Aware**: Use -C flag for code context when needed + +## Integration Points + +**With other commands:** +```bash +# Learn about codebase before implementing +/lazy question "where is user validation implemented?" +/lazy code "add email validation to user signup" + +# Understand before documenting +/lazy question "how does the API rate limiting work?" 
+/lazy docs src/api/rate_limiter.py + +# Research before planning +/lazy question "best practices for OAuth2 implementation" +/lazy plan "add OAuth2 authentication" +``` + +## Environment Variables + +None required - this is a pure Q&A command. + +## Troubleshooting + +**Issue: "No results found"** +``` +Try rephrasing your question: +- Use different keywords +- Be more specific about file types or modules +- Check if functionality exists in project +``` + +**Issue: "Too many results"** +``` +Narrow your question: +- Specify module or component +- Add context about feature area +- Ask about specific file/function +``` + +**Issue: "Wrong type detected"** +``` +Force codebase search: +- Add "in this codebase" to question + +Force general search: +- Add "explain" or "what is" to question +``` + +## Anti-Patterns to Avoid + +**DO NOT:** +- Create documentation files from answers +- Generate code files based on research +- Write .md files with Q&A content +- Make commits or PRs +- Modify existing files +- Create new directories + +**DO:** +- Answer questions inline +- Provide file references +- Show code snippets in response +- Explain concepts clearly +- Link to external resources + +--- + +**Version:** 2.2.0 +**Status:** Production-Ready +**Philosophy:** Ask anything, get answers, create nothing. 
diff --git a/.claude/hooks/log_events.py b/.claude/hooks/log_events.py index 97b8649..c4c89dc 100644 --- a/.claude/hooks/log_events.py +++ b/.claude/hooks/log_events.py @@ -21,7 +21,7 @@ import sys from pathlib import Path from datetime import datetime -from hook_utils import sanitize_for_logging, sanitize_dict_for_logging +from hook_utils import sanitize_for_logging # Configure logging to stderr with timestamp logging.basicConfig( diff --git a/.claude/hooks/post_tool_use_format.py b/.claude/hooks/post_tool_use_format.py index 94dd806..2f66e1d 100644 --- a/.claude/hooks/post_tool_use_format.py +++ b/.claude/hooks/post_tool_use_format.py @@ -55,9 +55,9 @@ def format_python(file_path: Path) -> tuple[bool, str]: Tuple of (success, message) """ try: - # Try Black first + # Try Black first (using python -m for better reliability) result = subprocess.run( - ["black", "--quiet", str(file_path)], + [sys.executable, "-m", "black", "--quiet", str(file_path)], capture_output=True, text=True, timeout=3, @@ -65,9 +65,9 @@ def format_python(file_path: Path) -> tuple[bool, str]: black_success = result.returncode == 0 - # Try Ruff format + # Try Ruff format (using python -m for better reliability) result = subprocess.run( - ["ruff", "format", str(file_path)], + [sys.executable, "-m", "ruff", "format", str(file_path)], capture_output=True, text=True, timeout=3, @@ -88,8 +88,8 @@ def format_python(file_path: Path) -> tuple[bool, str]: logger.warning("Python formatter timed out (3s)") except subprocess.SubprocessError as e: logger.warning(f"Formatter subprocess error: {type(e).__name__}") - except FileNotFoundError: - logger.debug("Black/Ruff not found in PATH") + except (FileNotFoundError, ModuleNotFoundError): + logger.debug("Black/Ruff not found as Python modules") return False, "Formatters not available (Black/Ruff)" diff --git a/.claude/hooks/post_tool_use_quality.py b/.claude/hooks/post_tool_use_quality.py index 703f32a..04a7a13 100644 --- 
a/.claude/hooks/post_tool_use_quality.py +++ b/.claude/hooks/post_tool_use_quality.py @@ -20,7 +20,6 @@ import os import sys import subprocess -from pathlib import Path # Configure logging to stderr with timestamp logging.basicConfig( diff --git a/.claude/hooks/pre_tool_use.py b/.claude/hooks/pre_tool_use.py index a74bc90..f662c3e 100644 --- a/.claude/hooks/pre_tool_use.py +++ b/.claude/hooks/pre_tool_use.py @@ -25,7 +25,7 @@ import sys from pathlib import Path from datetime import datetime -from hook_utils import sanitize_for_logging, sanitize_dict_for_logging +from hook_utils import sanitize_dict_for_logging # Configure logging to stderr with timestamp logging.basicConfig( @@ -445,7 +445,7 @@ def main(): file=sys.stderr, ) print( - f"Tip: Use Glob or Grep tools instead of bash find/grep for code search.", + "Tip: Use Glob or Grep tools instead of bash find/grep for code search.", file=sys.stderr, ) sys.exit(2) diff --git a/.claude/hooks/session_start.py b/.claude/hooks/session_start.py index 80471f7..78044d7 100644 --- a/.claude/hooks/session_start.py +++ b/.claude/hooks/session_start.py @@ -193,6 +193,48 @@ def initialize_session_state(session_id: str, context: dict) -> None: logger.warning(f"Unexpected error writing session file: {type(e).__name__}") +def ensure_memory_directory() -> None: + """ + Auto-create .claude/memory/ directory if it doesn't exist. + This ensures MCP Memory server has a place to store data. + """ + memory_dir = Path(".claude/memory") + if not memory_dir.exists(): + try: + memory_dir.mkdir(parents=True, exist_ok=True) + logger.info("Created .claude/memory/ directory for MCP Memory") + except OSError as e: + logger.warning(f"Failed to create memory directory: {type(e).__name__}") + + +def check_environment_variables() -> dict: + """ + Check required environment variables and warn if missing. 
+ + Returns: + Dictionary with environment variable status + """ + status = {} + + # Check ENRICHMENT_MODEL (required for pre-prompt enrichment) + enrichment_model = os.getenv("ENRICHMENT_MODEL") + if not enrichment_model: + status["ENRICHMENT_MODEL"] = "missing" + logger.warning( + "⚠️ ENRICHMENT_MODEL not set. Pre-prompt enrichment will not work." + ) + logger.warning(" Set it with: export ENRICHMENT_MODEL=claude-3-5-haiku") + else: + status["ENRICHMENT_MODEL"] = enrichment_model + logger.info(f"✓ ENRICHMENT_MODEL: {enrichment_model}") + + # Check MEMORY_FILE_PATH (optional, has default) + memory_path = os.getenv("MEMORY_FILE_PATH", ".claude/memory/memory.jsonl") + status["MEMORY_FILE_PATH"] = memory_path + + return status + + def log_session_start(session_id: str, context: dict) -> None: """ Log session start event. @@ -238,6 +280,12 @@ def log_session_start(session_id: str, context: dict) -> None: def main(): """Hook entry point.""" try: + # Auto-create memory directory (new feature) + ensure_memory_directory() + + # Check environment variables (new feature) + check_environment_variables() + # Read JSON input from stdin input_data = json.load(sys.stdin) @@ -279,10 +327,10 @@ def main(): # Also print user-friendly message to stderr (visible in console) print("\n=== LAZY-DEV-FRAMEWORK Session Started ===", file=sys.stderr) print(f"Session ID: {session_id}", file=sys.stderr) - print(f'PRD loaded: {"✓" if context["prd"] else "✗"}', file=sys.stderr) - print(f'TASKS loaded: {"✓" if context["tasks"] else "✗"}', file=sys.stderr) - print(f'Git branch: {context["branch"] or "N/A"}', file=sys.stderr) - print(f'Git history: {len(context["git_history"])} commits', file=sys.stderr) + print(f"PRD loaded: {'✓' if context['prd'] else '✗'}", file=sys.stderr) + print(f"TASKS loaded: {'✓' if context['tasks'] else '✗'}", file=sys.stderr) + print(f"Git branch: {context['branch'] or 'N/A'}", file=sys.stderr) + print(f"Git history: {len(context['git_history'])} commits", 
file=sys.stderr) print("==========================================\n", file=sys.stderr) sys.exit(0) diff --git a/.claude/hooks/stop.py b/.claude/hooks/stop.py index ab1c142..64b8434 100644 --- a/.claude/hooks/stop.py +++ b/.claude/hooks/stop.py @@ -191,10 +191,10 @@ def should_block_completion( # Enforce minimum tests if configured if min_tests is not None and test_info["test_count"] < min_tests: - return True, f'Minimum tests not met: {test_info["test_count"]}/{min_tests}' + return True, f"Minimum tests not met: {test_info['test_count']}/{min_tests}" # Allow if tests passed - return False, f'All tests passing ({test_info["test_count"]} tests)' + return False, f"All tests passing ({test_info['test_count']} tests)" def log_stop_event( @@ -315,7 +315,7 @@ def main(): print(json.dumps(output)) # Print success message - print(f"\n✅ All tests passing! Ready for review.\n", file=sys.stderr) + print("\n✅ All tests passing! Ready for review.\n", file=sys.stderr) sys.exit(0) diff --git a/.claude/hooks/user_prompt_submit.py b/.claude/hooks/user_prompt_submit.py index ab23533..6223134 100644 --- a/.claude/hooks/user_prompt_submit.py +++ b/.claude/hooks/user_prompt_submit.py @@ -479,13 +479,13 @@ def format_context_injection(git_context: dict, current_task: str | None) -> str def log_prompt(session_id: str, input_data: dict) -> None: """ - Log user prompt to logs/user_prompt_submit.json. + Log user prompt to .claude/data/logs/user_prompt_submit.json. 
Args: session_id: Session identifier input_data: Full hook input data """ - log_dir = Path("logs") + log_dir = Path(".claude/data/logs") log_dir.mkdir(parents=True, exist_ok=True) log_file = log_dir / "user_prompt_submit.json" @@ -574,15 +574,13 @@ def main(): context_injection = format_context_injection(git_context, current_task) # Lightweight output-style selection - style_name = None - style_conf = 0.0 - style_reason = "" if os.getenv("LAZYDEV_DISABLE_STYLE") not in {"1", "true", "TRUE"}: logger.debug("Selecting output style") sel, conf, reason = choose_output_style(original_prompt) if sel != "off": - style_name, style_conf, style_reason = sel, conf, reason - logger.info(f"Output style selected: {sel} (confidence: {conf:.2f}, reason: {reason})") + logger.info( + f"Output style selected: {sel} (confidence: {conf:.2f}, reason: {reason})" + ) style_block = ( f"\n\n## Output Style (Auto)\n\n{build_style_block(sel)}\n" ) @@ -620,7 +618,9 @@ def main(): logger.debug("Detecting memory intent") mi = detect_memory_intent(original_prompt) if mi.get("enabled"): - logger.info(f"Memory graph skill activated: intents={mi['intents']}, mentions={len(mi['mentions'])}") + logger.info( + f"Memory graph skill activated: intents={mi['intents']}, mentions={len(mi['mentions'])}" + ) additional_parts.append( build_memory_skill_block(mi["intents"], mi["mentions"]) ) @@ -630,7 +630,9 @@ def main(): logger.info("Memory skill disabled via LAZYDEV_DISABLE_MEMORY_SKILL") additional_context = "".join(additional_parts) - logger.info(f"Total additional context length: {len(additional_context)} characters") + logger.info( + f"Total additional context length: {len(additional_context)} characters" + ) # Log the prompt logger.debug("Logging prompt to file") @@ -659,7 +661,9 @@ def main(): except json.JSONDecodeError as e: # Handle JSON decode errors gracefully - logger.error(f"JSON decode error in user_prompt_submit: {type(e).__name__} - {e}") + logger.error( + f"JSON decode error in 
user_prompt_submit: {type(e).__name__} - {e}" + ) sys.exit(0) except IOError as e: # Handle file I/O errors gracefully @@ -667,7 +671,10 @@ def main(): sys.exit(0) except Exception as e: # Handle any other errors gracefully - logger.error(f"Unexpected error in user_prompt_submit: {type(e).__name__} - {e}", exc_info=True) + logger.error( + f"Unexpected error in user_prompt_submit: {type(e).__name__} - {e}", + exc_info=True, + ) sys.exit(0) diff --git a/.claude/scripts/format.py b/.claude/scripts/format.py index 93c07f3..66e760d 100644 --- a/.claude/scripts/format.py +++ b/.claude/scripts/format.py @@ -43,25 +43,25 @@ def format_code(path: str, session_id: Optional[str] = None) -> int: "script": "format.py", "path": str(path_obj), "session_id": session_id, - "steps": [] + "steps": [], } print(f"[FORMAT] Running Black on {path}...") start_time = datetime.utcnow() black_result = subprocess.run( - ["black", str(path_obj)], - capture_output=True, - text=True + ["black", str(path_obj)], capture_output=True, text=True ) black_duration = (datetime.utcnow() - start_time).total_seconds() - log_entry["steps"].append({ - "tool": "black", - "duration_seconds": black_duration, - "exit_code": black_result.returncode, - "stdout": black_result.stdout, - "stderr": black_result.stderr - }) + log_entry["steps"].append( + { + "tool": "black", + "duration_seconds": black_duration, + "exit_code": black_result.returncode, + "stdout": black_result.stdout, + "stderr": black_result.stderr, + } + ) if black_result.returncode != 0: print(f"[ERROR] Black failed:\n{black_result.stderr}") @@ -71,19 +71,19 @@ def format_code(path: str, session_id: Optional[str] = None) -> int: print(f"[FORMAT] Running Ruff format on {path}...") start_time = datetime.utcnow() ruff_result = subprocess.run( - ["ruff", "format", str(path_obj)], - capture_output=True, - text=True + ["ruff", "format", str(path_obj)], capture_output=True, text=True ) ruff_duration = (datetime.utcnow() - start_time).total_seconds() - 
log_entry["steps"].append({ - "tool": "ruff", - "duration_seconds": ruff_duration, - "exit_code": ruff_result.returncode, - "stdout": ruff_result.stdout, - "stderr": ruff_result.stderr - }) + log_entry["steps"].append( + { + "tool": "ruff", + "duration_seconds": ruff_duration, + "exit_code": ruff_result.returncode, + "stdout": ruff_result.stdout, + "stderr": ruff_result.stderr, + } + ) if ruff_result.returncode != 0: print(f"[ERROR] Ruff format failed:\n{ruff_result.stderr}") @@ -117,12 +117,12 @@ def _write_log(log_entry: dict, session_id: Optional[str]) -> None: # Append to existing log logs = [] if log_file.exists(): - with open(log_file, 'r') as f: + with open(log_file, "r") as f: logs = json.load(f) logs.append(log_entry) - with open(log_file, 'w') as f: + with open(log_file, "w") as f: json.dump(logs, f, indent=2) diff --git a/.claude/scripts/gh_wrapper.py b/.claude/scripts/gh_wrapper.py index 223ec7a..8fab708 100644 --- a/.claude/scripts/gh_wrapper.py +++ b/.claude/scripts/gh_wrapper.py @@ -16,7 +16,7 @@ def create_issue( labels: Optional[List[str]] = None, milestone: Optional[str] = None, assignee: Optional[str] = None, - repo: Optional[str] = None + repo: Optional[str] = None, ) -> str: """ Create GitHub issue. @@ -67,7 +67,7 @@ def create_pr( base: str = "main", head: Optional[str] = None, labels: Optional[List[str]] = None, - repo: Optional[str] = None + repo: Optional[str] = None, ) -> str: """ Create GitHub pull request. 
@@ -93,12 +93,7 @@ def create_pr( >>> GitHubWrapper.create_pr("Feature", "Description", base="main") 'https://github.com/owner/repo/pull/456' """ - cmd = [ - "gh", "pr", "create", - "--title", title, - "--body", body, - "--base", base - ] + cmd = ["gh", "pr", "create", "--title", title, "--body", body, "--base", base] if head: cmd.extend(["--head", head]) @@ -130,9 +125,7 @@ def get_current_branch() -> str: 'feature/new-feature' """ result = subprocess.run( - ["git", "rev-parse", "--abbrev-ref", "HEAD"], - capture_output=True, - text=True + ["git", "rev-parse", "--abbrev-ref", "HEAD"], capture_output=True, text=True ) if result.returncode != 0: @@ -162,9 +155,19 @@ def list_issues(repo: str, state: str = "open") -> List[Dict]: import json result = subprocess.run( - ["gh", "issue", "list", "--repo", repo, "--state", state, "--json", "number,title,body"], + [ + "gh", + "issue", + "list", + "--repo", + repo, + "--state", + state, + "--json", + "number,title,body", + ], capture_output=True, - text=True + text=True, ) if result.returncode != 0: @@ -177,7 +180,9 @@ def main() -> None: """CLI interface for gh_wrapper.py.""" if len(sys.argv) < 2: print("Usage:") - print(" python gh_wrapper.py create-issue --title '...' --body '...' [--labels '...']") + print( + " python gh_wrapper.py create-issue --title '...' --body '...' [--labels '...']" + ) print(" python gh_wrapper.py create-pr --title '...' --body '...' 
--base main") sys.exit(1) @@ -185,9 +190,22 @@ def main() -> None: try: if command == "create-issue": - title = next((sys.argv[i+1] for i, arg in enumerate(sys.argv) if arg == "--title"), None) - body = next((sys.argv[i+1] for i, arg in enumerate(sys.argv) if arg == "--body"), None) - labels_str = next((sys.argv[i+1] for i, arg in enumerate(sys.argv) if arg == "--labels"), None) + title = next( + (sys.argv[i + 1] for i, arg in enumerate(sys.argv) if arg == "--title"), + None, + ) + body = next( + (sys.argv[i + 1] for i, arg in enumerate(sys.argv) if arg == "--body"), + None, + ) + labels_str = next( + ( + sys.argv[i + 1] + for i, arg in enumerate(sys.argv) + if arg == "--labels" + ), + None, + ) if not title or not body: print("[ERROR] Missing required: --title and --body") @@ -196,17 +214,31 @@ def main() -> None: labels = labels_str.split(",") if labels_str else None issue_url = GitHubWrapper.create_issue( - title=title, - body=body, - labels=labels + title=title, body=body, labels=labels ) print(f"[SUCCESS] Issue created: {issue_url}") elif command == "create-pr": - title = next((sys.argv[i+1] for i, arg in enumerate(sys.argv) if arg == "--title"), None) - body = next((sys.argv[i+1] for i, arg in enumerate(sys.argv) if arg == "--body"), None) - base = next((sys.argv[i+1] for i, arg in enumerate(sys.argv) if arg == "--base"), "main") - labels_str = next((sys.argv[i+1] for i, arg in enumerate(sys.argv) if arg == "--labels"), None) + title = next( + (sys.argv[i + 1] for i, arg in enumerate(sys.argv) if arg == "--title"), + None, + ) + body = next( + (sys.argv[i + 1] for i, arg in enumerate(sys.argv) if arg == "--body"), + None, + ) + base = next( + (sys.argv[i + 1] for i, arg in enumerate(sys.argv) if arg == "--base"), + "main", + ) + labels_str = next( + ( + sys.argv[i + 1] + for i, arg in enumerate(sys.argv) + if arg == "--labels" + ), + None, + ) if not title or not body: print("[ERROR] Missing required: --title and --body") @@ -215,10 +247,7 @@ def main() -> 
None: labels = labels_str.split(",") if labels_str else None pr_url = GitHubWrapper.create_pr( - title=title, - body=body, - base=base, - labels=labels + title=title, body=body, base=base, labels=labels ) print(f"[SUCCESS] PR created: {pr_url}") diff --git a/.claude/scripts/lint.py b/.claude/scripts/lint.py index ac04414..5e24db2 100644 --- a/.claude/scripts/lint.py +++ b/.claude/scripts/lint.py @@ -43,7 +43,7 @@ def lint_code(path: str, session_id: Optional[str] = None) -> int: "path": str(path_obj), "session_id": session_id, "violations": [], - "auto_fixed": [] + "auto_fixed": [], } print(f"[LINT] Running Ruff check on {path}...") @@ -51,7 +51,7 @@ def lint_code(path: str, session_id: Optional[str] = None) -> int: result = subprocess.run( ["ruff", "check", str(path_obj), "--fix", "--output-format=json"], capture_output=True, - text=True + text=True, ) duration = (datetime.utcnow() - start_time).total_seconds() @@ -66,7 +66,9 @@ def lint_code(path: str, session_id: Optional[str] = None) -> int: if violations: print(f"Found {len(violations)} violations") for violation in violations[:5]: # Show first 5 - print(f" {violation.get('code', 'UNKNOWN')}: {violation.get('message', 'No message')}") + print( + f" {violation.get('code', 'UNKNOWN')}: {violation.get('message', 'No message')}" + ) if len(violations) > 5: print(f" ... 
and {len(violations) - 5} more") except json.JSONDecodeError: @@ -74,7 +76,7 @@ def lint_code(path: str, session_id: Optional[str] = None) -> int: print(result.stdout) if result.returncode != 0: - print(f"[ERROR] Ruff check failed with issues") + print("[ERROR] Ruff check failed with issues") log_entry["status"] = "failed" _write_log(log_entry, session_id) return result.returncode @@ -106,12 +108,12 @@ def _write_log(log_entry: dict, session_id: Optional[str]) -> None: # Append to existing log logs = [] if log_file.exists(): - with open(log_file, 'r') as f: + with open(log_file, "r") as f: logs = json.load(f) logs.append(log_entry) - with open(log_file, 'w') as f: + with open(log_file, "w") as f: json.dump(logs, f, indent=2) diff --git a/.claude/scripts/run_pipeline.py b/.claude/scripts/run_pipeline.py index 77f7f6b..9c8f2b8 100644 --- a/.claude/scripts/run_pipeline.py +++ b/.claude/scripts/run_pipeline.py @@ -5,7 +5,6 @@ import sys from datetime import datetime from pathlib import Path -from typing import Tuple def run_pipeline(path: str, session_id: str = None) -> int: @@ -44,7 +43,7 @@ def run_pipeline(path: str, session_id: str = None) -> int: ("Format", "format.py", "[FORMAT]"), ("Lint", "lint.py", "[LINT]"), ("Type Check", "type_check.py", "[TYPE]"), - ("Tests", "test_runner.py", "[TEST]") + ("Tests", "test_runner.py", "[TEST]"), ] total_start = datetime.utcnow() @@ -58,7 +57,7 @@ def run_pipeline(path: str, session_id: str = None) -> int: result = subprocess.run( ["python", str(scripts_dir / script), path, "--session", session_id], capture_output=True, - text=True + text=True, ) stage_duration = (datetime.utcnow() - stage_start).total_seconds() diff --git a/.claude/scripts/test_runner.py b/.claude/scripts/test_runner.py index d5bb480..74ad1ca 100644 --- a/.claude/scripts/test_runner.py +++ b/.claude/scripts/test_runner.py @@ -43,15 +43,22 @@ def run_tests(path: str = "tests/", session_id: Optional[str] = None) -> int: "script": "test_runner.py", "path": 
str(path_obj), "session_id": session_id, - "test_results": {} + "test_results": {}, } print(f"[TEST] Running Pytest on {path}...") start_time = datetime.utcnow() result = subprocess.run( - ["pytest", str(path_obj), "-v", "--cov=src", "--cov-report=term", "--cov-report=json"], + [ + "pytest", + str(path_obj), + "-v", + "--cov=src", + "--cov-report=term", + "--cov-report=json", + ], capture_output=True, - text=True + text=True, ) duration = (datetime.utcnow() - start_time).total_seconds() @@ -65,18 +72,20 @@ def run_tests(path: str = "tests/", session_id: Optional[str] = None) -> int: coverage_file = Path("coverage.json") if coverage_file.exists(): try: - with open(coverage_file, 'r') as f: + with open(coverage_file, "r") as f: coverage_data = json.load(f) log_entry["coverage"] = { - "total_coverage": coverage_data.get("totals", {}).get("percent_covered", 0), - "files": coverage_data.get("files", {}) + "total_coverage": coverage_data.get("totals", {}).get( + "percent_covered", 0 + ), + "files": coverage_data.get("files", {}), } except (json.JSONDecodeError, FileNotFoundError): # Coverage file may not exist or be malformed pass if result.returncode != 0: - print(f"\n[ERROR] Tests failed") + print("\n[ERROR] Tests failed") if result.stderr: print(f"Error details: {result.stderr}") log_entry["status"] = "failed" @@ -111,12 +120,12 @@ def _write_log(log_entry: dict, session_id: Optional[str]) -> None: # Append to existing log logs = [] if log_file.exists(): - with open(log_file, 'r') as f: + with open(log_file, "r") as f: logs = json.load(f) logs.append(log_entry) - with open(log_file, 'w') as f: + with open(log_file, "w") as f: json.dump(logs, f, indent=2) diff --git a/.claude/scripts/type_check.py b/.claude/scripts/type_check.py index 0402b07..b83b45c 100644 --- a/.claude/scripts/type_check.py +++ b/.claude/scripts/type_check.py @@ -42,15 +42,13 @@ def type_check(path: str, session_id: Optional[str] = None) -> int: "script": "type_check.py", "path": str(path_obj), 
"session_id": session_id, - "errors": [] + "errors": [], } print(f"[TYPE] Running Mypy on {path}...") start_time = datetime.utcnow() result = subprocess.run( - ["mypy", str(path_obj), "--strict"], - capture_output=True, - text=True + ["mypy", str(path_obj), "--strict"], capture_output=True, text=True ) duration = (datetime.utcnow() - start_time).total_seconds() @@ -63,8 +61,8 @@ def type_check(path: str, session_id: Optional[str] = None) -> int: print(result.stdout) if result.returncode != 0: - print(f"\n[ERROR] Type checking failed") - print(f"[INFO] Fix type errors and re-run") + print("\n[ERROR] Type checking failed") + print("[INFO] Fix type errors and re-run") log_entry["status"] = "failed" # Parse errors for structured logging @@ -95,16 +93,20 @@ def _parse_mypy_errors(output: str) -> List[dict]: [{'file': 'src/auth.py', 'line': '42', 'column': '12', 'message': '...'}] """ errors = [] - for line in output.split('\n'): - if ':' in line and 'error:' in line: - parts = line.split(':') + for line in output.split("\n"): + if ":" in line and "error:" in line: + parts = line.split(":") if len(parts) >= 4: - errors.append({ - "file": parts[0].strip(), - "line": parts[1].strip(), - "column": parts[2].strip() if parts[2].strip().isdigit() else None, - "message": ':'.join(parts[3:]).replace('error:', '').strip() - }) + errors.append( + { + "file": parts[0].strip(), + "line": parts[1].strip(), + "column": ( + parts[2].strip() if parts[2].strip().isdigit() else None + ), + "message": ":".join(parts[3:]).replace("error:", "").strip(), + } + ) return errors @@ -129,12 +131,12 @@ def _write_log(log_entry: dict, session_id: Optional[str]) -> None: # Append to existing log logs = [] if log_file.exists(): - with open(log_file, 'r') as f: + with open(log_file, "r") as f: logs = json.load(f) logs.append(log_entry) - with open(log_file, 'w') as f: + with open(log_file, "w") as f: json.dump(logs, f, indent=2) diff --git a/.claude/scripts/validate_tools.py 
b/.claude/scripts/validate_tools.py index 6b23e67..742d3b7 100644 --- a/.claude/scripts/validate_tools.py +++ b/.claude/scripts/validate_tools.py @@ -18,16 +18,11 @@ def check_tool(tool_name: str, command: List[str]) -> Tuple[bool, str]: Tuple of (success: bool, message: str). """ try: - result = subprocess.run( - command, - capture_output=True, - text=True, - timeout=5 - ) + result = subprocess.run(command, capture_output=True, text=True, timeout=5) if result.returncode == 0: # Extract version from output - version = result.stdout.strip().split('\n')[0] + version = result.stdout.strip().split("\n")[0] return True, f"✅ {tool_name}: {version}" else: return False, f"❌ {tool_name}: Command failed (exit {result.returncode})" @@ -80,9 +75,16 @@ def main() -> None: print("\nInstallation instructions:") # Check which tools are missing - failed = [(name, cmd) for (success, _), (name, cmd) in zip(results, tools) if not success] - - if any("Black" in name or "Ruff" in name or "Mypy" in name or "Pytest" in name for name, _ in failed): + failed = [ + (name, cmd) + for (success, _), (name, cmd) in zip(results, tools) + if not success + ] + + if any( + "Black" in name or "Ruff" in name or "Mypy" in name or "Pytest" in name + for name, _ in failed + ): print("\n Install Python tools:") print(" pip install black ruff mypy pytest pytest-cov") print(" or:") diff --git a/.claude/settings.json b/.claude/settings.json index f14042d..b4d80b8 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -47,7 +47,7 @@ "hooks": [ { "type": "command", - "command": "uv run $CLAUDE_PROJECT_DIR/.claude/hooks/session_start.py" + "command": "python .claude/hooks/session_start.py" } ] } @@ -57,11 +57,11 @@ "hooks": [ { "type": "command", - "command": "uv run $CLAUDE_PROJECT_DIR/.claude/hooks/user_prompt_submit.py" + "command": "python .claude/hooks/user_prompt_submit.py" }, { "type": "command", - "command": "uv run $CLAUDE_PROJECT_DIR/.claude/hooks/pre_prompt_enrichment.py" + "command": 
"python .claude/hooks/pre_prompt_enrichment.py" } ] } @@ -71,11 +71,11 @@ "hooks": [ { "type": "command", - "command": "uv run $CLAUDE_PROJECT_DIR/.claude/hooks/pre_tool_use.py" + "command": "python .claude/hooks/pre_tool_use.py" }, { "type": "command", - "command": "uv run $CLAUDE_PROJECT_DIR/.claude/hooks/log_events.py --event PreToolUse" + "command": "python .claude/hooks/log_events.py --event PreToolUse" } ] } @@ -85,19 +85,19 @@ "hooks": [ { "type": "command", - "command": "uv run $CLAUDE_PROJECT_DIR/.claude/hooks/post_tool_use_format.py" + "command": "python .claude/hooks/post_tool_use_format.py" }, { "type": "command", - "command": "uv run $CLAUDE_PROJECT_DIR/.claude/hooks/log_events.py --event PostToolUse" + "command": "python .claude/hooks/log_events.py --event PostToolUse" }, { "type": "command", - "command": "uv run $CLAUDE_PROJECT_DIR/.claude/hooks/memory_suggestions.py" + "command": "python .claude/hooks/memory_suggestions.py" }, { "type": "command", - "command": "uv run $CLAUDE_PROJECT_DIR/.claude/hooks/memory_router.py" + "command": "python .claude/hooks/memory_router.py" } ] } @@ -107,15 +107,15 @@ "hooks": [ { "type": "command", - "command": "uv run $CLAUDE_PROJECT_DIR/.claude/hooks/stop.py" + "command": "python .claude/hooks/stop.py" }, { "type": "command", - "command": "uv run $CLAUDE_PROJECT_DIR/.claude/hooks/log_events.py --event Stop" + "command": "python .claude/hooks/log_events.py --event Stop" }, { "type": "command", - "command": "uv run $CLAUDE_PROJECT_DIR/.claude/hooks/memory_router.py" + "command": "python .claude/hooks/memory_router.py" } ] } @@ -125,7 +125,7 @@ "hooks": [ { "type": "command", - "command": "uv run $CLAUDE_PROJECT_DIR/.claude/hooks/log_events.py --event SubagentStop" + "command": "python .claude/hooks/log_events.py --event SubagentStop" } ] } diff --git a/.claude/skills/agent-selector/SKILL.md b/.claude/skills/agent-selector/SKILL.md new file mode 100644 index 0000000..7bf75a5 --- /dev/null +++ 
b/.claude/skills/agent-selector/SKILL.md @@ -0,0 +1,530 @@ +--- +name: agent-selector +description: Automatically selects the best specialized agent based on user prompt keywords and task type. Use when routing work to coder, tester, reviewer, research, refactor, documentation, or cleanup agents. +--- + +# Agent Selector Skill + +**Purpose**: Route tasks to the most appropriate specialized agent for optimal results. + +**Trigger Words**: test, write tests, unittest, coverage, pytest, how to, documentation, learn, research, review, check code, code quality, security audit, refactor, clean up, improve code, simplify, document, docstring, readme, api docs + +--- + +## Quick Decision: Which Agent? + +```python +def select_agent(prompt: str, context: dict) -> str: + """Fast agent selection based on prompt keywords and context.""" + + prompt_lower = prompt.lower() + + # Priority order matters - check most specific first + + # Testing keywords (high priority) + testing_keywords = [ + "test", "unittest", "pytest", "coverage", "test case", + "unit test", "integration test", "e2e test", "tdd", + "test suite", "test runner", "jest", "mocha" + ] + if any(k in prompt_lower for k in testing_keywords): + return "tester" + + # Research keywords (before implementation) + research_keywords = [ + "how to", "how do i", "documentation", "learn", "research", + "fetch docs", "find examples", "best practices", + "which library", "compare options", "what is", "explain" + ] + if any(k in prompt_lower for k in research_keywords): + return "research" + + # Review keywords (code quality) + review_keywords = [ + "review", "check code", "code quality", "security audit", + "validate", "verify", "inspect", "lint", "analyze" + ] + if any(k in prompt_lower for k in review_keywords): + return "reviewer" + + # Refactoring keywords + refactor_keywords = [ + "refactor", "clean up", "improve code", "simplify", + "optimize", "restructure", "reorganize", "extract" + ] + if any(k in prompt_lower for k in 
refactor_keywords): + return "refactor" + + # Documentation keywords + doc_keywords = [ + "document", "docstring", "readme", "api docs", + "write docs", "update docs", "comment", "annotation" + ] + if any(k in prompt_lower for k in doc_keywords): + return "documentation" + + # Cleanup keywords + cleanup_keywords = [ + "remove dead code", "unused imports", "orphaned files", + "cleanup", "prune", "delete unused" + ] + if any(k in prompt_lower for k in cleanup_keywords): + return "cleanup" + + # Default: coder for implementation tasks + # (add, build, create, fix, implement, develop) + return "coder" +``` + +--- + +## Agent Selection Logic + +### 1. **Tester Agent** - Testing & Coverage +``` +Triggers: +- "test", "unittest", "pytest", "coverage" +- "write tests for X" +- "add test cases" +- "increase coverage" +- "test suite", "test runner" + +Examples: +✓ "write unit tests for auth module" +✓ "add pytest coverage for payment processor" +✓ "create integration tests" +``` + +**Agent Capabilities:** +- Write unit, integration, and E2E tests +- Increase test coverage +- Mock external dependencies +- Test edge cases +- Verify test quality + +--- + +### 2. **Research Agent** - Learning & Discovery +``` +Triggers: +- "how to", "how do I", "learn" +- "documentation", "research" +- "fetch docs", "find examples" +- "which library", "compare options" +- "what is", "explain" + +Examples: +✓ "how to implement OAuth2 in FastAPI" +✓ "research best practices for API rate limiting" +✓ "fetch documentation for Stripe API" +✓ "compare Redis vs Memcached" +``` + +**Agent Capabilities:** +- Fetch external documentation +- Search for code examples +- Compare library options +- Explain technical concepts +- Find best practices + +--- + +### 3. 
**Reviewer Agent** - Code Quality & Security +``` +Triggers: +- "review", "check code", "code quality" +- "security audit", "validate", "verify" +- "inspect", "lint", "analyze" + +Examples: +✓ "review the authentication implementation" +✓ "check code quality in payment module" +✓ "security audit for user input handling" +✓ "validate error handling" +``` + +**Agent Capabilities:** +- Code quality review +- Security vulnerability detection (OWASP) +- Best practices validation +- Performance anti-pattern detection +- Architecture compliance + +--- + +### 4. **Refactor Agent** - Code Improvement +``` +Triggers: +- "refactor", "clean up", "improve code" +- "simplify", "optimize", "restructure" +- "reorganize", "extract" + +Examples: +✓ "refactor the user service to reduce complexity" +✓ "clean up duplicate code in handlers" +✓ "simplify the authentication flow" +✓ "extract common logic into utils" +``` + +**Agent Capabilities:** +- Reduce code duplication +- Improve code structure +- Extract reusable components +- Simplify complex logic +- Optimize algorithms + +--- + +### 5. **Documentation Agent** - Docs & Comments +``` +Triggers: +- "document", "docstring", "readme" +- "api docs", "write docs", "update docs" +- "comment", "annotation" + +Examples: +✓ "document the payment API endpoints" +✓ "add docstrings to auth module" +✓ "update README with setup instructions" +✓ "generate API documentation" +``` + +**Agent Capabilities:** +- Generate docstrings (Google style) +- Write README sections +- Create API documentation +- Add inline comments +- Update existing docs + +--- + +### 6. 
**Cleanup Agent** - Dead Code Removal +``` +Triggers: +- "remove dead code", "unused imports" +- "orphaned files", "cleanup", "prune" +- "delete unused" + +Examples: +✓ "remove dead code from legacy module" +✓ "clean up unused imports" +✓ "delete orphaned test files" +✓ "prune deprecated functions" +``` + +**Agent Capabilities:** +- Identify unused imports/functions +- Remove commented code +- Find orphaned files +- Clean up deprecated code +- Safe deletion with verification + +--- + +### 7. **Coder Agent** (Default) - Implementation +``` +Triggers: +- "add", "build", "create", "implement" +- "fix", "develop", "write code" +- Any implementation task + +Examples: +✓ "add user authentication" +✓ "build payment processing endpoint" +✓ "fix null pointer exception" +✓ "implement rate limiting" +``` + +**Agent Capabilities:** +- Feature implementation +- Bug fixes +- API development +- Database operations +- Business logic + +--- + +## Output Format + +```markdown +## Agent Selection + +**User Prompt**: "[original prompt]" + +**Task Analysis**: +- Type: [Testing | Research | Review | Refactoring | Documentation | Cleanup | Implementation] +- Keywords Detected: [keyword1, keyword2, ...] +- Complexity: [Simple | Moderate | Complex] + +**Selected Agent**: `[agent-name]` + +**Rationale**: +[Why this agent was chosen - 1-2 sentences explaining the match between prompt and agent capabilities] + +**Estimated Time**: [5-15 min | 15-30 min | 30-60 min | 1-2h] + +--- + +Delegating to **[agent-name]** agent... +``` + +--- + +## Decision Tree (Visual) + +``` +User Prompt + ↓ +Is it about testing? + ├─ YES → tester + └─ NO ↓ +Is it a research/learning question? + ├─ YES → research + └─ NO ↓ +Is it about code review/quality? + ├─ YES → reviewer + └─ NO ↓ +Is it about refactoring? + ├─ YES → refactor + └─ NO ↓ +Is it about documentation? + ├─ YES → documentation + └─ NO ↓ +Is it about cleanup? 
+ ├─ YES → cleanup + └─ NO ↓ +DEFAULT → coder (implementation) +``` + +--- + +## Context-Aware Selection + +Sometimes context matters more than keywords: + +```python +def context_aware_selection(prompt: str, context: dict) -> str: + """Consider additional context beyond keywords.""" + + # Check file types in context + files = context.get("files", []) + + # If only test files, likely testing task + # (guard against empty list: all() is vacuously True on []) + if files and all("test_" in f or "_test" in f for f in files): + return "tester" + + # If README or docs/, likely documentation + if any("README" in f or "docs/" in f for f in files): + return "documentation" + + # If many similar functions, likely refactoring + if context.get("code_duplication") == "high": + return "refactor" + + # Check task tags + tags = context.get("tags", []) + if "security" in tags: + return "reviewer" + + # Fall back to keyword-based selection + return select_agent(prompt, context) +``` + +--- + +## Integration with Workflow + +### Automatic Agent Selection + +```bash +# User: "write unit tests for payment processor" +→ agent-selector triggers +→ Detects: "write", "unit tests" keywords +→ Selected: tester agent +→ Task tool invokes: Task(command="tester", ...) + +# User: "how to implement OAuth2 in FastAPI" +→ agent-selector triggers +→ Detects: "how to", "implement" keywords +→ Selected: research agent (research takes priority) +→ Task tool invokes: Task(command="research", ...) + +# User: "refactor user service to reduce complexity" +→ agent-selector triggers +→ Detects: "refactor", "reduce complexity" keywords +→ Selected: refactor agent +→ Task tool invokes: Task(command="refactor", ...)
+``` + +### Manual Override + +```bash +# Force specific agent +Task(command="tester", prompt="implement payment processing") +# (Overrides agent-selector, uses tester instead of coder) +``` + +--- + +## Multi-Agent Tasks + +Some tasks need multiple agents in sequence: + +```python +def requires_multi_agent(prompt: str) -> List[str]: + """Detect tasks needing multiple agents.""" + + prompt_lower = prompt.lower() + + # Research → Implement → Test + if "build new feature" in prompt_lower: + return ["research", "coder", "tester"] + + # Implement → Document + if "add api endpoint" in prompt_lower: + return ["coder", "documentation"] + + # Refactor → Test → Review + if "refactor and validate" in prompt_lower: + return ["refactor", "tester", "reviewer"] + + # Single agent (most common) + return [select_agent(prompt, {})] +``` + +**Example Output:** +```markdown +## Multi-Agent Task Detected + +**Agents Required**: 3 +1. research - Learn best practices for OAuth2 +2. coder - Implement authentication endpoints +3. tester - Write test suite with >80% coverage + +**Execution Plan**: +1. Research agent: 15 min +2. Coder agent: 45 min +3. Tester agent: 30 min + +**Total Estimate**: 1.5 hours + +Executing agents sequentially... +``` + +--- + +## Special Cases + +### 1. **Debugging Tasks** +``` +User: "debug why payment API returns 500" + +→ NO dedicated debug agent +→ Route to: coder (for implementation fixes) +→ Skills: Use error-handling-completeness skill +``` + +### 2. **Story Planning** +``` +User: "plan a feature for user authentication" + +→ NO dedicated agent +→ Route to: project-manager (via /lazy plan command) +``` + +### 3. **Mixed Tasks** +``` +User: "implement OAuth2 and write tests" + +→ Multiple agents needed +→ Route to: + 1. coder (implement OAuth2) + 2. 
tester (write tests) +``` + +--- + +## Performance Metrics + +```markdown +## Agent Selection Metrics + +**Accuracy**: 95% correct agent selection +**Speed**: <100ms selection time +**Fallback Rate**: 5% default to coder + +### Common Mismatches +1. "test the implementation" → coder (should be tester) +2. "document how to use" → coder (should be documentation) + +### Improvements +- Add more context signals (file types, tags) +- Learn from user feedback +- Support multi-agent workflows +``` + +--- + +## Configuration + +```bash +# Disable automatic agent selection +export LAZYDEV_DISABLE_AGENT_SELECTOR=1 + +# Force specific agent for all tasks +export LAZYDEV_FORCE_AGENT=coder + +# Log agent selection decisions +export LAZYDEV_LOG_AGENT_SELECTION=1 +``` + +--- + +## What This Skill Does NOT Do + +❌ Invoke agents directly (Task tool does that) +❌ Execute agent code +❌ Modify agent behavior +❌ Replace /lazy commands +❌ Handle multi-step workflows + +✅ **DOES**: Analyze prompt and recommend best agent + +--- + +## Testing the Skill + +```bash +# Manual test +Skill(command="agent-selector") + +# Test cases +1. "write unit tests" → tester ✓ +2. "how to use FastAPI" → research ✓ +3. "review this code" → reviewer ✓ +4. "refactor handler" → refactor ✓ +5. "add docstrings" → documentation ✓ +6. "remove dead code" → cleanup ✓ +7. 
"implement login" → coder ✓ +``` + +--- + +## Quick Reference: Agent Selection + +| Keywords | Agent | Use Case | +|----------|-------|----------| +| test, unittest, pytest, coverage | tester | Write/run tests | +| how to, learn, research, docs | research | Learn & discover | +| review, audit, validate, check | reviewer | Quality & security | +| refactor, clean up, simplify | refactor | Code improvement | +| document, docstring, readme | documentation | Write docs | +| remove, unused, dead code | cleanup | Delete unused code | +| add, build, implement, fix | coder | Feature implementation | + +--- + +**Version**: 1.0.0 +**Agents Supported**: 7 (coder, tester, research, reviewer, refactor, documentation, cleanup) +**Accuracy**: ~95% +**Speed**: <100ms diff --git a/.claude/skills/breaking-change-detector/SKILL.md b/.claude/skills/breaking-change-detector/SKILL.md new file mode 100644 index 0000000..115a8be --- /dev/null +++ b/.claude/skills/breaking-change-detector/SKILL.md @@ -0,0 +1,398 @@ +--- +name: breaking-change-detector +description: Detects backward-incompatible changes to public APIs, function signatures, endpoints, and data schemas before they break production. Suggests migration paths. +--- + +# Breaking Change Detector Skill + +**Purpose**: Catch breaking changes early, not after customers complain. + +**Trigger Words**: API, endpoint, route, public, schema, model, interface, contract, signature, rename, remove, delete + +--- + +## Quick Decision: Is This Breaking? 
+ +```python +def is_breaking_change(change: dict) -> tuple[bool, str]: + """Fast breaking change evaluation.""" + + breaking_patterns = { + # Method signatures + "removed_parameter": True, + "renamed_parameter": True, + "changed_parameter_type": True, + "removed_method": True, + "renamed_method": True, + + # API endpoints + "removed_endpoint": True, + "renamed_endpoint": True, + "changed_response_format": True, + "removed_response_field": True, + + # Data models + "removed_field": True, + "renamed_field": True, + "changed_field_type": True, + "made_required": True, + + # Return types + "changed_return_type": True, + } + + # Safe changes (backward compatible) + safe_patterns = { + "added_parameter_with_default": False, + "added_optional_field": False, + "added_endpoint": False, + "added_response_field": False, + "deprecated_but_kept": False, + } + + change_type = change.get("type") + # Explicitly safe changes are never breaking + if change_type in safe_patterns: + return safe_patterns[change_type], change_type + return breaking_patterns.get(change_type, False), change_type +``` + +--- + +## Common Breaking Changes (With Fixes) + +### 1. **Removed Function Parameter** ❌ BREAKING +```python +# BEFORE (v1.0) +def process_payment(amount, currency, user_id): + pass + +# AFTER (v2.0) - BREAKS EXISTING CODE +def process_payment(amount, user_id): # Removed currency! + pass + +# ✅ FIX: Keep parameter with default +def process_payment(amount, user_id, currency="USD"): + """ + Args: + currency: Deprecated in v2.0, always uses USD + """ + pass +``` + +**Migration Path**: Add default value, deprecate, document. + +--- + +### 2. **Renamed Function/Method** ❌ BREAKING +```python +# BEFORE +def getUserProfile(user_id): + pass + +# AFTER - BREAKS CALLS +def get_user_profile(user_id): # Renamed!
+ pass + +# ✅ FIX: Keep both, deprecate old +def get_user_profile(user_id): + """Get user profile (v2.0+ naming).""" + pass + +def getUserProfile(user_id): + """Deprecated: Use get_user_profile() instead.""" + warnings.warn("getUserProfile is deprecated, use get_user_profile", DeprecationWarning) + return get_user_profile(user_id) +``` + +**Migration Path**: Alias old name → new name, add deprecation warning. + +--- + +### 3. **Changed Response Format** ❌ BREAKING +```python +# BEFORE - Returns dict +@app.route("/api/user/") +def get_user(id): + return {"id": id, "name": "Alice", "email": "alice@example.com"} + +# AFTER - Returns list - BREAKS CLIENTS! +@app.route("/api/user/") +def get_user(id): + return [{"id": id, "name": "Alice", "email": "alice@example.com"}] + +# ✅ FIX: Keep format, add new endpoint +@app.route("/api/v2/user/") # New version +def get_user_v2(id): + return [{"id": id, "name": "Alice"}] + +@app.route("/api/user/") # Keep v1 +def get_user(id): + return {"id": id, "name": "Alice", "email": "alice@example.com"} +``` + +**Migration Path**: Version the API (v1, v2), keep old version alive. + +--- + +### 4. **Removed Endpoint** ❌ BREAKING +```python +# BEFORE +@app.route("/users") +def get_users(): + pass + +# AFTER - REMOVED! Breaks clients. +# (endpoint deleted) + +# ✅ FIX: Redirect to new endpoint +@app.route("/users") +def get_users(): + """Deprecated: Use /api/v2/accounts instead.""" + return redirect("/api/v2/accounts", code=301) # Permanent redirect +``` + +**Migration Path**: Keep endpoint, redirect with 301, document deprecation. + +--- + +### 5. **Changed Required Fields** ❌ BREAKING +```python +# BEFORE - email optional +class User: + def __init__(self, name, email=None): + self.name = name + self.email = email + +# AFTER - email required! Breaks existing code. +class User: + def __init__(self, name, email): # No default! 
+ self.name = name + self.email = email + +# ✅ FIX: Keep optional, validate separately +class User: + def __init__(self, name, email=None): + self.name = name + self.email = email + + def validate(self): + """Validate required fields.""" + if not self.email: + raise ValueError("Email is required (new in v2.0)") +``` + +**Migration Path**: Keep optional in constructor, add validation method. + +--- + +### 6. **Removed Response Field** ❌ BREAKING +```python +# BEFORE +{ + "id": 123, + "name": "Alice", + "age": 30, + "email": "alice@example.com" +} + +# AFTER - Removed age! Breaks clients expecting it. +{ + "id": 123, + "name": "Alice", + "email": "alice@example.com" +} + +# ✅ FIX: Keep field with null/default +{ + "id": 123, + "name": "Alice", + "age": null, # Deprecated, always null in v2.0 + "email": "alice@example.com" +} +``` + +**Migration Path**: Keep field with null, document deprecation. + +--- + +## Non-Breaking Changes ✅ (Safe) + +### 1. **Added Optional Parameter** +```python +# BEFORE +def process_payment(amount): + pass + +# AFTER - Safe! Has default +def process_payment(amount, currency="USD"): + pass + +# Old calls still work: +process_payment(100) # ✅ Works +``` + +--- + +### 2. **Added Response Field** +```python +# BEFORE +{"id": 123, "name": "Alice"} + +# AFTER - Safe! Added field +{"id": 123, "name": "Alice", "created_at": "2025-10-30"} + +# Old clients ignore new field: ✅ Works +``` + +--- + +### 3. **Added New Endpoint** +```python +# New endpoint added +@app.route("/api/v2/users") +def get_users_v2(): + pass + +# Old endpoint unchanged: ✅ Safe +``` + +--- + +## Detection Strategy + +### Automatic Checks +1. **Function signatures**: Compare old vs new parameters, types, names +2. **API routes**: Check for removed/renamed endpoints +3. **Data schemas**: Validate field additions/removals/renames +4. 
**Return types**: Detect type changes + +### When to Run +- ✅ Before committing changes to public APIs +- ✅ During code review +- ✅ Before releasing new version + +--- + +## Output Format + +```markdown +## Breaking Change Report + +**Status**: [✅ NO BREAKING CHANGES | ⚠️ BREAKING CHANGES DETECTED] + +--- + +### Breaking Changes: 2 + +1. **[CRITICAL] Removed endpoint: GET /users** + - **Impact**: External API clients will get 404 + - **File**: api/routes.py:45 + - **Fix**: + ```python + # Keep endpoint, redirect to new one + @app.route("/users") + def get_users(): + return redirect("/api/v2/accounts", code=301) + ``` + - **Migration**: Add to CHANGELOG.md, notify users + +2. **[HIGH] Renamed parameter: currency → currency_code** + - **Impact**: Existing function calls will fail + - **File**: payments.py:23 + - **Fix**: + ```python + # Accept both, deprecate old name + def process_payment(amount, currency_code=None, currency=None): + # Support old name temporarily + if currency is not None: + warnings.warn("currency is deprecated, use currency_code") + currency_code = currency + ``` + +--- + +### Safe Changes: 1 + +1. **[SAFE] Added optional parameter: timeout (default=30)** + - **File**: api_client.py:12 + - **Impact**: None, backward compatible + +--- + +**Recommendation**: +1. Fix 2 breaking changes before merge +2. Document breaking changes in CHANGELOG.md +3. Bump major version (v1.x → v2.0) per semver +4. 
Notify API consumers 2 weeks before release +``` + +--- + +## Integration with Workflow + +```bash +# Automatic trigger when modifying APIs +/lazy code "rename /users endpoint to /accounts" + +→ breaking-change-detector triggers +→ Detects: Endpoint rename is breaking +→ Suggests: Keep /users, redirect to /accounts +→ Developer applies fix +→ Re-check: ✅ Backward compatible + +# Before PR +/lazy review US-3.4 + +→ breaking-change-detector runs +→ Checks all API changes in PR +→ Reports breaking changes +→ PR blocked if breaking without migration plan +``` + +--- + +## Version Bumping Guide + +```bash +# Semantic versioning +Given version: MAJOR.MINOR.PATCH + +# Breaking change detected → Bump MAJOR +1.2.3 → 2.0.0 + +# New feature (backward compatible) → Bump MINOR +1.2.3 → 1.3.0 + +# Bug fix (backward compatible) → Bump PATCH +1.2.3 → 1.2.4 +``` + +--- + +## What This Skill Does NOT Do + +❌ Catch internal/private API changes (only public APIs) +❌ Test runtime compatibility (use integration tests) +❌ Manage database migrations (separate tool) +❌ Generate full migration scripts + +✅ **DOES**: Detect public API breaking changes, suggest fixes, enforce versioning. 
+ +--- + +## Configuration + +```bash +# Strict mode: flag all changes (even safe ones) +export LAZYDEV_BREAKING_STRICT=1 + +# Disable breaking change detection +export LAZYDEV_DISABLE_BREAKING_DETECTOR=1 + +# Check only specific types +export LAZYDEV_BREAKING_CHECK="endpoints,schemas" +``` + +--- + +**Version**: 1.0.0 +**Follows**: Semantic Versioning 2.0.0 +**Speed**: <3 seconds for typical PR diff --git a/.claude/skills/dispatching-parallel-agents/SKILL.md b/.claude/skills/dispatching-parallel-agents/SKILL.md deleted file mode 100644 index 493dea2..0000000 --- a/.claude/skills/dispatching-parallel-agents/SKILL.md +++ /dev/null @@ -1,180 +0,0 @@ ---- -name: dispatching-parallel-agents -description: Use when facing 3+ independent failures that can be investigated without shared state or dependencies - dispatches multiple Claude agents to investigate and fix independent problems concurrently ---- - -# Dispatching Parallel Agents - -## Overview - -When you have multiple unrelated failures (different test files, different subsystems, different bugs), investigating them sequentially wastes time. Each investigation is independent and can happen in parallel. - -**Core principle:** Dispatch one agent per independent problem domain. Let them work concurrently. - -## When to Use - -```dot -digraph when_to_use { - "Multiple failures?" [shape=diamond]; - "Are they independent?" [shape=diamond]; - "Single agent investigates all" [shape=box]; - "One agent per problem domain" [shape=box]; - "Can they work in parallel?" [shape=diamond]; - "Sequential agents" [shape=box]; - "Parallel dispatch" [shape=box]; - - "Multiple failures?" -> "Are they independent?" [label="yes"]; - "Are they independent?" -> "Single agent investigates all" [label="no - related"]; - "Are they independent?" -> "Can they work in parallel?" [label="yes"]; - "Can they work in parallel?" -> "Parallel dispatch" [label="yes"]; - "Can they work in parallel?" 
-> "Sequential agents" [label="no - shared state"]; -} -``` - -**Use when:** -- 3+ test files failing with different root causes -- Multiple subsystems broken independently -- Each problem can be understood without context from others -- No shared state between investigations - -**Don't use when:** -- Failures are related (fix one might fix others) -- Need to understand full system state -- Agents would interfere with each other - -## The Pattern - -### 1. Identify Independent Domains - -Group failures by what's broken: -- File A tests: Tool approval flow -- File B tests: Batch completion behavior -- File C tests: Abort functionality - -Each domain is independent - fixing tool approval doesn't affect abort tests. - -### 2. Create Focused Agent Tasks - -Each agent gets: -- **Specific scope:** One test file or subsystem -- **Clear goal:** Make these tests pass -- **Constraints:** Don't change other code -- **Expected output:** Summary of what you found and fixed - -### 3. Dispatch in Parallel - -```typescript -// In Claude Code / AI environment -Task("Fix agent-tool-abort.test.ts failures") -Task("Fix batch-completion-behavior.test.ts failures") -Task("Fix tool-approval-race-conditions.test.ts failures") -// All three run concurrently -``` - -### 4. Review and Integrate - -When agents return: -- Read each summary -- Verify fixes don't conflict -- Run full test suite -- Integrate all changes - -## Agent Prompt Structure - -Good agent prompts are: -1. **Focused** - One clear problem domain -2. **Self-contained** - All context needed to understand the problem -3. **Specific about output** - What should the agent return? - -```markdown -Fix the 3 failing tests in src/agents/agent-tool-abort.test.ts: - -1. "should abort tool with partial output capture" - expects 'interrupted at' in message -2. "should handle mixed completed and aborted tools" - fast tool aborted instead of completed -3. 
"should properly track pendingToolCount" - expects 3 results but gets 0 - -These are timing/race condition issues. Your task: - -1. Read the test file and understand what each test verifies -2. Identify root cause - timing issues or actual bugs? -3. Fix by: - - Replacing arbitrary timeouts with event-based waiting - - Fixing bugs in abort implementation if found - - Adjusting test expectations if testing changed behavior - -Do NOT just increase timeouts - find the real issue. - -Return: Summary of what you found and what you fixed. -``` - -## Common Mistakes - -**❌ Too broad:** "Fix all the tests" - agent gets lost -**✅ Specific:** "Fix agent-tool-abort.test.ts" - focused scope - -**❌ No context:** "Fix the race condition" - agent doesn't know where -**✅ Context:** Paste the error messages and test names - -**❌ No constraints:** Agent might refactor everything -**✅ Constraints:** "Do NOT change production code" or "Fix tests only" - -**❌ Vague output:** "Fix it" - you don't know what changed -**✅ Specific:** "Return summary of root cause and changes" - -## When NOT to Use - -**Related failures:** Fixing one might fix others - investigate together first -**Need full context:** Understanding requires seeing entire system -**Exploratory debugging:** You don't know what's broken yet -**Shared state:** Agents would interfere (editing same files, using same resources) - -## Real Example from Session - -**Scenario:** 6 test failures across 3 files after major refactoring - -**Failures:** -- agent-tool-abort.test.ts: 3 failures (timing issues) -- batch-completion-behavior.test.ts: 2 failures (tools not executing) -- tool-approval-race-conditions.test.ts: 1 failure (execution count = 0) - -**Decision:** Independent domains - abort logic separate from batch completion separate from race conditions - -**Dispatch:** -``` -Agent 1 → Fix agent-tool-abort.test.ts -Agent 2 → Fix batch-completion-behavior.test.ts -Agent 3 → Fix tool-approval-race-conditions.test.ts -``` - 
-**Results:** -- Agent 1: Replaced timeouts with event-based waiting -- Agent 2: Fixed event structure bug (threadId in wrong place) -- Agent 3: Added wait for async tool execution to complete - -**Integration:** All fixes independent, no conflicts, full suite green - -**Time saved:** 3 problems solved in parallel vs sequentially - -## Key Benefits - -1. **Parallelization** - Multiple investigations happen simultaneously -2. **Focus** - Each agent has narrow scope, less context to track -3. **Independence** - Agents don't interfere with each other -4. **Speed** - 3 problems solved in time of 1 - -## Verification - -After agents return: -1. **Review each summary** - Understand what changed -2. **Check for conflicts** - Did agents edit same code? -3. **Run full suite** - Verify all fixes work together -4. **Spot check** - Agents can make systematic errors - -## Real-World Impact - -From debugging session (2025-10-03): -- 6 failures across 3 files -- 3 agents dispatched in parallel -- All investigations completed concurrently -- All fixes integrated successfully -- Zero conflicts between agent changes diff --git a/.claude/skills/error-handling-completeness/SKILL.md b/.claude/skills/error-handling-completeness/SKILL.md new file mode 100644 index 0000000..0810302 --- /dev/null +++ b/.claude/skills/error-handling-completeness/SKILL.md @@ -0,0 +1,340 @@ +--- +name: error-handling-completeness +description: Evaluates if error handling is sufficient for new code - checks try-catch coverage, logging, user messages, retry logic. Focuses on external calls and user-facing code. +--- + +# Error Handling Completeness Skill + +**Purpose**: Prevent production crashes with systematic error handling. + +**Trigger Words**: API call, external, integration, network, database, file, user input, async, promise, await + +--- + +## Quick Decision: Needs Error Handling Check? 
+ +```python +def needs_error_check(code_context: dict) -> bool: + """Decide if error handling review is needed.""" + + # High-risk operations (always check) + high_risk = [ + "fetch", "axios", "requests", "http", # HTTP calls + "db.", "query", "execute", # Database + "open(", "read", "write", # File I/O + "json.loads", "json.parse", # JSON parsing + "int(", "float(", # Type conversions + "subprocess", "exec", # External processes + "await", "async", # Async operations + ] + + code = code_context.get("code", "").lower() + return any(risk in code for risk in high_risk) +``` + +--- + +## Error Handling Checklist (Fast) + +### 1. **External API Calls** (Most Critical) +```python +# ❌ BAD - No error handling +def get_user_data(user_id): + response = requests.get(f"https://api.example.com/users/{user_id}") + return response.json() # What if network fails? 404? Timeout? + +# ✅ GOOD - Complete error handling +def get_user_data(user_id): + try: + response = requests.get( + f"https://api.example.com/users/{user_id}", + timeout=5 # Timeout! + ) + response.raise_for_status() # Check HTTP errors + return response.json() + + except requests.Timeout: + logger.error(f"Timeout fetching user {user_id}") + raise ServiceUnavailableError("User service timeout") + + except requests.HTTPError as e: + if e.response.status_code == 404: + raise UserNotFoundError(f"User {user_id} not found") + logger.error(f"HTTP error fetching user: {e}") + raise + + except requests.RequestException as e: + logger.error(f"Network error: {e}") + raise ServiceUnavailableError("Cannot reach user service") +``` + +**Quick Checks**: +- ✅ Timeout set? +- ✅ HTTP errors handled? +- ✅ Network errors caught? +- ✅ Logged? +- ✅ User-friendly error returned? + +--- + +### 2. **Database Operations** +```python +# ❌ BAD - Swallows errors +def delete_user(user_id): + try: + db.execute("DELETE FROM users WHERE id = ?", [user_id]) + except Exception: + pass # Silent failure! 
+ +# ✅ GOOD - Specific handling +def delete_user(user_id): + try: + result = db.execute("DELETE FROM users WHERE id = ?", [user_id]) + if result.rowcount == 0: + raise UserNotFoundError(f"User {user_id} not found") + + except db.IntegrityError as e: + logger.error(f"Cannot delete user {user_id}: {e}") + raise DependencyError("User has related records") + + except db.OperationalError as e: + logger.error(f"Database error: {e}") + raise DatabaseUnavailableError() +``` + +**Quick Checks**: +- ✅ Specific exceptions (not bare `except`)? +- ✅ Logged? +- ✅ User-friendly error? + +--- + +### 3. **File Operations** +```python +# ❌ BAD - File might not exist +def read_config(): + with open("config.json") as f: + return json.load(f) + +# ✅ GOOD - Handle missing file +def read_config(): + try: + with open("config.json") as f: + return json.load(f) + except FileNotFoundError: + logger.warning("config.json not found, using defaults") + return DEFAULT_CONFIG + except json.JSONDecodeError as e: + logger.error(f"Invalid JSON in config.json: {e}") + raise ConfigurationError("Malformed config.json") + except PermissionError: + logger.error("Permission denied reading config.json") + raise +``` + +**Quick Checks**: +- ✅ FileNotFoundError handled? +- ✅ JSON parse errors caught? +- ✅ Permission errors handled? + +--- + +### 4. **Type Conversions** +```python +# ❌ BAD - Crash on invalid input +def process_age(age_str): + age = int(age_str) # What if "abc"? + return age * 2 + +# ✅ GOOD - Validated +def process_age(age_str): + try: + age = int(age_str) + if age < 0 or age > 150: + raise ValueError("Age out of range") + return age * 2 + except ValueError: + raise ValidationError(f"Invalid age: {age_str}") +``` + +**Quick Checks**: +- ✅ ValueError caught? +- ✅ Range validation? +- ✅ Clear error message? + +--- + +### 5. 
**Async/Await** (JavaScript/Python) +```javascript +// ❌ BAD - Unhandled promise rejection +async function fetchUser(id) { + const user = await fetch(`/api/users/${id}`); + return user.json(); // What if network fails? +} + +// ✅ GOOD - Handled +async function fetchUser(id) { + try { + const response = await fetch(`/api/users/${id}`); + if (!response.ok) { + throw new Error(`HTTP ${response.status}`); + } + return await response.json(); + } catch (error) { + console.error(`Failed to fetch user ${id}:`, error); + throw new ServiceError("Cannot fetch user"); + } +} +``` + +**Quick Checks**: +- ✅ Try-catch around await? +- ✅ HTTP status checked? +- ✅ Logged? + +--- + +## Error Handling Patterns + +### Pattern 1: Retry with Exponential Backoff +```python +def call_api_with_retry(url, max_retries=3): + for attempt in range(max_retries): + try: + response = requests.get(url, timeout=5) + response.raise_for_status() + return response.json() + + except requests.Timeout: + if attempt < max_retries - 1: + wait = 2 ** attempt # 1s, 2s, 4s + logger.warning(f"Timeout, retrying in {wait}s...") + time.sleep(wait) + else: + raise +``` + +**When to use**: Transient failures (network, rate limits) + +--- + +### Pattern 2: Fallback Values +```python +def get_user_avatar(user_id): + try: + return fetch_from_cdn(user_id) + except CDNError: + logger.warning(f"CDN failed for user {user_id}, using default") + return DEFAULT_AVATAR_URL +``` + +**When to use**: Non-critical operations, graceful degradation + +--- + +### Pattern 3: Circuit Breaker +```python +class CircuitBreaker: + def __init__(self, max_failures=5): + self.failures = 0 + self.max_failures = max_failures + self.is_open = False + + def call(self, func): + if self.is_open: + raise ServiceUnavailableError("Circuit breaker open") + + try: + result = func() + self.failures = 0 # Reset on success + return result + except Exception as e: + self.failures += 1 + if self.failures >= self.max_failures: + self.is_open = True + 
logger.error("Circuit breaker opened") + raise +``` + +**When to use**: Preventing cascading failures + +--- + +## Output Format + +```markdown +## Error Handling Report + +**Status**: [✅ COMPLETE | ⚠️ GAPS FOUND] + +--- + +### Missing Error Handling: 3 + +1. **[HIGH] No timeout on API call (api_client.py:45)** + - **Issue**: `requests.get()` has no timeout + - **Risk**: Indefinite hang if service slow + - **Fix**: + ```python + response = requests.get(url, timeout=5) + ``` + +2. **[HIGH] Unhandled JSON parse error (config.py:12)** + - **Issue**: `json.load()` not wrapped in try-catch + - **Risk**: Crash on malformed JSON + - **Fix**: + ```python + try: + config = json.load(f) + except json.JSONDecodeError as e: + logger.error(f"Invalid JSON: {e}") + return DEFAULT_CONFIG + ``` + +3. **[MEDIUM] Silent exception swallowing (db.py:89)** + - **Issue**: `except Exception: pass` + - **Risk**: Failures go unnoticed + - **Fix**: Log error or use specific exception + +--- + +**Good Practices Found**: 2 +- ✅ Database errors logged properly (db.py:34) +- ✅ Retry logic on payment API (payments.py:67) + +--- + +**Next Steps**: +1. Add timeout to API calls (5 min) +2. Wrap JSON parsing in try-catch (2 min) +3. Remove silent exception handlers (3 min) +``` + +--- + +## What This Skill Does NOT Do + +❌ Catch every possible exception (too noisy) +❌ Force try-catch everywhere (only where needed) +❌ Replace integration tests +❌ Handle business logic errors (validation, etc.) 
+ +✅ **DOES**: Check critical error-prone operations (network, I/O, parsing) + +--- + +## Configuration + +```bash +# Strict mode: check all functions +export LAZYDEV_ERROR_HANDLING_STRICT=1 + +# Disable error handling checks +export LAZYDEV_DISABLE_ERROR_CHECKS=1 +``` + +--- + +**Version**: 1.0.0 +**Focus**: External calls, I/O, parsing, async +**Speed**: <2 seconds per file diff --git a/.claude/skills/example-skill/SKILL.md b/.claude/skills/example-skill/SKILL.md deleted file mode 100644 index 35e5dc3..0000000 --- a/.claude/skills/example-skill/SKILL.md +++ /dev/null @@ -1,67 +0,0 @@ ---- -name: example-skill -description: Starter template for a Claude Agent Skill used in LAZY-DEV projects -version: 0.1.0 -authors: - - your-name-here -tags: - - template - - documentation - - automation ---- - -# Example Skill (Starter) - -## Purpose -Provide a minimal, well-structured Skill that Claude can load when tasks match this capability. Duplicate this folder, rename it, and edit the frontmatter and sections to fit your domain. - -## When To Use -- The user asks for [your domain task] -- Files or prompts include keywords: [add keywords] -- The task benefits from consistent structure, steps, or formatting - -## Non‑Goals -- Don’t override core agent behavior unrelated to this domain -- Don’t run destructive commands or write outside the project unless explicitly requested - -## Behaviors (What This Skill Adds) -1. Style: Prefer concise, structured responses with clear steps -2. Files: Reference files with absolute or project‑relative paths using backticks -3. 
Quality: Include brief validation or a checklist when relevant - -## Guardrails -- Confirm before making destructive changes -- Surface assumptions and ask for missing inputs -- Respect repository conventions (CLAUDE.md, settings, linters) - -## Inputs (Examples) -- User prompt context and goals -- Optional flags or parameters the user provides -- Project files discovered during the task - -## Outputs (Examples) -- A short plan and next actions -- Edited or created files, listed explicitly -- Summary of changes and verification steps - -## Quick Usage Examples - -Prompt examples that should activate this skill naturally: - -> “Generate a structured checklist for migrating the API.” - -> “Create a concise rollout plan with validation steps.” - -## Implementation Notes -- If your skill needs helper scripts, add them under `scripts/` and reference exact commands -- Keep instructions decisive; avoid long narrative -- Prefer reusable patterns and small sections over large blocks of text - -## Test Prompts (for validation) -1. “Give me a 5‑step plan with risks and mitigations.” -2. “List files to change and commands to run.” -3. “Summarize completion criteria in bullets.” - -## Changelog -- 0.1.0 — Initial starter template - diff --git a/.claude/skills/performance-budget-checker/SKILL.md b/.claude/skills/performance-budget-checker/SKILL.md new file mode 100644 index 0000000..101b85e --- /dev/null +++ b/.claude/skills/performance-budget-checker/SKILL.md @@ -0,0 +1,346 @@ +--- +name: performance-budget-checker +description: Detects performance anti-patterns like N+1 queries, nested loops, large file operations, and inefficient algorithms. Suggests fast fixes before issues reach production. +--- + +# Performance Budget Checker Skill + +**Purpose**: Catch performance killers before they slow production. + +**Trigger Words**: query, database, loop, for, map, filter, file, read, load, fetch, API, cache + +--- + +## Quick Decision: Check Performance? 
+ +```python +def needs_perf_check(code_context: dict) -> bool: + """Fast performance risk evaluation.""" + + # Performance-critical patterns + patterns = [ + "for ", "while ", "map(", "filter(", # Loops + "db.", "query", "select", "fetch", # Database + ".all()", ".filter(", ".find(", # ORM queries + "open(", "read", "readlines", # File I/O + "json.loads", "pickle.load", # Deserialization + "sorted(", "sort(", # Sorting + "in list", "in array", # Linear search + ] + + code = code_context.get("code", "").lower() + return any(p in code for p in patterns) +``` + +--- + +## Performance Anti-Patterns (Quick Fixes) + +### 1. **N+1 Query Problem** (Most Common) ⚠️ +```python +# ❌ BAD - 1 + N queries (slow!) +def get_users_with_posts(): + users = User.query.all() # 1 query + for user in users: + user.posts = Post.query.filter_by(user_id=user.id).all() # N queries! + return users +# Performance: 101 queries for 100 users + +# ✅ GOOD - 1 query with JOIN +def get_users_with_posts(): + users = User.query.options(joinedload(User.posts)).all() # 1 query + return users +# Performance: 1 query for 100 users + +# Or use prefetch +def get_users_with_posts(): + users = User.query.all() + user_ids = [u.id for u in users] + posts = Post.query.filter(Post.user_id.in_(user_ids)).all() + # Group posts by user_id manually + return users +``` + +**Quick Fix**: Use `joinedload()`, `selectinload()`, or batch fetch. + +--- + +### 2. **Nested Loops** ⚠️ +```python +# ❌ BAD - O(n²) complexity +def find_common_items(list1, list2): + common = [] + for item1 in list1: # O(n) + for item2 in list2: # O(n) + if item1 == item2: + common.append(item1) + return common +# Performance: 1,000,000 operations for 1000 items each + +# ✅ GOOD - O(n) with set +def find_common_items(list1, list2): + return list(set(list1) & set(list2)) +# Performance: 2000 operations for 1000 items each +``` + +**Quick Fix**: Use set intersection, dict lookup, or hash map. + +--- + +### 3. 
**Inefficient Filtering** ⚠️ +```python +# ❌ BAD - Fetch all, then filter in Python +def get_active_users(): + all_users = User.query.all() # Fetch 10,000 users + active = [u for u in all_users if u.is_active] # Filter in memory + return active +# Performance: 10,000 rows transferred, filtered in Python + +# ✅ GOOD - Filter in database +def get_active_users(): + return User.query.filter_by(is_active=True).all() +# Performance: Only active users transferred +``` + +**Quick Fix**: Push filtering to database with WHERE clause. + +--- + +### 4. **Large File Loading** ⚠️ +```python +# ❌ BAD - Load entire file into memory +def process_large_file(filepath): + with open(filepath) as f: + data = f.read() # 1GB file → 1GB memory! + for line in data.split('\n'): + process_line(line) + +# ✅ GOOD - Stream line by line +def process_large_file(filepath): + with open(filepath) as f: + for line in f: # Streaming, ~4KB at a time + process_line(line.strip()) +``` + +**Quick Fix**: Stream files instead of loading fully. + +--- + +### 5. **Missing Pagination** ⚠️ +```python +# ❌ BAD - Return all 100,000 records +@app.route("/api/users") +def get_users(): + return User.query.all() # 100,000 rows! + +# ✅ GOOD - Paginate +@app.route("/api/users") +def get_users(): + page = request.args.get('page', 1, type=int) + per_page = request.args.get('per_page', 50, type=int) + return User.query.paginate(page=page, per_page=per_page) +``` + +**Quick Fix**: Add pagination to list endpoints. + +--- + +### 6. 
**No Caching** ⚠️
+```python
+# ❌ BAD - Recompute every time
+def get_top_products():
+    # Expensive computation every request
+    products = Product.query.all()
+    sorted_products = sorted(products, key=lambda p: p.sales, reverse=True)
+    return sorted_products[:10]
+
+# ✅ GOOD - Cache for 5 minutes
+from functools import lru_cache
+import time
+
+@lru_cache(maxsize=1)
+def get_top_products_cached(cache_key):
+    # cache_key changes every 5 minutes; new key evicts the stale entry
+    return _compute_top_products()
+
+def get_top_products():
+    cache_key = int(time.time() // 300)  # 5 min buckets
+    return get_top_products_cached(cache_key)
+
+def _compute_top_products():
+    products = Product.query.all()
+    sorted_products = sorted(products, key=lambda p: p.sales, reverse=True)
+    return sorted_products[:10]
+```
+
+**Quick Fix**: Add caching for expensive computations.
+
+---
+
+### 7. **Linear Search in List** ⚠️
+```python
+# ❌ BAD - O(n) lookup
+user_ids = [1, 2, 3, ..., 10000]  # List
+if 9999 in user_ids:  # Scans entire list
+    pass
+
+# ✅ GOOD - O(1) lookup
+user_ids = {1, 2, 3, ..., 10000}  # Set
+if 9999 in user_ids:  # Instant lookup
+    pass
+```
+
+**Quick Fix**: Use set/dict for lookups instead of list.
+
+---
+
+### 8. **Synchronous I/O in Loop** ⚠️
+```python
+# ❌ BAD - Sequential API calls (slow)
+def fetch_user_data(user_ids):
+    results = []
+    for user_id in user_ids:  # 100 users
+        data = requests.get(f"/api/users/{user_id}").json()  # 200ms each
+        results.append(data)
+    return results
+# Performance: 100 × 200ms = 20 seconds!
+
+# ✅ GOOD - Parallel requests
+import asyncio
+import aiohttp
+
+async def fetch_user_data(user_ids):
+    async with aiohttp.ClientSession() as session:
+        tasks = [fetch_one(session, uid) for uid in user_ids]
+        results = await asyncio.gather(*tasks)
+        return results
+
+async def fetch_one(session, user_id):
+    async with session.get(f"/api/users/{user_id}") as resp:
+        return await resp.json()
+# Performance: ~200ms total (parallel)
+```
+
+**Quick Fix**: Use async/await or threading for I/O-bound operations. 
+ +--- + +## Performance Budget Guidelines + +| Operation | Acceptable | Warning | Critical | +|-----------|-----------|---------|----------| +| API response time | <200ms | 200-500ms | >500ms | +| Database query | <50ms | 50-200ms | >200ms | +| List endpoint | <100 items | 100-1000 | >1000 | +| File operation | <1MB | 1-10MB | >10MB | +| Loop iterations | <1000 | 1000-10000 | >10000 | + +--- + +## Output Format + +```markdown +## Performance Report + +**Status**: [✅ WITHIN BUDGET | ⚠️ ISSUES FOUND] + +--- + +### Performance Issues: 2 + +1. **[HIGH] N+1 Query in get_user_posts() (api.py:34)** + - **Issue**: 1 + 100 queries (101 total) + - **Impact**: ~500ms for 100 users + - **Fix**: + ```python + # Change this: + users = User.query.all() + for user in users: + user.posts = Post.query.filter_by(user_id=user.id).all() + + # To this: + users = User.query.options(joinedload(User.posts)).all() + ``` + - **Expected**: 500ms → 50ms (10x faster) + +2. **[MEDIUM] No pagination on /api/products (routes.py:45)** + - **Issue**: Returns all 5,000 products + - **Impact**: 2MB response, slow load + - **Fix**: + ```python + @app.route("/api/products") + def get_products(): + page = request.args.get('page', 1, type=int) + return Product.query.paginate(page=page, per_page=50) + ``` + +--- + +### Optimizations Applied: 1 +- ✅ Used set() for user_id lookup (utils.py:23) - O(1) instead of O(n) + +--- + +**Next Steps**: +1. Fix N+1 query with joinedload (5 min fix) +2. Add pagination to /api/products (10 min) +3. 
Consider adding Redis cache for top products +``` + +--- + +## When to Skip Performance Checks + +✅ Skip for: +- Prototypes/POCs +- Admin-only endpoints (low traffic) +- One-time scripts +- Small datasets (<100 items) + +⚠️ Always check for: +- Public APIs +- User-facing endpoints +- High-traffic pages +- Data processing pipelines + +--- + +## What This Skill Does NOT Do + +❌ Run actual benchmarks (use profiling tools) +❌ Optimize algorithms (focus on anti-patterns) +❌ Check infrastructure (servers, CDN, etc.) +❌ Replace load testing + +✅ **DOES**: Detect common performance anti-patterns with quick fixes. + +--- + +## Configuration + +```bash +# Strict mode: check all loops and queries +export LAZYDEV_PERF_STRICT=1 + +# Disable performance checks +export LAZYDEV_DISABLE_PERF_CHECKS=1 + +# Set custom thresholds +export LAZYDEV_PERF_MAX_QUERY_TIME=100 # ms +export LAZYDEV_PERF_MAX_LOOP_SIZE=5000 +``` + +--- + +## Quick Reference: Common Fixes + +| Anti-Pattern | Fix | Time Complexity | +|--------------|-----|-----------------| +| N+1 queries | `joinedload()` | O(n) → O(1) | +| Nested loops | Use set/dict | O(n²) → O(n) | +| Load full file | Stream lines | O(n) memory → O(1) | +| No pagination | `.paginate()` | O(n) → O(page_size) | +| Linear search | Use set | O(n) → O(1) | +| Sync I/O loop | async/await | O(n×t) → O(t) | + +--- + +**Version**: 1.0.0 +**Focus**: Database, loops, I/O, caching +**Speed**: <3 seconds per file diff --git a/.claude/skills/project-docs-sync/SKILL.md b/.claude/skills/project-docs-sync/SKILL.md new file mode 100644 index 0000000..bd2e389 --- /dev/null +++ b/.claude/skills/project-docs-sync/SKILL.md @@ -0,0 +1,431 @@ +--- +name: project-docs-sync +description: Automatically synchronize project documentation when major changes occur (new tech, architecture changes, requirements shifts). Detects significant updates and propagates changes across TECH-STACK.md, ARCHITECTURE.md, and SPECIFICATIONS.md. 
+--- + +# Project Documentation Sync Skill + +**Purpose**: Keep project documentation consistent without manual syncing overhead. + +**Trigger**: Auto-invoked by PostToolUse hook when files in `project-management/` are edited. + +--- + +## Decision Logic: Should We Sync? + +```python +def should_sync(change: dict) -> tuple[bool, str]: + """Conservative sync decision - only on big changes.""" + + # Track last sync state + last_sync = load_last_sync() # from .meta/last-sync.json + + significant_changes = { + # Technology changes + "added_technology": True, # New language, framework, library + "removed_technology": True, # Deprecated/removed tech + "upgraded_major_version": True, # React 17 → 18, Python 3.10 → 3.11 + + # Architecture changes + "added_service": True, # New microservice, component + "removed_service": True, # Deprecated service + "changed_data_flow": True, # New integration pattern + "added_integration": True, # New third-party API + + # Requirements changes + "new_security_requirement": True, + "new_performance_requirement": True, + "changed_api_contract": True, + "added_compliance_need": True, + } + + # Skip minor changes + minor_changes = { + "typo_fix": False, + "formatting": False, + "comment_update": False, + "example_clarification": False, + } + + change_type = classify_change(change, last_sync) + return significant_changes.get(change_type, False), change_type +``` + +--- + +## What Gets Synced (Conservative Strategy) + +### 1. 
TECH-STACK.md Changed → Update ARCHITECTURE.md + +**Triggers:** +- Added new language/framework (e.g., added Redis) +- Removed technology (e.g., removed MongoDB) +- Major version upgrade (e.g., React 17 → 18) + +**Sync Actions:** +```markdown +TECH-STACK.md shows: ++ Redis 7.x (added for caching) + +→ Update ARCHITECTURE.md: + - Add Redis component to architecture diagram + - Add caching layer to data flow + - Document Redis connection pattern +``` + +**Example Output:** +``` +✓ Synced TECH-STACK.md → ARCHITECTURE.md + - Added: Redis caching layer + - Updated: Data flow diagram (added cache lookup) + - Reason: New technology requires architectural integration +``` + +--- + +### 2. ARCHITECTURE.md Changed → Update SPECIFICATIONS.md + +**Triggers:** +- New service/component added +- API gateway pattern introduced +- Data model changed +- Integration pattern modified + +**Sync Actions:** +```markdown +ARCHITECTURE.md shows: ++ API Gateway (Kong) added between clients and services + +→ Update SPECIFICATIONS.md: + - Add API Gateway endpoints + - Update authentication flow + - Add rate limiting specs + - Update API contract examples +``` + +**Example Output:** +``` +✓ Synced ARCHITECTURE.md → SPECIFICATIONS.md + - Added: API Gateway endpoint specs + - Updated: Authentication flow (now via gateway) + - Reason: Architectural change affects API contracts +``` + +--- + +### 3. 
PROJECT-OVERVIEW.md Changed → Validate Consistency + +**Triggers:** +- Project scope changed +- New requirement category added +- Compliance requirement added +- Target users changed + +**Sync Actions:** +```markdown +PROJECT-OVERVIEW.md shows: ++ Compliance: GDPR data privacy required + +→ Validate across all docs: + - Check TECH-STACK.md has encryption libraries + - Check ARCHITECTURE.md has data privacy layer + - Check SPECIFICATIONS.md has GDPR endpoints (data export, deletion) + - Flag missing pieces +``` + +**Example Output:** +``` +⚠ Validation: PROJECT-OVERVIEW.md → ALL DOCS + - Missing in TECH-STACK.md: No encryption library listed + - Missing in SPECIFICATIONS.md: No GDPR data export endpoint + - Recommendation: Add encryption lib + GDPR API specs +``` + +--- + +## Change Detection Algorithm + +```python +def classify_change(file_path: str, diff: str, last_sync: dict) -> str: + """Classify change significance using diff analysis.""" + + # Parse diff + added_lines = [line for line in diff.split('\n') if line.startswith('+')] + removed_lines = [line for line in diff.split('\n') if line.startswith('-')] + + # Check for technology changes + tech_keywords = ['framework', 'library', 'language', 'database', 'cache'] + if any(kw in line.lower() for line in added_lines for kw in tech_keywords): + if any(removed_lines): # Replacement + return "upgraded_major_version" + return "added_technology" + + # Check for architecture changes + arch_keywords = ['service', 'component', 'layer', 'gateway', 'microservice'] + if any(kw in line.lower() for line in added_lines for kw in arch_keywords): + return "added_service" + + # Check for requirement changes + req_keywords = ['security', 'performance', 'compliance', 'GDPR', 'HIPAA'] + if any(kw in line.lower() for line in added_lines for kw in req_keywords): + return "new_security_requirement" + + # Check for API contract changes + if 'endpoint' in diff.lower() or 'route' in diff.lower(): + return "changed_api_contract" + + # 
Default: minor change (skip sync) + if len(added_lines) < 3 and not removed_lines: + return "typo_fix" + + return "unknown_change" +``` + +--- + +## Sync State Tracking + +**Storage**: `.meta/last-sync.json` + +```json +{ + "last_sync_timestamp": "2025-10-30T14:30:00Z", + "synced_files": { + "project-management/TECH-STACK.md": { + "hash": "abc123", + "last_modified": "2025-10-30T14:00:00Z", + "change_type": "added_technology" + }, + "project-management/ARCHITECTURE.md": { + "hash": "def456", + "last_modified": "2025-10-30T14:30:00Z", + "synced_from": "TECH-STACK.md" + } + }, + "pending_syncs": [] +} +``` + +**Update Logic**: +1. After Write/Edit to `project-management/*.md` +2. Calculate file hash (md5 of content) +3. Compare with last sync state +4. If different + significant change → Trigger sync +5. Update `.meta/last-sync.json` + +--- + +## Sync Execution Flow + +``` +PostToolUse Hook Fires + ↓ +File edited: project-management/TECH-STACK.md + ↓ +Load .meta/last-sync.json + ↓ +Calculate diff from last sync + ↓ +Classify change: "added_technology" (Redis) + ↓ +Decision: should_sync() → TRUE + ↓ +┌────────────────────────────────────┐ +│ Sync: TECH-STACK → ARCHITECTURE │ +│ - Read TECH-STACK.md additions │ +│ - Identify: Redis 7.x (cache) │ +│ - Update ARCHITECTURE.md: │ +│ + Add Redis component │ +│ + Update data flow │ +└────────────────────────────────────┘ + ↓ +Write updated ARCHITECTURE.md + ↓ +Update .meta/last-sync.json + ↓ +Log sync action + ↓ +Output brief sync report +``` + +--- + +## Sync Report Format + +```markdown +## Documentation Sync Report + +**Trigger**: TECH-STACK.md modified (added Redis) +**Timestamp**: 2025-10-30T14:30:00Z + +--- + +### Changes Detected: 1 + +1. **[SIGNIFICANT] Added technology: Redis 7.x** + - **Source**: project-management/TECH-STACK.md:45 + - **Purpose**: Caching layer for API responses + +--- + +### Syncs Applied: 2 + +1. 
**TECH-STACK.md → ARCHITECTURE.md** + - ✓ Added: Redis component to architecture diagram + - ✓ Updated: Data flow (added cache lookup step) + - ✓ File: project-management/ARCHITECTURE.md:120-135 + +2. **TECH-STACK.md → SPECIFICATIONS.md** + - ✓ Added: Cache invalidation API endpoint + - ✓ Updated: Response time expectations (now <100ms with cache) + - ✓ File: project-management/SPECIFICATIONS.md:78-82 + +--- + +### Validation Checks: 2 + +✓ TECH-STACK.md consistency: OK +✓ ARCHITECTURE.md alignment: OK + +--- + +**Result**: Documentation synchronized successfully. +**Next Action**: Review changes in next commit. +``` + +--- + +## Integration with PostToolUse Hook + +**Hook Location**: `.claude/hooks/post_tool_use_format.py` + +**Trigger Condition**: +```python +def should_trigger_docs_sync(file_path: str, tool_name: str) -> bool: + """Only trigger on project-management doc edits.""" + + if tool_name not in ["Write", "Edit"]: + return False + + project_docs = [ + "project-management/TECH-STACK.md", + "project-management/ARCHITECTURE.md", + "project-management/PROJECT-OVERVIEW.md", + "project-management/SPECIFICATIONS.md", + ] + + return any(doc in file_path for doc in project_docs) +``` + +**Invocation**: +```python +# In PostToolUse hook +if should_trigger_docs_sync(file_path, tool_name): + # Load skill + skill_result = invoke_skill("project-docs-sync", { + "file_path": file_path, + "change_type": classify_change(file_path, diff), + "last_sync_state": load_last_sync() + }) + + # Log sync action + log_sync_action(skill_result) +``` + +--- + +## Sync Strategies by File Type + +### TECH-STACK.md → ARCHITECTURE.md +**What to sync:** +- New databases → Add data layer component +- New frameworks → Add to tech stack diagram +- New APIs → Add integration points +- Version upgrades → Update compatibility notes + +### ARCHITECTURE.md → SPECIFICATIONS.md +**What to sync:** +- New services → Add service endpoints +- New integrations → Add API contracts +- Data model changes → 
Update request/response schemas +- Security layers → Add authentication specs + +### PROJECT-OVERVIEW.md → ALL DOCS +**What to validate:** +- Compliance requirements → Check encryption in TECH-STACK +- Performance goals → Check caching in ARCHITECTURE +- Target users → Check API design in SPECIFICATIONS +- Scope changes → Validate alignment across all docs + +--- + +## Conservative Sync Rules + +**DO Sync When:** +- ✅ New technology added (database, framework, library) +- ✅ Service/component added or removed +- ✅ API contract changed (new endpoint, schema change) +- ✅ Compliance requirement added (GDPR, HIPAA) +- ✅ Major version upgrade (breaking changes possible) + +**DO NOT Sync When:** +- ❌ Typo fixes (1-2 character changes) +- ❌ Formatting changes (whitespace, markdown) +- ❌ Comment/example clarifications +- ❌ Documentation of existing features (no new info) +- ❌ Minor version bumps (patch releases) + +--- + +## Error Handling + +**If sync fails:** +1. Log error to `.meta/sync-errors.log` +2. Add to pending syncs in `.meta/last-sync.json` +3. Report to user with clear action items +4. Do NOT block the write operation (non-blocking) + +**Example Error Report:** +``` +⚠ Documentation Sync Failed + +**File**: project-management/TECH-STACK.md +**Error**: Could not parse ARCHITECTURE.md (syntax error) +**Action Required**: + 1. Fix ARCHITECTURE.md syntax error (line 45) + 2. 
Re-run: /lazy docs-sync + +**Pending Syncs**: 1 (tracked in .meta/last-sync.json) +``` + +--- + +## Configuration + +```bash +# Disable auto-sync (manual /lazy docs-sync only) +export LAZYDEV_DISABLE_DOCS_SYNC=1 + +# Sync everything (even minor changes) +export LAZYDEV_DOCS_SYNC_AGGRESSIVE=1 + +# Sync specific files only +export LAZYDEV_DOCS_SYNC_FILES="TECH-STACK.md,ARCHITECTURE.md" +``` + +--- + +## What This Skill Does NOT Do + +❌ Sync code files (only project-management docs) +❌ Generate docs from scratch (use `/lazy docs`) +❌ Fix documentation errors (use `/lazy fix`) +❌ Create missing docs (use `/lazy plan`) + +✅ **DOES**: Automatically propagate significant changes across project documentation with conservative triggers. + +--- + +**Version**: 1.0.0 +**Non-blocking**: Syncs in background, logs errors +**Speed**: <2 seconds for typical sync diff --git a/.claude/skills/project-planner/SKILL.md b/.claude/skills/project-planner/SKILL.md new file mode 100644 index 0000000..25354be --- /dev/null +++ b/.claude/skills/project-planner/SKILL.md @@ -0,0 +1,638 @@ +--- +name: project-planner +description: Transforms project ideas into structured documentation (overview + specifications). Use when starting new projects or when brief needs project-level planning with vision, features, and technical requirements. +--- + +# Project Planner Skill + +**Purpose**: Generate comprehensive project documentation from high-level descriptions. + +**Trigger Words**: new project, project overview, project spec, technical requirements, project planning, architecture, system design + +--- + +## Quick Decision: Use Project Planning? 
+ +```python +def needs_project_planning(context: dict) -> bool: + """Fast evaluation for project-level planning.""" + + # Indicators of project-level work + project_indicators = [ + "new project", "project overview", "system design", + "architecture", "technical requirements", "project spec", + "build a", "create a", "develop a platform", + "microservices", "full stack", "api + frontend" + ] + + description = context.get("description", "").lower() + return any(indicator in description for indicator in project_indicators) +``` + +--- + +## Output Structure + +Generates TWO documents in `project-management/`: + +### 1. PROJECT-OVERVIEW.md +High-level vision and goals + +### 2. SPECIFICATIONS.md +Detailed technical requirements + +--- + +## Document 1: PROJECT-OVERVIEW.md + +### Template Structure + +```markdown +# {Project Name} + +> {Tagline - one compelling sentence} + +## Vision + +{2-3 sentences describing the ultimate goal and impact} + +## Goals + +1. {Primary goal} +2. {Secondary goal} +3. {Tertiary goal} + +## Key Features + +- **{Feature 1}**: {Brief description} +- **{Feature 2}**: {Brief description} +- **{Feature 3}**: {Brief description} +- **{Feature 4}**: {Brief description} +- **{Feature 5}**: {Brief description} + +## Success Criteria + +1. **{Metric 1}**: {Target} +2. **{Metric 2}**: {Target} +3. **{Metric 3}**: {Target} + +## Constraints + +- **Budget**: {If specified} +- **Timeline**: {If specified} +- **Technology**: {Required tech stack or limitations} +- **Team**: {Team size/composition if known} + +## Out of Scope + +- {What this project will NOT do} +- {Features explicitly excluded} +- {Future phases} +``` + +### Example Output + +```markdown +# TaskFlow Pro + +> Modern task management with AI-powered prioritization + +## Vision + +Build a task management platform that helps remote teams stay organized through intelligent prioritization, real-time collaboration, and seamless integrations with existing tools. + +## Goals + +1. 
Reduce task management overhead by 50% +2. Enable real-time team collaboration +3. Integrate with popular dev tools (GitHub, Jira, Slack) + +## Key Features + +- **AI Prioritization**: ML-based task ranking by urgency and impact +- **Real-time Collaboration**: Live updates, comments, mentions +- **Smart Integrations**: Auto-sync with GitHub issues, Jira tickets +- **Custom Workflows**: Configurable pipelines per team +- **Analytics Dashboard**: Team productivity insights + +## Success Criteria + +1. **User Adoption**: 1000 active users in 6 months +2. **Performance**: <200ms API response time +3. **Reliability**: 99.9% uptime + +## Constraints + +- Timeline: 6 months MVP +- Technology: Python backend, React frontend, PostgreSQL +- Team: 2 backend, 2 frontend, 1 ML engineer + +## Out of Scope + +- Mobile apps (Phase 2) +- Video conferencing +- Time tracking (separate product) +``` + +--- + +## Document 2: SPECIFICATIONS.md + +### Template Structure + +```markdown +# {Project Name} - Technical Specifications + +## Functional Requirements + +### Core Features + +#### {Feature 1} +- **Description**: {What it does} +- **User Story**: As a {role}, I want {action} so that {benefit} +- **Acceptance Criteria**: + - [ ] {Criterion 1} + - [ ] {Criterion 2} + - [ ] {Criterion 3} + +#### {Feature 2} +{Repeat structure} + +### User Flows + +#### {Flow 1}: {Name} +1. User {action} +2. System {response} +3. User {next action} +4. 
Result: {outcome} + +--- + +## Non-Functional Requirements + +### Performance +- API response time: <200ms (p95) +- Page load time: <1s +- Concurrent users: 10,000+ +- Database queries: <50ms + +### Security +- Authentication: OAuth2 + JWT +- Authorization: Role-based access control (RBAC) +- Data encryption: AES-256 at rest, TLS 1.3 in transit +- Rate limiting: 100 req/min per user + +### Reliability +- Uptime: 99.9% SLA +- Backup frequency: Daily +- Recovery time: <1 hour (RTO) +- Data loss: <5 minutes (RPO) + +### Scalability +- Horizontal scaling: Auto-scale based on load +- Database: Read replicas for queries +- Cache: Redis for hot data +- CDN: Static assets + +--- + +## API Contracts + +### Authentication API + +#### POST /api/auth/login +```json +// Request +{ + "email": "user@example.com", + "password": "hashed_password" +} + +// Response (200 OK) +{ + "token": "jwt_token_here", + "user": { + "id": "user_123", + "email": "user@example.com", + "name": "John Doe" + } +} + +// Error (401 Unauthorized) +{ + "error": "Invalid credentials" +} +``` + +#### POST /api/auth/logout +{Repeat structure for each endpoint} + +### Tasks API + +#### GET /api/tasks +```json +// Query params: ?page=1&per_page=50&status=active +// Response (200 OK) +{ + "tasks": [ + { + "id": "task_123", + "title": "Fix bug in auth", + "status": "active", + "priority": "high", + "assignee": "user_456", + "created_at": "2025-10-30T10:00:00Z" + } + ], + "pagination": { + "page": 1, + "per_page": 50, + "total": 150 + } +} +``` + +{Continue for all major endpoints} + +--- + +## Data Models + +### User +```python +class User: + id: str (UUID) + email: str (unique, indexed) + password_hash: str + name: str + role: Enum['admin', 'member', 'viewer'] + created_at: datetime + updated_at: datetime + last_login: datetime | None +``` + +### Task +```python +class Task: + id: str (UUID) + title: str (max 200 chars) + description: str | None + status: Enum['backlog', 'active', 'completed'] + priority: 
Enum['low', 'medium', 'high', 'urgent'] + assignee_id: str | None (FK -> User.id) + project_id: str (FK -> Project.id) + due_date: datetime | None + created_at: datetime + updated_at: datetime +``` + +{Continue for all major models} + +--- + +## System Architecture + +### Components +- **API Gateway**: Kong/NGINX for routing and rate limiting +- **Backend Services**: FastAPI/Django microservices +- **Database**: PostgreSQL (primary), Redis (cache) +- **Message Queue**: RabbitMQ for async tasks +- **Storage**: S3 for file uploads +- **Monitoring**: Prometheus + Grafana + +### Deployment +- **Infrastructure**: AWS/GCP Kubernetes +- **CI/CD**: GitHub Actions +- **Environments**: dev, staging, production +- **Rollback**: Blue-green deployment + +--- + +## Dependencies + +### Backend +- Python 3.11+ +- FastAPI or Django REST Framework +- SQLAlchemy or Django ORM +- Celery for background tasks +- pytest for testing + +### Frontend +- React 18+ or Vue 3+ +- TypeScript +- Tailwind CSS or Material-UI +- Axios for API calls +- Vitest or Jest for testing + +### Infrastructure +- Docker + Docker Compose +- Kubernetes (production) +- PostgreSQL 15+ +- Redis 7+ +- NGINX or Caddy + +--- + +## Development Phases + +### Phase 1: MVP (Months 1-3) +- [ ] User authentication +- [ ] Basic task CRUD +- [ ] Simple prioritization +- [ ] API foundation + +### Phase 2: Collaboration (Months 4-5) +- [ ] Real-time updates (WebSocket) +- [ ] Comments and mentions +- [ ] Team management + +### Phase 3: Integrations (Month 6) +- [ ] GitHub integration +- [ ] Jira sync +- [ ] Slack notifications + +--- + +## Testing Strategy + +### Unit Tests +- Coverage: >80% +- All business logic functions +- Mock external dependencies + +### Integration Tests +- API endpoint testing +- Database transactions +- Authentication flows + +### E2E Tests +- Critical user flows +- Payment processing (if applicable) +- Admin workflows + +--- + +## Security Considerations + +### OWASP Top 10 Coverage +1. 
**Injection**: Parameterized queries, input validation +2. **Broken Auth**: JWT with refresh tokens, secure session management +3. **Sensitive Data**: Encryption at rest and in transit +4. **XXE**: Disable XML external entities +5. **Broken Access Control**: RBAC enforcement +6. **Security Misconfiguration**: Secure defaults, regular audits +7. **XSS**: Output escaping, CSP headers +8. **Insecure Deserialization**: Validate all input +9. **Known Vulnerabilities**: Dependency scanning (Snyk, Dependabot) +10. **Insufficient Logging**: Audit logs for sensitive actions + +--- + +## Monitoring & Observability + +### Metrics +- Request rate, error rate, latency (RED method) +- Database connection pool usage +- Cache hit/miss ratio +- Background job queue length + +### Logging +- Structured JSON logs +- Centralized logging (ELK stack or CloudWatch) +- Log levels: DEBUG (dev), INFO (staging), WARN/ERROR (prod) + +### Alerting +- Error rate >5% (P1) +- API latency >500ms (P2) +- Database connections >80% (P2) +- Disk usage >90% (P1) + +--- + +## Documentation Requirements + +- [ ] API documentation (OpenAPI/Swagger) +- [ ] Setup guide (README.md) +- [ ] Architecture diagrams +- [ ] Deployment runbook +- [ ] Troubleshooting guide + +``` + +--- + +## Generation Process + +### Step 1: Extract Project Context +```python +def extract_project_info(prompt: str) -> dict: + """Parse project description for key details.""" + + info = { + "name": None, + "description": prompt, + "features": [], + "tech_stack": [], + "constraints": {}, + "goals": [] + } + + # Extract from prompt: + # - Project name (if mentioned) + # - Desired features + # - Technology preferences + # - Timeline/budget constraints + # - Success metrics + + return info +``` + +### Step 2: Apply Output Style +Use `output-style-selector` to determine: +- **PROJECT-OVERVIEW.md**: Bullet-points, concise +- **SPECIFICATIONS.md**: Table-based for API contracts, YAML-structured for models + +### Step 3: Generate Documents +1. 
Create `project-management/` directory if needed +2. Write PROJECT-OVERVIEW.md (vision-focused) +3. Write SPECIFICATIONS.md (technical details) +4. Validate completeness + +### Step 4: Validation Checklist +```markdown +## Generated Documents Validation + +PROJECT-OVERVIEW.md: +- [ ] Project name and tagline present +- [ ] Vision statement (2-3 sentences) +- [ ] 3+ goals defined +- [ ] 5-10 key features listed +- [ ] Success criteria measurable +- [ ] Constraints documented +- [ ] Out-of-scope items listed + +SPECIFICATIONS.md: +- [ ] Functional requirements detailed +- [ ] Non-functional requirements (perf, security, reliability) +- [ ] API contracts with examples (if applicable) +- [ ] Data models defined +- [ ] Architecture overview +- [ ] Dependencies listed +- [ ] Development phases outlined +- [ ] Testing strategy included +``` + +--- + +## Integration with Commands + +### With `/lazy plan` +```bash +# Generate project docs first +/lazy plan --project "Build AI-powered task manager" + +→ project-planner skill triggers +→ Generates PROJECT-OVERVIEW.md + SPECIFICATIONS.md +→ Then creates first user story from specifications + +# Or start from enhanced prompt +/lazy plan --file enhanced_prompt.md + +→ Detects project-level scope +→ Runs project-planner +→ Creates foundational docs +→ Proceeds with story creation +``` + +### With `/lazy code` +```bash +# Reference specifications during implementation +/lazy code @US-3.4.md + +→ context-packer loads SPECIFICATIONS.md +→ API contracts and data models available +→ Implementation follows spec +``` + +--- + +## What This Skill Does NOT Do + +❌ Generate actual code (that's for `coder` agent) +❌ Create user stories (that's for `project-manager` agent) +❌ Make architectural decisions (provides template, you decide) +❌ Replace technical design documents (TDDs) + +✅ **DOES**: Create structured foundation documents for new projects. 
+ +--- + +## Configuration + +```bash +# Minimal specs (faster, less detail) +export LAZYDEV_PROJECT_SPEC_MINIMAL=1 + +# Skip API contracts (non-API projects) +export LAZYDEV_PROJECT_NO_API=1 + +# Focus on specific aspects +export LAZYDEV_PROJECT_FOCUS="security,performance" +``` + +--- + +## Tips for Effective Project Planning + +### For PROJECT-OVERVIEW.md +1. **Vision**: Think big picture - why does this exist? +2. **Goals**: Limit to 3-5 measurable outcomes +3. **Features**: High-level only (not task-level details) +4. **Success Criteria**: Must be measurable (numbers, percentages) + +### For SPECIFICATIONS.md +1. **API Contracts**: Start with authentication and core resources +2. **Data Models**: Include relationships and constraints +3. **Non-Functional**: Don't skip - these prevent tech debt +4. **Security**: Reference OWASP Top 10 coverage +5. **Phases**: Break into 2-3 month chunks maximum + +### Best Practices +- **Keep PROJECT-OVERVIEW under 2 pages**: Executive summary only +- **SPECIFICATIONS can be longer**: This is the source of truth +- **Update specs as you learn**: Living documents +- **Version control both**: Track changes over time + +--- + +## Example Trigger Scenarios + +### Scenario 1: New Greenfield Project +``` +User: "I want to build a real-time chat platform with video calls" + +→ project-planner triggers +→ Generates: + - PROJECT-OVERVIEW.md (vision: modern communication platform) + - SPECIFICATIONS.md (WebSocket APIs, video streaming, etc.) 
+→ Ready for user story creation +``` + +### Scenario 2: From Enhanced Prompt +``` +User: /lazy plan --file enhanced_prompt.md +# enhanced_prompt contains: detailed project requirements, tech stack, timeline + +→ project-planner parses prompt +→ Extracts structured information +→ Generates both documents +→ Proceeds to first user story +``` + +### Scenario 3: Partial Information +``` +User: "Build a task manager, not sure about details yet" + +→ project-planner generates template +→ Marks sections as [TODO: Specify...] +→ User fills in gaps incrementally +→ Re-generate or update manually +``` + +--- + +## Output Format (Completion) + +```markdown +## Project Planning Complete + +**Documents Generated**: + +1. **PROJECT-OVERVIEW.md** (2.4KB) + - Project: TaskFlow Pro + - Vision: Modern task management with AI + - Features: 5 key features defined + - Success criteria: 3 measurable metrics + +2. **SPECIFICATIONS.md** (8.1KB) + - Functional requirements: 5 core features detailed + - API contracts: 12 endpoints documented + - Data models: 6 models defined + - Architecture: Microservices with Kubernetes + - Development phases: 3 phases over 6 months + +**Location**: `./project-management/` + +**Next Steps**: +1. Review and refine generated documents +2. Run: `/lazy plan "First user story description"` +3. Begin implementation with `/lazy code` + +**Estimated Setup Time**: 15-20 minutes to review/customize +``` + +--- + +**Version**: 1.0.0 +**Output Size**: 10-15KB total (both documents) +**Generation Time**: ~30 seconds diff --git a/.claude/skills/regression-testing/SKILL.md b/.claude/skills/regression-testing/SKILL.md new file mode 100644 index 0000000..788ca4a --- /dev/null +++ b/.claude/skills/regression-testing/SKILL.md @@ -0,0 +1,430 @@ +--- +name: regression-testing +description: Evaluates and implements regression tests after bug fixes based on severity, code complexity, and coverage. Use when bugs are fixed to prevent future regressions. 
+--- + +# Regression Testing Skill + +**Purpose**: Automatically evaluate and implement regression tests after bug fixes to prevent future regressions. + +**When to Trigger**: This skill activates after bug fixes are implemented, allowing Claude (the orchestrator) to decide if regression tests would be valuable based on context. + +--- + +## Decision Criteria (Orchestrator Evaluation) + +Before implementing regression tests, evaluate these factors: + +### High Value Scenarios (Implement Regression Tests) +- **Critical Bugs**: Security, data loss, or production-impacting issues +- **Subtle Bugs**: Edge cases, race conditions, timing issues that are easy to miss +- **Complex Logic**: Multi-step workflows, state machines, intricate business rules +- **Low Coverage Areas**: Bug occurred in under-tested code (<70% coverage) +- **Recurring Patterns**: Similar bugs fixed before in related code +- **Integration Points**: Bugs at module/service boundaries + +### Lower Value Scenarios (Skip or Defer) +- **Trivial Fixes**: Typos, obvious logic errors with existing tests +- **Already Well-Tested**: Bug area has >90% coverage with comprehensive tests +- **One-Time Anomalies**: Environmental issues, config errors (not code bugs) +- **Rapid Prototyping**: Early-stage features expected to change significantly +- **UI-Only Changes**: Purely cosmetic fixes with no logic impact + +--- + +## Regression Test Strategy + +### 1. Bug Analysis Phase + +**Understand the Bug:** +```markdown +## Bug Context +- **What broke**: [Symptom/error] +- **Root cause**: [Why it happened] +- **Fix applied**: [What changed] +- **Failure scenario**: [Steps to reproduce original bug] +``` + +**Evaluate Test Value:** +```python +def should_add_regression_test(bug_context: dict) -> tuple[bool, str]: + """ + Decide if regression test is valuable. 
+ + Returns: + (add_test: bool, reason: str) + """ + severity = bug_context.get("severity") # critical, high, medium, low + complexity = bug_context.get("complexity") # high, medium, low + coverage = bug_context.get("coverage_pct", 0) + + # Critical bugs always get regression tests + if severity == "critical": + return True, "Critical bug requires regression test" + + # Complex bugs with low coverage + if complexity == "high" and coverage < 70: + return True, "Complex logic with insufficient coverage" + + # Already well-tested + if coverage > 90: + return False, "Area already has comprehensive tests" + + # Default: add test for medium+ severity + if severity in {"high", "medium"}: + return True, f"Bug severity {severity} warrants regression test" + + return False, "Low-value regression test, skipping" +``` + +### 2. Regression Test Implementation + +**Test Structure:** +```python +# test__regression.py + +import pytest +from datetime import datetime + +class TestRegressions: + """Regression tests for fixed bugs.""" + + def test_regression_issue_123_null_pointer_in_payment(self): + """ + Regression test for GitHub issue #123. + + Bug: NullPointerException when processing payment with missing user email. + Fixed: 2025-10-30 + Root cause: Missing null check in payment processor + + This test ensures the fix remains in place and prevents regression. + """ + # Arrange: Setup scenario that caused original bug + payment = Payment(amount=100.0, user=User(email=None)) + processor = PaymentProcessor() + + # Act: Execute the previously failing code path + result = processor.process(payment) + + # Assert: Verify fix works (no exception, proper error handling) + assert result.status == "failed" + assert "invalid user email" in result.error_message.lower() + + def test_regression_pr_456_race_condition_in_cache(self): + """ + Regression test for PR #456. 
+ + Bug: Race condition in cache invalidation caused stale reads + Fixed: 2025-10-30 + Root cause: Non-atomic read-modify-write operation + + This test simulates concurrent cache access to verify thread safety. + """ + # Arrange: Setup concurrent scenario + cache = ThreadSafeCache() + cache.set("key", "value1") + + # Act: Simulate race condition with threads + with ThreadPoolExecutor(max_workers=10) as executor: + futures = [ + executor.submit(cache.update, "key", f"value{i}") + for i in range(100) + ] + wait(futures) + + # Assert: Verify no stale reads or corruption + final_value = cache.get("key") + assert final_value.startswith("value") + assert cache.consistency_check() # Internal consistency +``` + +**Test Naming Convention:** +- `test_regression__` +- Include issue/PR number for traceability +- Short description of what broke + +**Test Documentation:** +- **Bug description**: What failed +- **Date fixed**: When fix was applied +- **Root cause**: Why it happened +- **Test purpose**: What regression is prevented + +### 3. Regression Test Coverage + +**What to Test:** +1. **Exact Failure Scenario**: Reproduce original bug conditions +2. **Edge Cases Around Fix**: Test boundaries near the bug +3. **Integration Impact**: Test how fix affects dependent code +4. **Performance**: If bug was performance-related, add benchmark + +**What NOT to Test:** +- Don't duplicate existing unit tests +- Don't test obvious behavior already covered +- Don't over-specify implementation details (brittle tests) + +--- + +## Workflow Integration + +### Standard Bug Fix Flow + +```bash +# 1. Fix the bug +/lazy code "fix: null pointer in payment processor" + +# ✓ Bug fixed and committed + +# 2. Regression testing skill evaluates +# (Automatic trigger after bug fix commit) + +## Decision: Add regression test? +- Severity: HIGH (production crash) +- Coverage: 65% (medium) +- Complexity: MEDIUM +→ **YES, add regression test** + +# 3. 
Implement regression test +# ✓ test_regression_issue_123_null_pointer_in_payment() added +# ✓ Coverage increased to 78% +# ✓ Test passes (bug is fixed) + +# 4. Commit regression test +git add tests/test_payment_regression.py +git commit -m "test: add regression test for issue #123 null pointer" +``` + +### Quick Bug Fix (Skip Regression) + +```bash +# 1. Fix trivial bug +/lazy code "fix: typo in error message" + +# ✓ Bug fixed + +# 2. Regression testing skill evaluates +## Decision: Add regression test? +- Severity: LOW (cosmetic) +- Coverage: 95% (excellent) +- Complexity: LOW (trivial) +→ **NO, skip regression test** (low value, already well-tested) + +# 3. Commit fix only +# No additional test needed +``` + +--- + +## Regression Test Suite Management + +### Organization + +``` +tests/ +├── test_module.py # Regular unit tests +├── test_module_integration.py # Integration tests +└── test_module_regression.py # Regression tests (this skill) +``` + +**Separate regression tests** to: +- Track historical bug fixes +- Easy to identify which tests prevent regressions +- Can be run as separate CI job for faster feedback + +### CI/CD Integration + +```yaml +# .github/workflows/ci.yml + +jobs: + regression-tests: + runs-on: ubuntu-latest + steps: + - name: Run regression test suite + run: pytest tests/*_regression.py -v --tb=short + + # Fast feedback: regression tests run first + # If they fail, likely a regression occurred +``` + +### Regression Test Metrics + +**Track Over Time:** +- Total regression tests count +- Bug recurrence rate (0% is goal) +- Coverage increase from regression tests +- Time to detect regression (should be in CI, not production) + +--- + +## Examples + +### Example 1: Critical Bug (Add Regression Test) + +**Bug**: Authentication bypass when session token is malformed +**Fix**: Added token validation +**Decision**: ✅ **Add regression test** (security critical) + +```python +def test_regression_issue_789_auth_bypass_malformed_token(): + """ + 
Regression test for security issue #789. + + Bug: Malformed session tokens bypassed authentication + Fixed: 2025-10-30 + Severity: CRITICAL (security) + Root cause: Missing token format validation + """ + # Arrange: Malformed token that bypassed auth + malformed_token = "invalid||format||token" + + # Act: Attempt authentication + result = AuthService.validate_token(malformed_token) + + # Assert: Should reject malformed token + assert result.is_valid is False + assert result.error == "invalid_token_format" +``` + +### Example 2: Complex Bug (Add Regression Test) + +**Bug**: Race condition in distributed lock causes duplicate job execution +**Fix**: Atomic compare-and-swap operation +**Decision**: ✅ **Add regression test** (complex concurrency issue) + +```python +def test_regression_pr_234_race_condition_duplicate_jobs(): + """ + Regression test for PR #234. + + Bug: Race condition allowed duplicate job execution + Fixed: 2025-10-30 + Complexity: HIGH (concurrency) + Root cause: Non-atomic lock acquisition + """ + # Arrange: Simulate concurrent job submissions + job_queue = DistributedJobQueue() + job_id = "test-job-123" + + # Act: 100 threads try to acquire same job + with ThreadPoolExecutor(max_workers=100) as executor: + futures = [ + executor.submit(job_queue.try_acquire_job, job_id) + for _ in range(100) + ] + results = [f.result() for f in futures] + + # Assert: Only ONE thread should acquire the job + acquired = [r for r in results if r.acquired] + assert len(acquired) == 1, "Race condition: multiple threads acquired same job" +``` + +### Example 3: Trivial Bug (Skip Regression Test) + +**Bug**: Typo in log message "Usre authenticated" → "User authenticated" +**Fix**: Corrected spelling +**Decision**: ❌ **Skip regression test** (cosmetic, no logic impact) + +``` +No test needed. Fix is obvious and has no functional impact. +Existing tests already cover authentication logic. 
+``` + +### Example 4: Well-Tested Area (Skip Regression Test) + +**Bug**: Off-by-one error in pagination (page 1 showed 0 results) +**Fix**: Changed `offset = page * size` to `offset = (page - 1) * size` +**Coverage**: 95% (pagination thoroughly tested) +**Decision**: ❌ **Skip regression test** (area already has comprehensive tests) + +```python +# Existing test already covers this: +def test_pagination_first_page_shows_results(): + results = api.get_users(page=1, size=10) + assert len(results) == 10 # This test would have caught the bug +``` + +--- + +## Best Practices + +### DO: +✅ Add regression tests for **critical and complex bugs** +✅ Include **issue/PR number** in test name for traceability +✅ Document **what broke, why, and when** in test docstring +✅ Test the **exact failure scenario** that caused the bug +✅ Keep regression tests **separate** from unit tests (easier tracking) +✅ Run regression tests in **CI/CD** for early detection + +### DON'T: +❌ Add regression tests for **trivial or cosmetic bugs** +❌ Duplicate **existing comprehensive tests** +❌ Write **brittle tests** that test implementation details +❌ Skip **root cause analysis** (understand why it broke) +❌ Forget to **verify test fails** before fix (should reproduce bug) + +--- + +## Output Format + +When this skill triggers, provide: + +```markdown +## Regression Test Evaluation + +**Bug Fixed**: [Brief description] +**Issue/PR**: #[number] +**Severity**: [critical/high/medium/low] +**Complexity**: [high/medium/low] +**Current Coverage**: [X%] + +**Decision**: [✅ Add Regression Test | ❌ Skip Regression Test] + +**Reason**: [Why regression test is/isn't valuable] + +--- + +[If adding test] +## Regression Test Implementation + +**File**: `tests/test__regression.py` + +```python +def test_regression__(): + """ + [Docstring with bug context] + """ + # Test implementation +``` + +**Coverage Impact**: +X% (before: Y%, after: Z%) +``` + +--- + +## Integration with Other Skills + +- **Works with**: 
`test-driven-development` (adds tests post-fix) +- **Complements**: `code-review-request` (reviewer checks for regression tests) +- **Used by**: `/lazy fix` command (auto-evaluates regression test need) + +--- + +## Configuration + +**Environment Variables:** +```bash +# Force regression tests for all bugs (strict mode) +export LAZYDEV_FORCE_REGRESSION_TESTS=1 + +# Disable regression test skill +export LAZYDEV_DISABLE_REGRESSION_SKILL=1 + +# Minimum coverage threshold to skip regression test (default: 90) +export LAZYDEV_REGRESSION_SKIP_COVERAGE_THRESHOLD=90 +``` + +--- + +**Version**: 1.0.0 +**Created**: 2025-10-30 +**Anthropic Best Practice**: Model-invoked, autonomous trigger after bug fixes diff --git a/.claude/skills/security-audit/SKILL.md b/.claude/skills/security-audit/SKILL.md new file mode 100644 index 0000000..c43855d --- /dev/null +++ b/.claude/skills/security-audit/SKILL.md @@ -0,0 +1,274 @@ +--- +name: security-audit +description: Triggers for authentication, payments, user input, and API endpoints to check OWASP risks. Auto-evaluates security need and provides actionable fixes, not checklists. +--- + +# Security Audit Skill + +**Purpose**: Catch security vulnerabilities early with targeted checks, not generic checklists. + +**Trigger Words**: auth, login, password, payment, credit card, token, API endpoint, user input, SQL, database query, session, cookie, upload + +--- + +## Quick Decision: When to Audit? 
+ +```python +def needs_security_audit(code_context: dict) -> bool: + """Fast security risk evaluation.""" + + # ALWAYS audit these (high risk) + critical_patterns = [ + "authentication", "authorization", "login", "password", + "payment", "credit card", "billing", "stripe", "paypal", + "admin", "sudo", "privilege", "role", + "token", "jwt", "session", "cookie", + "sql", "database", "query", "exec", "eval", + "upload", "file", "download", "path traversal" + ] + + # Check if any critical pattern in code + if any(p in code_context.get("description", "").lower() for p in critical_patterns): + return True + + # Skip for: docs, tests, config, low-risk utils + skip_patterns = ["test_", "docs/", "README", "config", "utils"] + if any(p in code_context.get("files", []) for p in skip_patterns): + return False + + return False +``` + +--- + +## Security Checks (Targeted, Not Exhaustive) + +### 1. **Input Validation** (Most Common) +```python +# ❌ BAD - No validation +def get_user(user_id): + return db.query(f"SELECT * FROM users WHERE id = {user_id}") + +# ✅ GOOD - Validated + parameterized +def get_user(user_id: int): + if not isinstance(user_id, int) or user_id <= 0: + raise ValueError("Invalid user_id") + return db.query("SELECT * FROM users WHERE id = ?", [user_id]) +``` + +**Quick Fix**: Add type hints + validation at entry points. + +--- + +### 2. **SQL Injection** (Critical) +```python +# ❌ BAD - String interpolation +query = f"SELECT * FROM users WHERE email = '{email}'" + +# ✅ GOOD - Parameterized queries +query = "SELECT * FROM users WHERE email = ?" +db.execute(query, [email]) +``` + +**Quick Fix**: Never use f-strings for SQL. Use ORM or parameterized queries. + +--- + +### 3. 
**Authentication & Secrets** (Critical) +```python +# ❌ BAD - Hardcoded secrets +API_KEY = "sk_live_abc123" +password = "admin123" + +# ✅ GOOD - Environment variables +API_KEY = os.getenv("STRIPE_API_KEY") +# Passwords: bcrypt hashed, never plaintext + +# ❌ BAD - Weak session +session["user_id"] = user_id # No expiry, no signing + +# ✅ GOOD - Secure session +session.permanent = False +session["user_id"] = user_id +session["expires"] = time.time() + 3600 # 1 hour +``` + +**Quick Fix**: Extract secrets to .env, hash passwords, add session expiry. + +--- + +### 4. **Authorization** (Often Forgotten) +```python +# ❌ BAD - Missing authorization check +@app.route("/admin/users/", methods=["DELETE"]) +def delete_user(user_id): + User.delete(user_id) # Anyone can delete! + +# ✅ GOOD - Check permissions +@app.route("/admin/users/", methods=["DELETE"]) +@require_role("admin") +def delete_user(user_id): + if not current_user.can_delete(user_id): + abort(403) + User.delete(user_id) +``` + +**Quick Fix**: Add permission checks before destructive operations. + +--- + +### 5. **Rate Limiting** (API Endpoints) +```python +# ❌ BAD - No rate limit +@app.route("/api/login", methods=["POST"]) +def login(): + # Brute force possible + return authenticate(request.json) + +# ✅ GOOD - Rate limited +@app.route("/api/login", methods=["POST"]) +@rate_limit("5 per minute") +def login(): + return authenticate(request.json) +``` + +**Quick Fix**: Add rate limiting to login, payment, sensitive endpoints. + +--- + +### 6. **XSS Prevention** (Frontend/Templates) +```python +# ❌ BAD - Unescaped user input +return f"
<h1>Welcome {username}</h1>
" # XSS if username = "" + +# ✅ GOOD - Escaped output +from html import escape +return f"
<h1>Welcome {escape(username)}</h1>
" + +# Or use framework escaping (Jinja2, React auto-escapes) +``` + +**Quick Fix**: Escape user input in HTML. Use framework defaults. + +--- + +### 7. **File Upload Safety** +```python +# ❌ BAD - No validation +@app.route("/upload", methods=["POST"]) +def upload(): + file = request.files["file"] + file.save(f"uploads/{file.filename}") # Path traversal! Overwrite! + +# ✅ GOOD - Validated +import os +from werkzeug.utils import secure_filename + +ALLOWED_EXTENSIONS = {"png", "jpg", "pdf"} + +@app.route("/upload", methods=["POST"]) +def upload(): + file = request.files["file"] + if not file or "." not in file.filename: + abort(400, "Invalid file") + + ext = file.filename.rsplit(".", 1)[1].lower() + if ext not in ALLOWED_EXTENSIONS: + abort(400, "File type not allowed") + + filename = secure_filename(file.filename) + file.save(os.path.join("uploads", filename)) +``` + +**Quick Fix**: Whitelist extensions, sanitize filenames, limit size. + +--- + +## Output Format (Actionable Only) + +```markdown +## Security Audit Results + +**Risk Level**: [CRITICAL | HIGH | MEDIUM | LOW] + +### Issues Found: X + +1. **[CRITICAL] SQL Injection in get_user() (auth.py:45)** + - Issue: f-string used for SQL query + - Fix: Use parameterized query + - Code: + ```python + # Change this: + query = f"SELECT * FROM users WHERE id = {user_id}" + # To this: + query = "SELECT * FROM users WHERE id = ?" + db.execute(query, [user_id]) + ``` + +2. **[HIGH] Missing rate limiting on /api/login** + - Issue: Brute force attacks possible + - Fix: Add @rate_limit("5 per minute") decorator + +3. **[MEDIUM] Hardcoded API key in config.py:12** + - Issue: Secret in code + - Fix: Move to environment variable + +--- + +**Next Steps**: +1. Fix CRITICAL issues first (SQL injection) +2. Add rate limiting (5 min fix) +3. Extract secrets to .env +4. 
Re-run security audit after fixes +``` + +--- + +## Integration with Workflow + +```bash +# Automatic trigger +/lazy code "add user login endpoint" + +→ security-audit triggers +→ Checks: password handling, session, rate limiting +→ Finds: Missing bcrypt hash, no rate limit +→ Suggests fixes with code examples +→ Developer applies fixes +→ Re-audit confirms: ✅ Secure + +# Manual trigger +Skill(command="security-audit") +``` + +--- + +## What This Skill Does NOT Do + +❌ Generate 50-item security checklists (not actionable) +❌ Flag every minor issue (noise) +❌ Require penetration testing (that's a different tool) +❌ Cover infrastructure security (AWS, Docker, etc.) + +✅ **DOES**: Catch common code-level vulnerabilities with fast, practical fixes. + +--- + +## Configuration + +```bash +# Strict mode: audit everything (slower) +export LAZYDEV_SECURITY_STRICT=1 + +# Disable security skill +export LAZYDEV_DISABLE_SECURITY=1 + +# Focus on specific risks only +export LAZYDEV_SECURITY_FOCUS="sql,auth,xss" +``` + +--- + +**Version**: 1.0.0 +**OWASP Coverage**: SQL Injection, XSS, Broken Auth, Insecure Design, Security Misconfiguration +**Speed**: <5 seconds for typical file diff --git a/.claude/skills/subagent-driven-development/SKILL.md b/.claude/skills/subagent-driven-development/SKILL.md deleted file mode 100644 index 12c3708..0000000 --- a/.claude/skills/subagent-driven-development/SKILL.md +++ /dev/null @@ -1,36 +0,0 @@ ---- -name: subagent-driven-development -description: Route subtasks to the best sub-agent (coder, reviewer, research, PM) with clear handoffs -version: 0.1.0 -tags: [agents, orchestration] -triggers: - - subagent - - delegate - - handoff ---- - -# Subagent-Driven Development - -## Purpose -Decompose a problem and delegate each subtask to the appropriate agent with minimal ceremony. - -## Behavior -1. Identify subtasks (≤5) and required outputs. -2. 
Map to agents: - - Project Manager → story/tasks - - Coder → implementation/tests - - Reviewer → findings/patch plan - - Research → docs/examples -3. For each handoff, specify: input, expected artifacts, and success criteria. -4. Summarize progress and next agent to call. - -## Guardrails -- Keep subtasks atomic and ≤4 hours each. -- Avoid duplicate work; reuse artifacts across agents. - -## Integration -- All agents under `LAZY_DEV/.claude/agents/`; `/lazy create-feature`, `/lazy task-exec`, `/lazy story-review`. - -## Example Prompt -> Break this feature into subagent tasks and propose the call sequence. - diff --git a/.claude/skills/tech-stack-architect/SKILL.md b/.claude/skills/tech-stack-architect/SKILL.md new file mode 100644 index 0000000..f18c073 --- /dev/null +++ b/.claude/skills/tech-stack-architect/SKILL.md @@ -0,0 +1,376 @@ +--- +name: tech-stack-architect +description: Design complete technology stack and system architecture from project requirements - generates TECH-STACK.md with frontend/backend/database/DevOps choices plus rationale, and ARCHITECTURE.md with components, data flow, and mermaid diagrams +version: 0.1.0 +tags: [architecture, planning, tech-stack, design] +triggers: + - tech stack + - architecture + - technology choices + - system design + - architecture diagram +--- + +# Tech Stack Architect + +## Purpose +Generate comprehensive technology stack selection and system architecture design from project requirements. Creates two foundational documents that guide implementation. + +## When to Use +- Starting a new project after PROJECT-OVERVIEW.md is created +- Re-architecting existing systems +- Technology evaluation and selection +- Architecture documentation needed +- User mentions "tech stack", "architecture", "system design" + +## Behavior + +### Phase 1: Technology Stack Selection + +1. 
**Read PROJECT-OVERVIEW.md** for: + - Project goals and constraints + - Scale requirements (users, data, traffic) + - Team skills and preferences + - Budget and timeline + - Compliance requirements + +2. **Analyze requirements** across 4 categories: + - Frontend (framework, state management, UI library) + - Backend (language, framework, API style) + - Database (RDBMS, NoSQL, caching, search) + - DevOps (hosting, CI/CD, monitoring, security) + +3. **Generate TECH-STACK.md** with: + - **Category tables**: Technology | Rationale | Alternatives Considered + - **Integration notes**: How technologies work together + - **Trade-offs**: What you gain/lose with this stack + - **Migration path**: How to evolve the stack + - **Team considerations**: Learning curve, hiring, support + +### Phase 2: System Architecture Design + +1. **Design components**: + - Client-side architecture + - API layer and services + - Data storage and caching + - Background jobs and queues + - External integrations + +2. **Define data flow**: + - Request/response paths + - Authentication flow + - Data persistence patterns + - Event-driven flows (if applicable) + +3. **Generate ARCHITECTURE.md** with: + - **System Overview**: High-level component diagram (C4 Context) + - **Component Details**: Responsibilities, interfaces, dependencies + - **Data Flow Diagrams**: Key user journeys with sequence diagrams + - **Scalability Strategy**: Horizontal scaling, caching, load balancing + - **Security Architecture**: Auth, encryption, OWASP considerations + - **Mermaid Diagrams**: C4, sequence, data flow, deployment + +## Output Style +- Use `table-based` for technology comparisons +- Use `markdown-focused` with mermaid diagrams for architecture +- Keep rationales concise (1-2 sentences per choice) +- Include visual diagrams for clarity + +## Output Files + +### 1. 
project-management/TECH-STACK.md +```markdown +# Technology Stack + +## Summary +[2-3 sentence overview of the stack philosophy] + +## Frontend Stack + +| Technology | Choice | Rationale | Alternatives Considered | +|------------|--------|-----------|------------------------| +| Framework | React 18 | ... | Vue, Svelte, Angular | +| State | Zustand | ... | Redux, Jotai, Context | +| UI Library | Tailwind + shadcn/ui | ... | MUI, Chakra, custom | +| Build | Vite | ... | Webpack, Turbopack | + +## Backend Stack + +| Technology | Choice | Rationale | Alternatives Considered | +|------------|--------|-----------|------------------------| +| Language | Python 3.11 | ... | Node.js, Go, Rust | +| Framework | FastAPI | ... | Django, Flask, Express | +| API Style | REST + OpenAPI | ... | GraphQL, gRPC, tRPC | + +## Database & Storage + +| Technology | Choice | Rationale | Alternatives Considered | +|------------|--------|-----------|------------------------| +| Primary DB | PostgreSQL 15 | ... | MySQL, MongoDB, SQLite | +| Caching | Redis | ... | Memcached, Valkey | +| Search | ElasticSearch | ... | Algolia, Meilisearch | +| Object Storage | S3 | ... | MinIO, CloudFlare R2 | + +## DevOps & Infrastructure + +| Technology | Choice | Rationale | Alternatives Considered | +|------------|--------|-----------|------------------------| +| Hosting | AWS ECS Fargate | ... | k8s, VM, serverless | +| CI/CD | GitHub Actions | ... | GitLab CI, CircleCI | +| Monitoring | DataDog | ... | Grafana, New Relic | +| Secrets | AWS Secrets Manager | ... 
| Vault, Doppler | + +## Integration Notes +- [How frontend talks to backend] +- [Database connection pooling strategy] +- [Caching layer integration] +- [CI/CD pipeline flow] + +## Trade-offs +**Gains**: [What this stack provides] +**Costs**: [Complexity, vendor lock-in, learning curve] + +## Migration Path +- Phase 1: [Initial minimal stack] +- Phase 2: [Add caching, search] +- Phase 3: [Scale horizontally] + +## Team Considerations +- **Learning Curve**: [Estimate for team] +- **Hiring**: [Availability of talent] +- **Support**: [Community, docs, enterprise support] +``` + +### 2. project-management/ARCHITECTURE.md +```markdown +# System Architecture + +## Overview +[2-3 sentence description of the system] + +## C4 Context Diagram +```mermaid +C4Context + title System Context for [Project Name] + + Person(user, "User", "End user of the system") + System(app, "Application", "Main system") + System_Ext(auth, "Auth Provider", "OAuth2 provider") + System_Ext(payment, "Payment Gateway", "Stripe") + + Rel(user, app, "Uses", "HTTPS") + Rel(app, auth, "Authenticates", "OAuth2") + Rel(app, payment, "Processes payments", "API") +``` + +## Component Architecture +```mermaid +graph TB + Client[React Client] + API[FastAPI Backend] + DB[(PostgreSQL)] + Cache[(Redis)] + Queue[Job Queue] + Worker[Background Workers] + + Client -->|HTTPS/JSON| API + API -->|SQL| DB + API -->|GET/SET| Cache + API -->|Enqueue| Queue + Queue -->|Process| Worker + Worker -->|Update| DB +``` + +### Component Details + +**Client (React)** +- **Responsibilities**: UI rendering, state management, client-side validation +- **Key Libraries**: React Router, Zustand, React Query +- **Interfaces**: REST API via fetch/axios + +**API (FastAPI)** +- **Responsibilities**: Business logic, validation, auth, rate limiting +- **Key Modules**: auth, users, payments, notifications +- **Interfaces**: REST endpoints (OpenAPI), WebSocket (notifications) + +**Database (PostgreSQL)** +- **Responsibilities**: Persistent 
data storage, relational integrity +- **Schema**: Users, sessions, transactions, audit logs +- **Patterns**: Repository pattern, connection pooling + +**Cache (Redis)** +- **Responsibilities**: Session storage, rate limiting, job queue +- **TTL Strategy**: Sessions (24h), API cache (5m), rate limits (1h) + +**Background Workers** +- **Responsibilities**: Email sending, report generation, cleanup jobs +- **Queue**: Redis-backed Celery/ARQ +- **Monitoring**: Dead letter queue, retry logic + +## Authentication Flow +```mermaid +sequenceDiagram + participant User + participant Client + participant API + participant Auth0 + participant DB + + User->>Client: Click "Login" + Client->>Auth0: Redirect to OAuth2 + Auth0->>Client: Return auth code + Client->>API: Exchange code for token + API->>Auth0: Validate code + Auth0->>API: User profile + API->>DB: Create/update user + API->>Client: Return JWT token + Client->>Client: Store token (httpOnly cookie) +``` + +## Data Flow: User Registration +```mermaid +sequenceDiagram + participant Client + participant API + participant DB + participant Queue + participant Worker + participant Email + + Client->>API: POST /api/register + API->>API: Validate input + API->>DB: Create user (inactive) + API->>Queue: Enqueue welcome email + API->>Client: 201 Created + Queue->>Worker: Process email job + Worker->>Email: Send welcome email + Worker->>DB: Log email sent +``` + +## Scalability Strategy + +### Horizontal Scaling +- **API**: Stateless containers (2-10 instances behind ALB) +- **Database**: Read replicas for reporting queries +- **Cache**: Redis Cluster (3+ nodes) +- **Workers**: Auto-scale based on queue depth + +### Caching Strategy +- **API Responses**: Cache GET endpoints (5m TTL) +- **Database Queries**: Query result cache in Redis +- **Static Assets**: CDN (CloudFront) with edge caching + +### Load Balancing +- **Application**: AWS ALB with health checks +- **Database**: pgpool for read/write splitting +- **Geographic**: 
Multi-region deployment (future) + +## Security Architecture + +### Authentication & Authorization +- **Strategy**: OAuth2 + JWT tokens (15m access, 7d refresh) +- **Storage**: httpOnly cookies for web, secure storage for mobile +- **Rotation**: Automatic token refresh + +### Data Protection +- **At Rest**: PostgreSQL encryption (AWS RDS) +- **In Transit**: TLS 1.3 for all connections +- **Secrets**: AWS Secrets Manager, rotated monthly + +### OWASP Top 10 Mitigations +- **Injection**: Parameterized queries (SQLAlchemy ORM) +- **Auth**: JWT validation, session management +- **XSS**: Content Security Policy, input sanitization +- **CSRF**: SameSite cookies, CSRF tokens +- **Rate Limiting**: Redis-backed (100 req/min per IP) + +### Network Security +- **VPC**: Private subnets for DB/workers +- **Security Groups**: Least privilege access +- **WAF**: CloudFront WAF rules + +## Deployment Architecture +```mermaid +graph TB + subgraph "Public Subnet" + ALB[Application Load Balancer] + end + + subgraph "Private Subnet - App Tier" + API1[API Container 1] + API2[API Container 2] + Worker1[Worker Container] + end + + subgraph "Private Subnet - Data Tier" + DB[(RDS PostgreSQL)] + Cache[(ElastiCache Redis)] + end + + Internet((Internet)) --> ALB + ALB --> API1 + ALB --> API2 + API1 --> DB + API2 --> DB + API1 --> Cache + API2 --> Cache + Worker1 --> DB + Worker1 --> Cache +``` + +## Monitoring & Observability + +**Metrics**: +- API latency (p50, p95, p99) +- Error rates by endpoint +- Database connection pool usage +- Cache hit/miss ratios + +**Logging**: +- Structured JSON logs (ECS logs to CloudWatch) +- Request ID tracing across services +- Error tracking (Sentry) + +**Alerting**: +- API error rate >1% +- Database connections >80% +- Job queue depth >1000 + +## Future Considerations + +**Phase 2 Enhancements**: +- GraphQL API option +- WebSocket real-time updates +- ElasticSearch for full-text search + +**Phase 3 Scale**: +- Multi-region deployment +- Event-driven 
microservices +- CQRS for read-heavy workloads +``` + +## Guardrails +- Keep technology choices pragmatic (avoid hype-driven development) +- Consider team skills when selecting stack +- Prefer managed services over self-hosted for DevOps +- Include alternatives to show deliberate choice +- Use mermaid for all diagrams (portable, version-controllable) +- Keep each document under 400 lines +- Link to official docs for each technology + +## Integration +- Run after PROJECT-OVERVIEW.md is created +- Feed into `/lazy plan` for user story creation +- Reference during `/lazy code` for implementation consistency +- Update during `/lazy review` if architecture evolves + +## Example Prompt +> Design the tech stack and architecture for this project + +## Validation Checklist +- [ ] TECH-STACK.md has all 4 categories (Frontend, Backend, Database, DevOps) +- [ ] Each technology has rationale and alternatives +- [ ] ARCHITECTURE.md has system overview + 3+ mermaid diagrams +- [ ] Authentication and data flow are documented +- [ ] Scalability and security sections are complete +- [ ] Trade-offs and migration path are clear diff --git a/.claude/skills/writing-skills/SKILL.md b/.claude/skills/writing-skills/SKILL.md deleted file mode 100644 index 9a429b7..0000000 --- a/.claude/skills/writing-skills/SKILL.md +++ /dev/null @@ -1,622 +0,0 @@ ---- -name: writing-skills -description: Use when creating new skills, editing existing skills, or verifying skills work before deployment - applies TDD to process documentation by testing with subagents before writing, iterating until bulletproof against rationalization ---- - -# Writing Skills - -## Overview - -**Writing skills IS Test-Driven Development applied to process documentation.** - -**Personal skills are written to `~/.claude/skills`** - -You write test cases (pressure scenarios with subagents), watch them fail (baseline behavior), write the skill (documentation), watch tests pass (agents comply), and refactor (close loopholes). 
- -**Core principle:** If you didn't watch an agent fail without the skill, you don't know if the skill teaches the right thing. - -**REQUIRED BACKGROUND:** You MUST understand superpowers:test-driven-development before using this skill. That skill defines the fundamental RED-GREEN-REFACTOR cycle. This skill adapts TDD to documentation. - -**Official guidance:** For Anthropic's official skill authoring best practices, see anthropic-best-practices.md. This document provides additional patterns and guidelines that complement the TDD-focused approach in this skill. - -## What is a Skill? - -A **skill** is a reference guide for proven techniques, patterns, or tools. Skills help future Claude instances find and apply effective approaches. - -**Skills are:** Reusable techniques, patterns, tools, reference guides - -**Skills are NOT:** Narratives about how you solved a problem once - -## TDD Mapping for Skills - -| TDD Concept | Skill Creation | -|-------------|----------------| -| **Test case** | Pressure scenario with subagent | -| **Production code** | Skill document (SKILL.md) | -| **Test fails (RED)** | Agent violates rule without skill (baseline) | -| **Test passes (GREEN)** | Agent complies with skill present | -| **Refactor** | Close loopholes while maintaining compliance | -| **Write test first** | Run baseline scenario BEFORE writing skill | -| **Watch it fail** | Document exact rationalizations agent uses | -| **Minimal code** | Write skill addressing those specific violations | -| **Watch it pass** | Verify agent now complies | -| **Refactor cycle** | Find new rationalizations → plug → re-verify | - -The entire skill creation process follows RED-GREEN-REFACTOR. 
- -## When to Create a Skill - -**Create when:** -- Technique wasn't intuitively obvious to you -- You'd reference this again across projects -- Pattern applies broadly (not project-specific) -- Others would benefit - -**Don't create for:** -- One-off solutions -- Standard practices well-documented elsewhere -- Project-specific conventions (put in CLAUDE.md) - -## Skill Types - -### Technique -Concrete method with steps to follow (condition-based-waiting, root-cause-tracing) - -### Pattern -Way of thinking about problems (flatten-with-flags, test-invariants) - -### Reference -API docs, syntax guides, tool documentation (office docs) - -## Directory Structure - - -``` -skills/ - skill-name/ - SKILL.md # Main reference (required) - supporting-file.* # Only if needed -``` - -**Flat namespace** - all skills in one searchable namespace - -**Separate files for:** -1. **Heavy reference** (100+ lines) - API docs, comprehensive syntax -2. **Reusable tools** - Scripts, utilities, templates - -**Keep inline:** -- Principles and concepts -- Code patterns (< 50 lines) -- Everything else - -## SKILL.md Structure - -**Frontmatter (YAML):** -- Only two fields supported: `name` and `description` -- Max 1024 characters total -- `name`: Use letters, numbers, and hyphens only (no parentheses, special chars) -- `description`: Third-person, includes BOTH what it does AND when to use it - - Start with "Use when..." to focus on triggering conditions - - Include specific symptoms, situations, and contexts - - Keep under 500 characters if possible - -```markdown ---- -name: Skill-Name-With-Hyphens -description: Use when [specific triggering conditions and symptoms] - [what the skill does and how it helps, written in third person] ---- - -# Skill Name - -## Overview -What is this? Core principle in 1-2 sentences. 
- -## When to Use -[Small inline flowchart IF decision non-obvious] - -Bullet list with SYMPTOMS and use cases -When NOT to use - -## Core Pattern (for techniques/patterns) -Before/after code comparison - -## Quick Reference -Table or bullets for scanning common operations - -## Implementation -Inline code for simple patterns -Link to file for heavy reference or reusable tools - -## Common Mistakes -What goes wrong + fixes - -## Real-World Impact (optional) -Concrete results -``` - - -## Claude Search Optimization (CSO) - -**Critical for discovery:** Future Claude needs to FIND your skill - -### 1. Rich Description Field - -**Purpose:** Claude reads description to decide which skills to load for a given task. Make it answer: "Should I read this skill right now?" - -**Format:** Start with "Use when..." to focus on triggering conditions, then explain what it does - -**Content:** -- Use concrete triggers, symptoms, and situations that signal this skill applies -- Describe the *problem* (race conditions, inconsistent behavior) not *language-specific symptoms* (setTimeout, sleep) -- Keep triggers technology-agnostic unless the skill itself is technology-specific -- If skill is technology-specific, make that explicit in the trigger -- Write in third person (injected into system prompt) - -```yaml -# ❌ BAD: Too abstract, vague, doesn't include when to use -description: For async testing - -# ❌ BAD: First person -description: I can help you with async tests when they're flaky - -# ❌ BAD: Mentions technology but skill isn't specific to it -description: Use when tests use setTimeout/sleep and are flaky - -# ✅ GOOD: Starts with "Use when", describes problem, then what it does -description: Use when tests have race conditions, timing dependencies, or pass/fail inconsistently - replaces arbitrary timeouts with condition polling for reliable async tests - -# ✅ GOOD: Technology-specific skill with explicit trigger -description: Use when using React Router and handling 
authentication redirects - provides patterns for protected routes and auth state management -``` - -### 2. Keyword Coverage - -Use words Claude would search for: -- Error messages: "Hook timed out", "ENOTEMPTY", "race condition" -- Symptoms: "flaky", "hanging", "zombie", "pollution" -- Synonyms: "timeout/hang/freeze", "cleanup/teardown/afterEach" -- Tools: Actual commands, library names, file types - -### 3. Descriptive Naming - -**Use active voice, verb-first:** -- ✅ `creating-skills` not `skill-creation` -- ✅ `testing-skills-with-subagents` not `subagent-skill-testing` - -### 4. Token Efficiency (Critical) - -**Problem:** getting-started and frequently-referenced skills load into EVERY conversation. Every token counts. - -**Target word counts:** -- getting-started workflows: <150 words each -- Frequently-loaded skills: <200 words total -- Other skills: <500 words (still be concise) - -**Techniques:** - -**Move details to tool help:** -```bash -# ❌ BAD: Document all flags in SKILL.md -search-conversations supports --text, --both, --after DATE, --before DATE, --limit N - -# ✅ GOOD: Reference --help -search-conversations supports multiple modes and filters. Run --help for details. -``` - -**Use cross-references:** -```markdown -# ❌ BAD: Repeat workflow details -When searching, dispatch subagent with template... -[20 lines of repeated instructions] - -# ✅ GOOD: Reference other skill -Always use subagents (50-100x context savings). REQUIRED: Use [other-skill-name] for workflow. -``` - -**Compress examples:** -```markdown -# ❌ BAD: Verbose example (42 words) -your human partner: "How did we handle authentication errors in React Router before?" -You: I'll search past conversations for React Router authentication patterns. -[Dispatch subagent with search query: "React Router authentication error handling 401"] - -# ✅ GOOD: Minimal example (20 words) -Partner: "How did we handle auth errors in React Router?" -You: Searching... 
-[Dispatch subagent → synthesis]
-```
-
-**Eliminate redundancy:**
-- Don't repeat what's in cross-referenced skills
-- Don't explain what's obvious from command
-- Don't include multiple examples of same pattern
-
-**Verification:**
-```bash
-wc -w skills/path/SKILL.md
-# getting-started workflows: aim for <150 each
-# Other frequently-loaded: aim for <200 total
-```
-
-**Name by what you DO or core insight:**
-- ✅ `condition-based-waiting` > `async-test-helpers`
-- ✅ `using-skills` not `skill-usage`
-- ✅ `flatten-with-flags` > `data-structure-refactoring`
-- ✅ `root-cause-tracing` > `debugging-techniques`
-
-**Gerunds (-ing) work well for processes:**
-- `creating-skills`, `testing-skills`, `debugging-with-logs`
-- Active, describes the action you're taking
-
-### 5. Cross-Referencing Other Skills
-
-**When writing documentation that references other skills:**
-
-Use skill name only, with explicit requirement markers:
-- ✅ Good: `**REQUIRED SUB-SKILL:** Use superpowers:test-driven-development`
-- ✅ Good: `**REQUIRED BACKGROUND:** You MUST understand superpowers:systematic-debugging`
-- ❌ Bad: `See skills/testing/test-driven-development` (unclear if required)
-- ❌ Bad: `@skills/testing/test-driven-development/SKILL.md` (force-loads, burns context)
-
-**Why no @ links:** `@` syntax force-loads files immediately, consuming 200k+ context before you need them.
-
-## Flowchart Usage
-
-```dot
-digraph when_flowchart {
-  "Need to show information?" [shape=diamond];
-  "Decision where I might go wrong?" [shape=diamond];
-  "Use markdown" [shape=box];
-  "Small inline flowchart" [shape=box];
-
-  "Need to show information?" -> "Decision where I might go wrong?" [label="yes"];
-  "Decision where I might go wrong?" -> "Small inline flowchart" [label="yes"];
-  "Decision where I might go wrong?" 
-> "Use markdown" [label="no"]; -} -``` - -**Use flowcharts ONLY for:** -- Non-obvious decision points -- Process loops where you might stop too early -- "When to use A vs B" decisions - -**Never use flowcharts for:** -- Reference material → Tables, lists -- Code examples → Markdown blocks -- Linear instructions → Numbered lists -- Labels without semantic meaning (step1, helper2) - -See @graphviz-conventions.dot for graphviz style rules. - -## Code Examples - -**One excellent example beats many mediocre ones** - -Choose most relevant language: -- Testing techniques → TypeScript/JavaScript -- System debugging → Shell/Python -- Data processing → Python - -**Good example:** -- Complete and runnable -- Well-commented explaining WHY -- From real scenario -- Shows pattern clearly -- Ready to adapt (not generic template) - -**Don't:** -- Implement in 5+ languages -- Create fill-in-the-blank templates -- Write contrived examples - -You're good at porting - one great example is enough. - -## File Organization - -### Self-Contained Skill -``` -defense-in-depth/ - SKILL.md # Everything inline -``` -When: All content fits, no heavy reference needed - -### Skill with Reusable Tool -``` -condition-based-waiting/ - SKILL.md # Overview + patterns - example.ts # Working helpers to adapt -``` -When: Tool is reusable code, not just narrative - -### Skill with Heavy Reference -``` -pptx/ - SKILL.md # Overview + workflows - pptxgenjs.md # 600 lines API reference - ooxml.md # 500 lines XML structure - scripts/ # Executable tools -``` -When: Reference material too large for inline - -## The Iron Law (Same as TDD) - -``` -NO SKILL WITHOUT A FAILING TEST FIRST -``` - -This applies to NEW skills AND EDITS to existing skills. - -Write skill before testing? Delete it. Start over. -Edit skill without testing? Same violation. 
- -**No exceptions:** -- Not for "simple additions" -- Not for "just adding a section" -- Not for "documentation updates" -- Don't keep untested changes as "reference" -- Don't "adapt" while running tests -- Delete means delete - -**REQUIRED BACKGROUND:** The superpowers:test-driven-development skill explains why this matters. Same principles apply to documentation. - -## Testing All Skill Types - -Different skill types need different test approaches: - -### Discipline-Enforcing Skills (rules/requirements) - -**Examples:** TDD, verification-before-completion, designing-before-coding - -**Test with:** -- Academic questions: Do they understand the rules? -- Pressure scenarios: Do they comply under stress? -- Multiple pressures combined: time + sunk cost + exhaustion -- Identify rationalizations and add explicit counters - -**Success criteria:** Agent follows rule under maximum pressure - -### Technique Skills (how-to guides) - -**Examples:** condition-based-waiting, root-cause-tracing, defensive-programming - -**Test with:** -- Application scenarios: Can they apply the technique correctly? -- Variation scenarios: Do they handle edge cases? -- Missing information tests: Do instructions have gaps? - -**Success criteria:** Agent successfully applies technique to new scenario - -### Pattern Skills (mental models) - -**Examples:** reducing-complexity, information-hiding concepts - -**Test with:** -- Recognition scenarios: Do they recognize when pattern applies? -- Application scenarios: Can they use the mental model? -- Counter-examples: Do they know when NOT to apply? - -**Success criteria:** Agent correctly identifies when/how to apply pattern - -### Reference Skills (documentation/APIs) - -**Examples:** API documentation, command references, library guides - -**Test with:** -- Retrieval scenarios: Can they find the right information? -- Application scenarios: Can they use what they found correctly? -- Gap testing: Are common use cases covered? 
- -**Success criteria:** Agent finds and correctly applies reference information - -## Common Rationalizations for Skipping Testing - -| Excuse | Reality | -|--------|---------| -| "Skill is obviously clear" | Clear to you ≠ clear to other agents. Test it. | -| "It's just a reference" | References can have gaps, unclear sections. Test retrieval. | -| "Testing is overkill" | Untested skills have issues. Always. 15 min testing saves hours. | -| "I'll test if problems emerge" | Problems = agents can't use skill. Test BEFORE deploying. | -| "Too tedious to test" | Testing is less tedious than debugging bad skill in production. | -| "I'm confident it's good" | Overconfidence guarantees issues. Test anyway. | -| "Academic review is enough" | Reading ≠ using. Test application scenarios. | -| "No time to test" | Deploying untested skill wastes more time fixing it later. | - -**All of these mean: Test before deploying. No exceptions.** - -## Bulletproofing Skills Against Rationalization - -Skills that enforce discipline (like TDD) need to resist rationalization. Agents are smart and will find loopholes when under pressure. - -**Psychology note:** Understanding WHY persuasion techniques work helps you apply them systematically. See persuasion-principles.md for research foundation (Cialdini, 2021; Meincke et al., 2025) on authority, commitment, scarcity, social proof, and unity principles. - -### Close Every Loophole Explicitly - -Don't just state the rule - forbid specific workarounds: - - -```markdown -Write code before test? Delete it. -``` - - - -```markdown -Write code before test? Delete it. Start over. 
- -**No exceptions:** -- Don't keep it as "reference" -- Don't "adapt" it while writing tests -- Don't look at it -- Delete means delete -``` - - -### Address "Spirit vs Letter" Arguments - -Add foundational principle early: - -```markdown -**Violating the letter of the rules is violating the spirit of the rules.** -``` - -This cuts off entire class of "I'm following the spirit" rationalizations. - -### Build Rationalization Table - -Capture rationalizations from baseline testing (see Testing section below). Every excuse agents make goes in the table: - -```markdown -| Excuse | Reality | -|--------|---------| -| "Too simple to test" | Simple code breaks. Test takes 30 seconds. | -| "I'll test after" | Tests passing immediately prove nothing. | -| "Tests after achieve same goals" | Tests-after = "what does this do?" Tests-first = "what should this do?" | -``` - -### Create Red Flags List - -Make it easy for agents to self-check when rationalizing: - -```markdown -## Red Flags - STOP and Start Over - -- Code before test -- "I already manually tested it" -- "Tests after achieve the same purpose" -- "It's about spirit not ritual" -- "This is different because..." - -**All of these mean: Delete code. Start over with TDD.** -``` - -### Update CSO for Violation Symptoms - -Add to description: symptoms of when you're ABOUT to violate the rule: - -```yaml -description: use when implementing any feature or bugfix, before writing implementation code -``` - -## RED-GREEN-REFACTOR for Skills - -Follow the TDD cycle: - -### RED: Write Failing Test (Baseline) - -Run pressure scenario with subagent WITHOUT the skill. Document exact behavior: -- What choices did they make? -- What rationalizations did they use (verbatim)? -- Which pressures triggered violations? - -This is "watch the test fail" - you must see what agents naturally do before writing the skill. - -### GREEN: Write Minimal Skill - -Write skill that addresses those specific rationalizations. 
Don't add extra content for hypothetical cases. - -Run same scenarios WITH skill. Agent should now comply. - -### REFACTOR: Close Loopholes - -Agent found new rationalization? Add explicit counter. Re-test until bulletproof. - -**REQUIRED SUB-SKILL:** Use superpowers:testing-skills-with-subagents for the complete testing methodology: -- How to write pressure scenarios -- Pressure types (time, sunk cost, authority, exhaustion) -- Plugging holes systematically -- Meta-testing techniques - -## Anti-Patterns - -### ❌ Narrative Example -"In session 2025-10-03, we found empty projectDir caused..." -**Why bad:** Too specific, not reusable - -### ❌ Multi-Language Dilution -example-js.js, example-py.py, example-go.go -**Why bad:** Mediocre quality, maintenance burden - -### ❌ Code in Flowcharts -```dot -step1 [label="import fs"]; -step2 [label="read file"]; -``` -**Why bad:** Can't copy-paste, hard to read - -### ❌ Generic Labels -helper1, helper2, step3, pattern4 -**Why bad:** Labels should have semantic meaning - -## STOP: Before Moving to Next Skill - -**After writing ANY skill, you MUST STOP and complete the deployment process.** - -**Do NOT:** -- Create multiple skills in batch without testing each -- Move to next skill before current one is verified -- Skip testing because "batching is more efficient" - -**The deployment checklist below is MANDATORY for EACH skill.** - -Deploying untested skills = deploying untested code. It's a violation of quality standards. 
-
-## Skill Creation Checklist (TDD Adapted)
-
-**IMPORTANT: Use TodoWrite to create todos for EACH checklist item below.**
-
-**RED Phase - Write Failing Test:**
-- [ ] Create pressure scenarios (3+ combined pressures for discipline skills)
-- [ ] Run scenarios WITHOUT skill - document baseline behavior verbatim
-- [ ] Identify patterns in rationalizations/failures
-
-**GREEN Phase - Write Minimal Skill:**
-- [ ] Name uses only letters, numbers, hyphens (no parentheses/special chars)
-- [ ] YAML frontmatter with only name and description (max 1024 chars)
-- [ ] Description starts with "Use when..." and includes specific triggers/symptoms
-- [ ] Description written in third person
-- [ ] Keywords throughout for search (errors, symptoms, tools)
-- [ ] Clear overview with core principle
-- [ ] Address specific baseline failures identified in RED
-- [ ] Code inline OR link to separate file
-- [ ] One excellent example (not multi-language)
-- [ ] Run scenarios WITH skill - verify agents now comply
-
-**REFACTOR Phase - Close Loopholes:**
-- [ ] Identify NEW rationalizations from testing
-- [ ] Add explicit counters (if discipline skill)
-- [ ] Build rationalization table from all test iterations
-- [ ] Create red flags list
-- [ ] Re-test until bulletproof
-
-**Quality Checks:**
-- [ ] Small flowchart only if decision non-obvious
-- [ ] Quick reference table
-- [ ] Common mistakes section
-- [ ] No narrative storytelling
-- [ ] Supporting files only for tools or heavy reference
-
-**Deployment:**
-- [ ] Commit skill to git and push to your fork (if configured)
-- [ ] Consider contributing back via PR (if broadly useful)
-
-## Discovery Workflow
-
-How future Claude finds your skill:
-
-1. **Encounters problem** ("tests are flaky")
-2. **Searches skills** (symptom keywords match)
-3. **Finds SKILL** (description matches)
-4. **Scans overview** (is this relevant?)
-5. **Reads patterns** (quick reference table)
-6. 
**Loads example** (only when implementing) - -**Optimize for this flow** - put searchable terms early and often. - -## The Bottom Line - -**Creating skills IS TDD for process documentation.** - -Same Iron Law: No skill without failing test first. -Same cycle: RED (baseline) → GREEN (write skill) → REFACTOR (close loopholes). -Same benefits: Better quality, fewer surprises, bulletproof results. - -If you follow TDD for code, follow it for skills. It's the same discipline applied to documentation. diff --git a/.claude/skills/writing-skills/anthropic-best-practice.md b/.claude/skills/writing-skills/anthropic-best-practice.md deleted file mode 100644 index 45bf8f4..0000000 --- a/.claude/skills/writing-skills/anthropic-best-practice.md +++ /dev/null @@ -1,1150 +0,0 @@ -# Skill authoring best practices - -> Learn how to write effective Skills that Claude can discover and use successfully. - -Good Skills are concise, well-structured, and tested with real usage. This guide provides practical authoring decisions to help you write Skills that Claude can discover and use effectively. - -For conceptual background on how Skills work, see the [Skills overview](/en/docs/agents-and-tools/agent-skills/overview). - -## Core principles - -### Concise is key - -The [context window](/en/docs/build-with-claude/context-windows) is a public good. Your Skill shares the context window with everything else Claude needs to know, including: - -* The system prompt -* Conversation history -* Other Skills' metadata -* Your actual request - -Not every token in your Skill has an immediate cost. At startup, only the metadata (name and description) from all Skills is pre-loaded. Claude reads SKILL.md only when the Skill becomes relevant, and reads additional files only as needed. However, being concise in SKILL.md still matters: once Claude loads it, every token competes with conversation history and other context. 
- -**Default assumption**: Claude is already very smart - -Only add context Claude doesn't already have. Challenge each piece of information: - -* "Does Claude really need this explanation?" -* "Can I assume Claude knows this?" -* "Does this paragraph justify its token cost?" - -**Good example: Concise** (approximately 50 tokens): - -````markdown theme={null} -## Extract PDF text - -Use pdfplumber for text extraction: - -```python -import pdfplumber - -with pdfplumber.open("file.pdf") as pdf: - text = pdf.pages[0].extract_text() -``` -```` - -**Bad example: Too verbose** (approximately 150 tokens): - -```markdown theme={null} -## Extract PDF text - -PDF (Portable Document Format) files are a common file format that contains -text, images, and other content. To extract text from a PDF, you'll need to -use a library. There are many libraries available for PDF processing, but we -recommend pdfplumber because it's easy to use and handles most cases well. -First, you'll need to install it using pip. Then you can use the code below... -``` - -The concise version assumes Claude knows what PDFs are and how libraries work. - -### Set appropriate degrees of freedom - -Match the level of specificity to the task's fragility and variability. - -**High freedom** (text-based instructions): - -Use when: - -* Multiple approaches are valid -* Decisions depend on context -* Heuristics guide the approach - -Example: - -```markdown theme={null} -## Code review process - -1. Analyze the code structure and organization -2. Check for potential bugs or edge cases -3. Suggest improvements for readability and maintainability -4. 
Verify adherence to project conventions -``` - -**Medium freedom** (pseudocode or scripts with parameters): - -Use when: - -* A preferred pattern exists -* Some variation is acceptable -* Configuration affects behavior - -Example: - -````markdown theme={null} -## Generate report - -Use this template and customize as needed: - -```python -def generate_report(data, format="markdown", include_charts=True): - # Process data - # Generate output in specified format - # Optionally include visualizations -``` -```` - -**Low freedom** (specific scripts, few or no parameters): - -Use when: - -* Operations are fragile and error-prone -* Consistency is critical -* A specific sequence must be followed - -Example: - -````markdown theme={null} -## Database migration - -Run exactly this script: - -```bash -python scripts/migrate.py --verify --backup -``` - -Do not modify the command or add additional flags. -```` - -**Analogy**: Think of Claude as a robot exploring a path: - -* **Narrow bridge with cliffs on both sides**: There's only one safe way forward. Provide specific guardrails and exact instructions (low freedom). Example: database migrations that must run in exact sequence. -* **Open field with no hazards**: Many paths lead to success. Give general direction and trust Claude to find the best route (high freedom). Example: code reviews where context determines the best approach. - -### Test with all models you plan to use - -Skills act as additions to models, so effectiveness depends on the underlying model. Test your Skill with all the models you plan to use it with. - -**Testing considerations by model**: - -* **Claude Haiku** (fast, economical): Does the Skill provide enough guidance? -* **Claude Sonnet** (balanced): Is the Skill clear and efficient? -* **Claude Opus** (powerful reasoning): Does the Skill avoid over-explaining? - -What works perfectly for Opus might need more detail for Haiku. 
If you plan to use your Skill across multiple models, aim for instructions that work well with all of them. - -## Skill structure - - - **YAML Frontmatter**: The SKILL.md frontmatter supports two fields: - - * `name` - Human-readable name of the Skill (64 characters maximum) - * `description` - One-line description of what the Skill does and when to use it (1024 characters maximum) - - For complete Skill structure details, see the [Skills overview](/en/docs/agents-and-tools/agent-skills/overview#skill-structure). - - -### Naming conventions - -Use consistent naming patterns to make Skills easier to reference and discuss. We recommend using **gerund form** (verb + -ing) for Skill names, as this clearly describes the activity or capability the Skill provides. - -**Good naming examples (gerund form)**: - -* "Processing PDFs" -* "Analyzing spreadsheets" -* "Managing databases" -* "Testing code" -* "Writing documentation" - -**Acceptable alternatives**: - -* Noun phrases: "PDF Processing", "Spreadsheet Analysis" -* Action-oriented: "Process PDFs", "Analyze Spreadsheets" - -**Avoid**: - -* Vague names: "Helper", "Utils", "Tools" -* Overly generic: "Documents", "Data", "Files" -* Inconsistent patterns within your skill collection - -Consistent naming makes it easier to: - -* Reference Skills in documentation and conversations -* Understand what a Skill does at a glance -* Organize and search through multiple Skills -* Maintain a professional, cohesive skill library - -### Writing effective descriptions - -The `description` field enables Skill discovery and should include both what the Skill does and when to use it. - - - **Always write in third person**. The description is injected into the system prompt, and inconsistent point-of-view can cause discovery problems. 
- - * **Good:** "Processes Excel files and generates reports" - * **Avoid:** "I can help you process Excel files" - * **Avoid:** "You can use this to process Excel files" - - -**Be specific and include key terms**. Include both what the Skill does and specific triggers/contexts for when to use it. - -Each Skill has exactly one description field. The description is critical for skill selection: Claude uses it to choose the right Skill from potentially 100+ available Skills. Your description must provide enough detail for Claude to know when to select this Skill, while the rest of SKILL.md provides the implementation details. - -Effective examples: - -**PDF Processing skill:** - -```yaml theme={null} -description: Extract text and tables from PDF files, fill forms, merge documents. Use when working with PDF files or when the user mentions PDFs, forms, or document extraction. -``` - -**Excel Analysis skill:** - -```yaml theme={null} -description: Analyze Excel spreadsheets, create pivot tables, generate charts. Use when analyzing Excel files, spreadsheets, tabular data, or .xlsx files. -``` - -**Git Commit Helper skill:** - -```yaml theme={null} -description: Generate descriptive commit messages by analyzing git diffs. Use when the user asks for help writing commit messages or reviewing staged changes. -``` - -Avoid vague descriptions like these: - -```yaml theme={null} -description: Helps with documents -``` - -```yaml theme={null} -description: Processes data -``` - -```yaml theme={null} -description: Does stuff with files -``` - -### Progressive disclosure patterns - -SKILL.md serves as an overview that points Claude to detailed materials as needed, like a table of contents in an onboarding guide. For an explanation of how progressive disclosure works, see [How Skills work](/en/docs/agents-and-tools/agent-skills/overview#how-skills-work) in the overview. 
- -**Practical guidance:** - -* Keep SKILL.md body under 500 lines for optimal performance -* Split content into separate files when approaching this limit -* Use the patterns below to organize instructions, code, and resources effectively - -#### Visual overview: From simple to complex - -A basic Skill starts with just a SKILL.md file containing metadata and instructions: - -Simple SKILL.md file showing YAML frontmatter and markdown body - -As your Skill grows, you can bundle additional content that Claude loads only when needed: - -Bundling additional reference files like reference.md and forms.md. - -The complete Skill directory structure might look like this: - -``` -pdf/ -├── SKILL.md # Main instructions (loaded when triggered) -├── FORMS.md # Form-filling guide (loaded as needed) -├── reference.md # API reference (loaded as needed) -├── examples.md # Usage examples (loaded as needed) -└── scripts/ - ├── analyze_form.py # Utility script (executed, not loaded) - ├── fill_form.py # Form filling script - └── validate.py # Validation script -``` - -#### Pattern 1: High-level guide with references - -````markdown theme={null} ---- -name: PDF Processing -description: Extracts text and tables from PDF files, fills forms, and merges documents. Use when working with PDF files or when the user mentions PDFs, forms, or document extraction. ---- - -# PDF Processing - -## Quick start - -Extract text with pdfplumber: -```python -import pdfplumber -with pdfplumber.open("file.pdf") as pdf: - text = pdf.pages[0].extract_text() -``` - -## Advanced features - -**Form filling**: See [FORMS.md](FORMS.md) for complete guide -**API reference**: See [REFERENCE.md](REFERENCE.md) for all methods -**Examples**: See [EXAMPLES.md](EXAMPLES.md) for common patterns -```` - -Claude loads FORMS.md, REFERENCE.md, or EXAMPLES.md only when needed. - -#### Pattern 2: Domain-specific organization - -For Skills with multiple domains, organize content by domain to avoid loading irrelevant context. 
When a user asks about sales metrics, Claude only needs to read sales-related schemas, not finance or marketing data. This keeps token usage low and context focused. - -``` -bigquery-skill/ -├── SKILL.md (overview and navigation) -└── reference/ - ├── finance.md (revenue, billing metrics) - ├── sales.md (opportunities, pipeline) - ├── product.md (API usage, features) - └── marketing.md (campaigns, attribution) -``` - -````markdown SKILL.md theme={null} -# BigQuery Data Analysis - -## Available datasets - -**Finance**: Revenue, ARR, billing → See [reference/finance.md](reference/finance.md) -**Sales**: Opportunities, pipeline, accounts → See [reference/sales.md](reference/sales.md) -**Product**: API usage, features, adoption → See [reference/product.md](reference/product.md) -**Marketing**: Campaigns, attribution, email → See [reference/marketing.md](reference/marketing.md) - -## Quick search - -Find specific metrics using grep: - -```bash -grep -i "revenue" reference/finance.md -grep -i "pipeline" reference/sales.md -grep -i "api usage" reference/product.md -``` -```` - -#### Pattern 3: Conditional details - -Show basic content, link to advanced content: - -```markdown theme={null} -# DOCX Processing - -## Creating documents - -Use docx-js for new documents. See [DOCX-JS.md](DOCX-JS.md). - -## Editing documents - -For simple edits, modify the XML directly. - -**For tracked changes**: See [REDLINING.md](REDLINING.md) -**For OOXML details**: See [OOXML.md](OOXML.md) -``` - -Claude reads REDLINING.md or OOXML.md only when the user needs those features. - -### Avoid deeply nested references - -Claude may partially read files when they're referenced from other referenced files. When encountering nested references, Claude might use commands like `head -100` to preview content rather than reading entire files, resulting in incomplete information. - -**Keep references one level deep from SKILL.md**. 
All reference files should link directly from SKILL.md to ensure Claude reads complete files when needed. - -**Bad example: Too deep**: - -```markdown theme={null} -# SKILL.md -See [advanced.md](advanced.md)... - -# advanced.md -See [details.md](details.md)... - -# details.md -Here's the actual information... -``` - -**Good example: One level deep**: - -```markdown theme={null} -# SKILL.md - -**Basic usage**: [instructions in SKILL.md] -**Advanced features**: See [advanced.md](advanced.md) -**API reference**: See [reference.md](reference.md) -**Examples**: See [examples.md](examples.md) -``` - -### Structure longer reference files with table of contents - -For reference files longer than 100 lines, include a table of contents at the top. This ensures Claude can see the full scope of available information even when previewing with partial reads. - -**Example**: - -```markdown theme={null} -# API Reference - -## Contents -- Authentication and setup -- Core methods (create, read, update, delete) -- Advanced features (batch operations, webhooks) -- Error handling patterns -- Code examples - -## Authentication and setup -... - -## Core methods -... -``` - -Claude can then read the complete file or jump to specific sections as needed. - -For details on how this filesystem-based architecture enables progressive disclosure, see the [Runtime environment](#runtime-environment) section in the Advanced section below. - -## Workflows and feedback loops - -### Use workflows for complex tasks - -Break complex operations into clear, sequential steps. For particularly complex workflows, provide a checklist that Claude can copy into its response and check off as it progresses. 
- -**Example 1: Research synthesis workflow** (for Skills without code): - -````markdown theme={null} -## Research synthesis workflow - -Copy this checklist and track your progress: - -``` -Research Progress: -- [ ] Step 1: Read all source documents -- [ ] Step 2: Identify key themes -- [ ] Step 3: Cross-reference claims -- [ ] Step 4: Create structured summary -- [ ] Step 5: Verify citations -``` - -**Step 1: Read all source documents** - -Review each document in the `sources/` directory. Note the main arguments and supporting evidence. - -**Step 2: Identify key themes** - -Look for patterns across sources. What themes appear repeatedly? Where do sources agree or disagree? - -**Step 3: Cross-reference claims** - -For each major claim, verify it appears in the source material. Note which source supports each point. - -**Step 4: Create structured summary** - -Organize findings by theme. Include: -- Main claim -- Supporting evidence from sources -- Conflicting viewpoints (if any) - -**Step 5: Verify citations** - -Check that every claim references the correct source document. If citations are incomplete, return to Step 3. -```` - -This example shows how workflows apply to analysis tasks that don't require code. The checklist pattern works for any complex, multi-step process. - -**Example 2: PDF form filling workflow** (for Skills with code): - -````markdown theme={null} -## PDF form filling workflow - -Copy this checklist and check off items as you complete them: - -``` -Task Progress: -- [ ] Step 1: Analyze the form (run analyze_form.py) -- [ ] Step 2: Create field mapping (edit fields.json) -- [ ] Step 3: Validate mapping (run validate_fields.py) -- [ ] Step 4: Fill the form (run fill_form.py) -- [ ] Step 5: Verify output (run verify_output.py) -``` - -**Step 1: Analyze the form** - -Run: `python scripts/analyze_form.py input.pdf` - -This extracts form fields and their locations, saving to `fields.json`. 
- -**Step 2: Create field mapping** - -Edit `fields.json` to add values for each field. - -**Step 3: Validate mapping** - -Run: `python scripts/validate_fields.py fields.json` - -Fix any validation errors before continuing. - -**Step 4: Fill the form** - -Run: `python scripts/fill_form.py input.pdf fields.json output.pdf` - -**Step 5: Verify output** - -Run: `python scripts/verify_output.py output.pdf` - -If verification fails, return to Step 2. -```` - -Clear steps prevent Claude from skipping critical validation. The checklist helps both Claude and you track progress through multi-step workflows. - -### Implement feedback loops - -**Common pattern**: Run validator → fix errors → repeat - -This pattern greatly improves output quality. - -**Example 1: Style guide compliance** (for Skills without code): - -```markdown theme={null} -## Content review process - -1. Draft your content following the guidelines in STYLE_GUIDE.md -2. Review against the checklist: - - Check terminology consistency - - Verify examples follow the standard format - - Confirm all required sections are present -3. If issues found: - - Note each issue with specific section reference - - Revise the content - - Review the checklist again -4. Only proceed when all requirements are met -5. Finalize and save the document -``` - -This shows the validation loop pattern using reference documents instead of scripts. The "validator" is STYLE\_GUIDE.md, and Claude performs the check by reading and comparing. - -**Example 2: Document editing process** (for Skills with code): - -```markdown theme={null} -## Document editing process - -1. Make your edits to `word/document.xml` -2. **Validate immediately**: `python ooxml/scripts/validate.py unpacked_dir/` -3. If validation fails: - - Review the error message carefully - - Fix the issues in the XML - - Run validation again -4. **Only proceed when validation passes** -5. Rebuild: `python ooxml/scripts/pack.py unpacked_dir/ output.docx` -6. 
Test the output document -``` - -The validation loop catches errors early. - -## Content guidelines - -### Avoid time-sensitive information - -Don't include information that will become outdated: - -**Bad example: Time-sensitive** (will become wrong): - -```markdown theme={null} -If you're doing this before August 2025, use the old API. -After August 2025, use the new API. -``` - -**Good example** (use "old patterns" section): - -```markdown theme={null} -## Current method - -Use the v2 API endpoint: `api.example.com/v2/messages` - -## Old patterns - -
-<details> -<summary>Legacy v1 API (deprecated 2025-08)</summary> - -The v1 API used: `api.example.com/v1/messages` - -This endpoint is no longer supported. -</details>
-``` - -The old patterns section provides historical context without cluttering the main content. - -### Use consistent terminology - -Choose one term and use it throughout the Skill: - -**Good - Consistent**: - -* Always "API endpoint" -* Always "field" -* Always "extract" - -**Bad - Inconsistent**: - -* Mix "API endpoint", "URL", "API route", "path" -* Mix "field", "box", "element", "control" -* Mix "extract", "pull", "get", "retrieve" - -Consistency helps Claude understand and follow instructions. - -## Common patterns - -### Template pattern - -Provide templates for output format. Match the level of strictness to your needs. - -**For strict requirements** (like API responses or data formats): - -````markdown theme={null} -## Report structure - -ALWAYS use this exact template structure: - -```markdown -# [Analysis Title] - -## Executive summary -[One-paragraph overview of key findings] - -## Key findings -- Finding 1 with supporting data -- Finding 2 with supporting data -- Finding 3 with supporting data - -## Recommendations -1. Specific actionable recommendation -2. Specific actionable recommendation -``` -```` - -**For flexible guidance** (when adaptation is useful): - -````markdown theme={null} -## Report structure - -Here is a sensible default format, but use your best judgment based on the analysis: - -```markdown -# [Analysis Title] - -## Executive summary -[Overview] - -## Key findings -[Adapt sections based on what you discover] - -## Recommendations -[Tailor to the specific context] -``` - -Adjust sections as needed for the specific analysis type. 
-```` - -### Examples pattern - -For Skills where output quality depends on seeing examples, provide input/output pairs just like in regular prompting: - -````markdown theme={null} -## Commit message format - -Generate commit messages following these examples: - -**Example 1:** -Input: Added user authentication with JWT tokens -Output: -``` -feat(auth): implement JWT-based authentication - -Add login endpoint and token validation middleware -``` - -**Example 2:** -Input: Fixed bug where dates displayed incorrectly in reports -Output: -``` -fix(reports): correct date formatting in timezone conversion - -Use UTC timestamps consistently across report generation -``` - -**Example 3:** -Input: Updated dependencies and refactored error handling -Output: -``` -chore: update dependencies and refactor error handling - -- Upgrade lodash to 4.17.21 -- Standardize error response format across endpoints -``` - -Follow this style: type(scope): brief description, then detailed explanation. -```` - -Examples help Claude understand the desired style and level of detail more clearly than descriptions alone. - -### Conditional workflow pattern - -Guide Claude through decision points: - -```markdown theme={null} -## Document modification workflow - -1. Determine the modification type: - - **Creating new content?** → Follow "Creation workflow" below - **Editing existing content?** → Follow "Editing workflow" below - -2. Creation workflow: - - Use docx-js library - - Build document from scratch - - Export to .docx format - -3. Editing workflow: - - Unpack existing document - - Modify XML directly - - Validate after each change - - Repack when complete -``` - - - If workflows become large or complicated with many steps, consider pushing them into separate files and tell Claude to read the appropriate file based on the task at hand. 
- - -## Evaluation and iteration - -### Build evaluations first - -**Create evaluations BEFORE writing extensive documentation.** This ensures your Skill solves real problems rather than documenting imagined ones. - -**Evaluation-driven development:** - -1. **Identify gaps**: Run Claude on representative tasks without a Skill. Document specific failures or missing context -2. **Create evaluations**: Build three scenarios that test these gaps -3. **Establish baseline**: Measure Claude's performance without the Skill -4. **Write minimal instructions**: Create just enough content to address the gaps and pass evaluations -5. **Iterate**: Execute evaluations, compare against baseline, and refine - -This approach ensures you're solving actual problems rather than anticipating requirements that may never materialize. - -**Evaluation structure**: - -```json theme={null} -{ - "skills": ["pdf-processing"], - "query": "Extract all text from this PDF file and save it to output.txt", - "files": ["test-files/document.pdf"], - "expected_behavior": [ - "Successfully reads the PDF file using an appropriate PDF processing library or command-line tool", - "Extracts text content from all pages in the document without missing any pages", - "Saves the extracted text to a file named output.txt in a clear, readable format" - ] -} -``` - - - This example demonstrates a data-driven evaluation with a simple testing rubric. We do not currently provide a built-in way to run these evaluations. Users can create their own evaluation system. Evaluations are your source of truth for measuring Skill effectiveness. - - -### Develop Skills iteratively with Claude - -The most effective Skill development process involves Claude itself. Work with one instance of Claude ("Claude A") to create a Skill that will be used by other instances ("Claude B"). Claude A helps you design and refine instructions, while Claude B tests them in real tasks. 
This works because Claude models understand both how to write effective agent instructions and what information agents need. - -**Creating a new Skill:** - -1. **Complete a task without a Skill**: Work through a problem with Claude A using normal prompting. As you work, you'll naturally provide context, explain preferences, and share procedural knowledge. Notice what information you repeatedly provide. - -2. **Identify the reusable pattern**: After completing the task, identify what context you provided that would be useful for similar future tasks. - - **Example**: If you worked through a BigQuery analysis, you might have provided table names, field definitions, filtering rules (like "always exclude test accounts"), and common query patterns. - -3. **Ask Claude A to create a Skill**: "Create a Skill that captures this BigQuery analysis pattern we just used. Include the table schemas, naming conventions, and the rule about filtering test accounts." - - - Claude models understand the Skill format and structure natively. You don't need special system prompts or a "writing skills" skill to get Claude to help create Skills. Simply ask Claude to create a Skill and it will generate properly structured SKILL.md content with appropriate frontmatter and body content. - - -4. **Review for conciseness**: Check that Claude A hasn't added unnecessary explanations. Ask: "Remove the explanation about what win rate means - Claude already knows that." - -5. **Improve information architecture**: Ask Claude A to organize the content more effectively. For example: "Organize this so the table schema is in a separate reference file. We might add more tables later." - -6. **Test on similar tasks**: Use the Skill with Claude B (a fresh instance with the Skill loaded) on related use cases. Observe whether Claude B finds the right information, applies rules correctly, and handles the task successfully. - -7. 
**Iterate based on observation**: If Claude B struggles or misses something, return to Claude A with specifics: "When Claude used this Skill, it forgot to filter by date for Q4. Should we add a section about date filtering patterns?" - -**Iterating on existing Skills:** - -The same hierarchical pattern continues when improving Skills. You alternate between: - -* **Working with Claude A** (the expert who helps refine the Skill) -* **Testing with Claude B** (the agent using the Skill to perform real work) -* **Observing Claude B's behavior** and bringing insights back to Claude A - -1. **Use the Skill in real workflows**: Give Claude B (with the Skill loaded) actual tasks, not test scenarios - -2. **Observe Claude B's behavior**: Note where it struggles, succeeds, or makes unexpected choices - - **Example observation**: "When I asked Claude B for a regional sales report, it wrote the query but forgot to filter out test accounts, even though the Skill mentions this rule." - -3. **Return to Claude A for improvements**: Share the current SKILL.md and describe what you observed. Ask: "I noticed Claude B forgot to filter test accounts when I asked for a regional report. The Skill mentions filtering, but maybe it's not prominent enough?" - -4. **Review Claude A's suggestions**: Claude A might suggest reorganizing to make rules more prominent, using stronger language like "MUST filter" instead of "always filter", or restructuring the workflow section. - -5. **Apply and test changes**: Update the Skill with Claude A's refinements, then test again with Claude B on similar requests - -6. **Repeat based on usage**: Continue this observe-refine-test cycle as you encounter new scenarios. Each iteration improves the Skill based on real agent behavior, not assumptions. - -**Gathering team feedback:** - -1. Share Skills with teammates and observe their usage -2. Ask: Does the Skill activate when expected? Are instructions clear? What's missing? -3. 
Incorporate feedback to address blind spots in your own usage patterns - -**Why this approach works**: Claude A understands agent needs, you provide domain expertise, Claude B reveals gaps through real usage, and iterative refinement improves Skills based on observed behavior rather than assumptions. - -### Observe how Claude navigates Skills - -As you iterate on Skills, pay attention to how Claude actually uses them in practice. Watch for: - -* **Unexpected exploration paths**: Does Claude read files in an order you didn't anticipate? This might indicate your structure isn't as intuitive as you thought -* **Missed connections**: Does Claude fail to follow references to important files? Your links might need to be more explicit or prominent -* **Overreliance on certain sections**: If Claude repeatedly reads the same file, consider whether that content should be in the main SKILL.md instead -* **Ignored content**: If Claude never accesses a bundled file, it might be unnecessary or poorly signaled in the main instructions - -Iterate based on these observations rather than assumptions. The 'name' and 'description' in your Skill's metadata are particularly critical. Claude uses these when deciding whether to trigger the Skill in response to the current task. Make sure they clearly describe what the Skill does and when it should be used. - -## Anti-patterns to avoid - -### Avoid Windows-style paths - -Always use forward slashes in file paths, even on Windows: - -* ✓ **Good**: `scripts/helper.py`, `reference/guide.md` -* ✗ **Avoid**: `scripts\helper.py`, `reference\guide.md` - -Unix-style paths work across all platforms, while Windows-style paths cause errors on Unix systems. - -### Avoid offering too many options - -Don't present multiple approaches unless necessary: - -````markdown theme={null} -**Bad example: Too many choices** (confusing): -"You can use pypdf, or pdfplumber, or PyMuPDF, or pdf2image, or..." 
- -**Good example: Provide a default** (with escape hatch): -"Use pdfplumber for text extraction: -```python -import pdfplumber -``` - -For scanned PDFs requiring OCR, use pdf2image with pytesseract instead." -```` - -## Advanced: Skills with executable code - -The sections below focus on Skills that include executable scripts. If your Skill uses only markdown instructions, skip to [Checklist for effective Skills](#checklist-for-effective-skills). - -### Solve, don't punt - -When writing scripts for Skills, handle error conditions rather than punting to Claude. - -**Good example: Handle errors explicitly**: - -```python theme={null} -def process_file(path): - """Process a file, creating it if it doesn't exist.""" - try: - with open(path) as f: - return f.read() - except FileNotFoundError: - # Create file with default content instead of failing - print(f"File {path} not found, creating default") - with open(path, 'w') as f: - f.write('') - return '' - except PermissionError: - # Provide alternative instead of failing - print(f"Cannot access {path}, using default") - return '' -``` - -**Bad example: Punt to Claude**: - -```python theme={null} -def process_file(path): - # Just fail and let Claude figure it out - return open(path).read() -``` - -Configuration parameters should also be justified and documented to avoid "voodoo constants" (Ousterhout's law). If you don't know the right value, how will Claude determine it? - -**Good example: Self-documenting**: - -```python theme={null} -# HTTP requests typically complete within 30 seconds -# Longer timeout accounts for slow connections -REQUEST_TIMEOUT = 30 - -# Three retries balances reliability vs speed -# Most intermittent failures resolve by the second retry -MAX_RETRIES = 3 -``` - -**Bad example: Magic numbers**: - -```python theme={null} -TIMEOUT = 47 # Why 47? -RETRIES = 5 # Why 5? 
-``` - -### Provide utility scripts - -Even if Claude could write a script, pre-made scripts offer advantages: - -**Benefits of utility scripts**: - -* More reliable than generated code -* Save tokens (no need to include code in context) -* Save time (no code generation required) -* Ensure consistency across uses - -Bundling executable scripts alongside instruction files - -The diagram above shows how executable scripts work alongside instruction files. The instruction file (forms.md) references the script, and Claude can execute it without loading its contents into context. - -**Important distinction**: Make clear in your instructions whether Claude should: - -* **Execute the script** (most common): "Run `analyze_form.py` to extract fields" -* **Read it as reference** (for complex logic): "See `analyze_form.py` for the field extraction algorithm" - -For most utility scripts, execution is preferred because it's more reliable and efficient. See the [Runtime environment](#runtime-environment) section below for details on how script execution works. - -**Example**: - -````markdown theme={null} -## Utility scripts - -**analyze_form.py**: Extract all form fields from PDF - -```bash -python scripts/analyze_form.py input.pdf > fields.json -``` - -Output format: -```json -{ - "field_name": {"type": "text", "x": 100, "y": 200}, - "signature": {"type": "sig", "x": 150, "y": 500} -} -``` - -**validate_boxes.py**: Check for overlapping bounding boxes - -```bash -python scripts/validate_boxes.py fields.json -# Returns: "OK" or lists conflicts -``` - -**fill_form.py**: Apply field values to PDF - -```bash -python scripts/fill_form.py input.pdf fields.json output.pdf -``` -```` - -### Use visual analysis - -When inputs can be rendered as images, have Claude analyze them: - -````markdown theme={null} -## Form layout analysis - -1. Convert PDF to images: - ```bash - python scripts/pdf_to_images.py form.pdf - ``` - -2. Analyze each page image to identify form fields -3. 
Claude can see field locations and types visually -```` - - - In this example, you'd need to write the `pdf_to_images.py` script. - - -Claude's vision capabilities help understand layouts and structures. - -### Create verifiable intermediate outputs - -When Claude performs complex, open-ended tasks, it can make mistakes. The "plan-validate-execute" pattern catches errors early by having Claude first create a plan in a structured format, then validate that plan with a script before executing it. - -**Example**: Imagine asking Claude to update 50 form fields in a PDF based on a spreadsheet. Without validation, Claude might reference non-existent fields, create conflicting values, miss required fields, or apply updates incorrectly. - -**Solution**: Use the workflow pattern shown above (PDF form filling), but add an intermediate `changes.json` file that gets validated before applying changes. The workflow becomes: analyze → **create plan file** → **validate plan** → execute → verify. - -**Why this pattern works:** - -* **Catches errors early**: Validation finds problems before changes are applied -* **Machine-verifiable**: Scripts provide objective verification -* **Reversible planning**: Claude can iterate on the plan without touching originals -* **Clear debugging**: Error messages point to specific problems - -**When to use**: Batch operations, destructive changes, complex validation rules, high-stakes operations. - -**Implementation tip**: Make validation scripts verbose with specific error messages like "Field 'signature\_date' not found. Available fields: customer\_name, order\_total, signature\_date\_signed" to help Claude fix issues. 
- -### Package dependencies - -Skills run in the code execution environment with platform-specific limitations: - -* **claude.ai**: Can install packages from npm and PyPI and pull from GitHub repositories -* **Anthropic API**: Has no network access and no runtime package installation - -List required packages in your SKILL.md and verify they're available in the [code execution tool documentation](/en/docs/agents-and-tools/tool-use/code-execution-tool). - -### Runtime environment - -Skills run in a code execution environment with filesystem access, bash commands, and code execution capabilities. For the conceptual explanation of this architecture, see [The Skills architecture](/en/docs/agents-and-tools/agent-skills/overview#the-skills-architecture) in the overview. - -**How this affects your authoring:** - -**How Claude accesses Skills:** - -1. **Metadata pre-loaded**: At startup, the name and description from all Skills' YAML frontmatter are loaded into the system prompt -2. **Files read on-demand**: Claude uses bash Read tools to access SKILL.md and other files from the filesystem when needed -3. **Scripts executed efficiently**: Utility scripts can be executed via bash without loading their full contents into context. Only the script's output consumes tokens -4. **No context penalty for large files**: Reference files, data, or documentation don't consume context tokens until actually read - -* **File paths matter**: Claude navigates your skill directory like a filesystem. 
Use forward slashes (`reference/guide.md`), not backslashes -* **Name files descriptively**: Use names that indicate content: `form_validation_rules.md`, not `doc2.md` -* **Organize for discovery**: Structure directories by domain or feature - * Good: `reference/finance.md`, `reference/sales.md` - * Bad: `docs/file1.md`, `docs/file2.md` -* **Bundle comprehensive resources**: Include complete API docs, extensive examples, large datasets; no context penalty until accessed -* **Prefer scripts for deterministic operations**: Write `validate_form.py` rather than asking Claude to generate validation code -* **Make execution intent clear**: - * "Run `analyze_form.py` to extract fields" (execute) - * "See `analyze_form.py` for the extraction algorithm" (read as reference) -* **Test file access patterns**: Verify Claude can navigate your directory structure by testing with real requests - -**Example:** - -``` -bigquery-skill/ -├── SKILL.md (overview, points to reference files) -└── reference/ - ├── finance.md (revenue metrics) - ├── sales.md (pipeline data) - └── product.md (usage analytics) -``` - -When the user asks about revenue, Claude reads SKILL.md, sees the reference to `reference/finance.md`, and invokes bash to read just that file. The sales.md and product.md files remain on the filesystem, consuming zero context tokens until needed. This filesystem-based model is what enables progressive disclosure. Claude can navigate and selectively load exactly what each task requires. - -For complete details on the technical architecture, see [How Skills work](/en/docs/agents-and-tools/agent-skills/overview#how-skills-work) in the Skills overview. - -### MCP tool references - -If your Skill uses MCP (Model Context Protocol) tools, always use fully qualified tool names to avoid "tool not found" errors. - -**Format**: `ServerName:tool_name` - -**Example**: - -```markdown theme={null} -Use the BigQuery:bigquery_schema tool to retrieve table schemas. 
-Use the GitHub:create_issue tool to create issues. -``` - -Where: - -* `BigQuery` and `GitHub` are MCP server names -* `bigquery_schema` and `create_issue` are the tool names within those servers - -Without the server prefix, Claude may fail to locate the tool, especially when multiple MCP servers are available. - -### Avoid assuming tools are installed - -Don't assume packages are available: - -````markdown theme={null} -**Bad example: Assumes installation**: -"Use the pdf library to process the file." - -**Good example: Explicit about dependencies**: -"Install required package: `pip install pypdf` - -Then use it: -```python -from pypdf import PdfReader -reader = PdfReader("file.pdf") -```" -```` - -## Technical notes - -### YAML frontmatter requirements - -The SKILL.md frontmatter includes only `name` (64 characters max) and `description` (1024 characters max) fields. See the [Skills overview](/en/docs/agents-and-tools/agent-skills/overview#skill-structure) for complete structure details. - -### Token budgets - -Keep SKILL.md body under 500 lines for optimal performance. If your content exceeds this, split it into separate files using the progressive disclosure patterns described earlier. For architectural details, see the [Skills overview](/en/docs/agents-and-tools/agent-skills/overview#how-skills-work). 
- -## Checklist for effective Skills - -Before sharing a Skill, verify: - -### Core quality - -* [ ] Description is specific and includes key terms -* [ ] Description includes both what the Skill does and when to use it -* [ ] SKILL.md body is under 500 lines -* [ ] Additional details are in separate files (if needed) -* [ ] No time-sensitive information (or in "old patterns" section) -* [ ] Consistent terminology throughout -* [ ] Examples are concrete, not abstract -* [ ] File references are one level deep -* [ ] Progressive disclosure used appropriately -* [ ] Workflows have clear steps - -### Code and scripts - -* [ ] Scripts solve problems rather than punt to Claude -* [ ] Error handling is explicit and helpful -* [ ] No "voodoo constants" (all values justified) -* [ ] Required packages listed in instructions and verified as available -* [ ] Scripts have clear documentation -* [ ] No Windows-style paths (all forward slashes) -* [ ] Validation/verification steps for critical operations -* [ ] Feedback loops included for quality-critical tasks - -### Testing - -* [ ] At least three evaluations created -* [ ] Tested with Haiku, Sonnet, and Opus -* [ ] Tested with real usage scenarios -* [ ] Team feedback incorporated (if applicable) - -## Next steps - - - - Create your first Skill - - - - Create and manage Skills in Claude Code - - - - Upload and use Skills programmatically - - diff --git a/.claude/skills/writing-skills/graphviz-conventions.dot b/.claude/skills/writing-skills/graphviz-conventions.dot deleted file mode 100644 index f02d8d9..0000000 --- a/.claude/skills/writing-skills/graphviz-conventions.dot +++ /dev/null @@ -1,172 +0,0 @@ -digraph STYLE_GUIDE { - // The style guide for our process DSL, written in the DSL itself - - // Node type examples with their shapes - subgraph cluster_node_types { - label="NODE TYPES AND SHAPES"; - - // Questions are diamonds - "Is this a question?" 
[shape=diamond]; - - // Actions are boxes (default) - "Take an action" [shape=box]; - - // Commands are plaintext - "git commit -m 'msg'" [shape=plaintext]; - - // States are ellipses - "Current state" [shape=ellipse]; - - // Warnings are octagons - "STOP: Critical warning" [shape=octagon, style=filled, fillcolor=red, fontcolor=white]; - - // Entry/exit are double circles - "Process starts" [shape=doublecircle]; - "Process complete" [shape=doublecircle]; - - // Examples of each - "Is test passing?" [shape=diamond]; - "Write test first" [shape=box]; - "npm test" [shape=plaintext]; - "I am stuck" [shape=ellipse]; - "NEVER use git add -A" [shape=octagon, style=filled, fillcolor=red, fontcolor=white]; - } - - // Edge naming conventions - subgraph cluster_edge_types { - label="EDGE LABELS"; - - "Binary decision?" [shape=diamond]; - "Yes path" [shape=box]; - "No path" [shape=box]; - - "Binary decision?" -> "Yes path" [label="yes"]; - "Binary decision?" -> "No path" [label="no"]; - - "Multiple choice?" [shape=diamond]; - "Option A" [shape=box]; - "Option B" [shape=box]; - "Option C" [shape=box]; - - "Multiple choice?" -> "Option A" [label="condition A"]; - "Multiple choice?" -> "Option B" [label="condition B"]; - "Multiple choice?" -> "Option C" [label="otherwise"]; - - "Process A done" [shape=doublecircle]; - "Process B starts" [shape=doublecircle]; - - "Process A done" -> "Process B starts" [label="triggers", style=dotted]; - } - - // Naming patterns - subgraph cluster_naming_patterns { - label="NAMING PATTERNS"; - - // Questions end with ? 
- "Should I do X?"; - "Can this be Y?"; - "Is Z true?"; - "Have I done W?"; - - // Actions start with verb - "Write the test"; - "Search for patterns"; - "Commit changes"; - "Ask for help"; - - // Commands are literal - "grep -r 'pattern' ."; - "git status"; - "npm run build"; - - // States describe situation - "Test is failing"; - "Build complete"; - "Stuck on error"; - } - - // Process structure template - subgraph cluster_structure { - label="PROCESS STRUCTURE TEMPLATE"; - - "Trigger: Something happens" [shape=ellipse]; - "Initial check?" [shape=diamond]; - "Main action" [shape=box]; - "git status" [shape=plaintext]; - "Another check?" [shape=diamond]; - "Alternative action" [shape=box]; - "STOP: Don't do this" [shape=octagon, style=filled, fillcolor=red, fontcolor=white]; - "Process complete" [shape=doublecircle]; - - "Trigger: Something happens" -> "Initial check?"; - "Initial check?" -> "Main action" [label="yes"]; - "Initial check?" -> "Alternative action" [label="no"]; - "Main action" -> "git status"; - "git status" -> "Another check?"; - "Another check?" -> "Process complete" [label="ok"]; - "Another check?" -> "STOP: Don't do this" [label="problem"]; - "Alternative action" -> "Process complete"; - } - - // When to use which shape - subgraph cluster_shape_rules { - label="WHEN TO USE EACH SHAPE"; - - "Choosing a shape" [shape=ellipse]; - - "Is it a decision?" [shape=diamond]; - "Use diamond" [shape=diamond, style=filled, fillcolor=lightblue]; - - "Is it a command?" [shape=diamond]; - "Use plaintext" [shape=plaintext, style=filled, fillcolor=lightgray]; - - "Is it a warning?" [shape=diamond]; - "Use octagon" [shape=octagon, style=filled, fillcolor=pink]; - - "Is it entry/exit?" [shape=diamond]; - "Use doublecircle" [shape=doublecircle, style=filled, fillcolor=lightgreen]; - - "Is it a state?" 
[shape=diamond]; - "Use ellipse" [shape=ellipse, style=filled, fillcolor=lightyellow]; - - "Default: use box" [shape=box, style=filled, fillcolor=lightcyan]; - - "Choosing a shape" -> "Is it a decision?"; - "Is it a decision?" -> "Use diamond" [label="yes"]; - "Is it a decision?" -> "Is it a command?" [label="no"]; - "Is it a command?" -> "Use plaintext" [label="yes"]; - "Is it a command?" -> "Is it a warning?" [label="no"]; - "Is it a warning?" -> "Use octagon" [label="yes"]; - "Is it a warning?" -> "Is it entry/exit?" [label="no"]; - "Is it entry/exit?" -> "Use doublecircle" [label="yes"]; - "Is it entry/exit?" -> "Is it a state?" [label="no"]; - "Is it a state?" -> "Use ellipse" [label="yes"]; - "Is it a state?" -> "Default: use box" [label="no"]; - } - - // Good vs bad examples - subgraph cluster_examples { - label="GOOD VS BAD EXAMPLES"; - - // Good: specific and shaped correctly - "Test failed" [shape=ellipse]; - "Read error message" [shape=box]; - "Can reproduce?" [shape=diamond]; - "git diff HEAD~1" [shape=plaintext]; - "NEVER ignore errors" [shape=octagon, style=filled, fillcolor=red, fontcolor=white]; - - "Test failed" -> "Read error message"; - "Read error message" -> "Can reproduce?"; - "Can reproduce?" 
-> "git diff HEAD~1" [label="yes"]; - - // Bad: vague and wrong shapes - bad_1 [label="Something wrong", shape=box]; // Should be ellipse (state) - bad_2 [label="Fix it", shape=box]; // Too vague - bad_3 [label="Check", shape=box]; // Should be diamond - bad_4 [label="Run command", shape=box]; // Should be plaintext with actual command - - bad_1 -> bad_2; - bad_2 -> bad_3; - bad_3 -> bad_4; - } -} diff --git a/.claude/skills/writing-skills/persuasion-principles.md b/.claude/skills/writing-skills/persuasion-principles.md deleted file mode 100644 index 9818a5f..0000000 --- a/.claude/skills/writing-skills/persuasion-principles.md +++ /dev/null @@ -1,187 +0,0 @@ -# Persuasion Principles for Skill Design - -## Overview - -LLMs respond to the same persuasion principles as humans. Understanding this psychology helps you design more effective skills - not to manipulate, but to ensure critical practices are followed even under pressure. - -**Research foundation:** Meincke et al. (2025) tested 7 persuasion principles with N=28,000 AI conversations. Persuasion techniques more than doubled compliance rates (33% → 72%, p < .001). - -## The Seven Principles - -### 1. Authority -**What it is:** Deference to expertise, credentials, or official sources. - -**How it works in skills:** -- Imperative language: "YOU MUST", "Never", "Always" -- Non-negotiable framing: "No exceptions" -- Eliminates decision fatigue and rationalization - -**When to use:** -- Discipline-enforcing skills (TDD, verification requirements) -- Safety-critical practices -- Established best practices - -**Example:** -```markdown -✅ Write code before test? Delete it. Start over. No exceptions. -❌ Consider writing tests first when feasible. -``` - -### 2. Commitment -**What it is:** Consistency with prior actions, statements, or public declarations. 
- -**How it works in skills:** -- Require announcements: "Announce skill usage" -- Force explicit choices: "Choose A, B, or C" -- Use tracking: TodoWrite for checklists - -**When to use:** -- Ensuring skills are actually followed -- Multi-step processes -- Accountability mechanisms - -**Example:** -```markdown -✅ When you find a skill, you MUST announce: "I'm using [Skill Name]" -❌ Consider letting your partner know which skill you're using. -``` - -### 3. Scarcity -**What it is:** Urgency from time limits or limited availability. - -**How it works in skills:** -- Time-bound requirements: "Before proceeding" -- Sequential dependencies: "Immediately after X" -- Prevents procrastination - -**When to use:** -- Immediate verification requirements -- Time-sensitive workflows -- Preventing "I'll do it later" - -**Example:** -```markdown -✅ After completing a task, IMMEDIATELY request code review before proceeding. -❌ You can review code when convenient. -``` - -### 4. Social Proof -**What it is:** Conformity to what others do or what's considered normal. - -**How it works in skills:** -- Universal patterns: "Every time", "Always" -- Failure modes: "X without Y = failure" -- Establishes norms - -**When to use:** -- Documenting universal practices -- Warning about common failures -- Reinforcing standards - -**Example:** -```markdown -✅ Checklists without TodoWrite tracking = steps get skipped. Every time. -❌ Some people find TodoWrite helpful for checklists. -``` - -### 5. Unity -**What it is:** Shared identity, "we-ness", in-group belonging. - -**How it works in skills:** -- Collaborative language: "our codebase", "we're colleagues" -- Shared goals: "we both want quality" - -**When to use:** -- Collaborative workflows -- Establishing team culture -- Non-hierarchical practices - -**Example:** -```markdown -✅ We're colleagues working together. I need your honest technical judgment. -❌ You should probably tell me if I'm wrong. -``` - -### 6. 
Reciprocity -**What it is:** Obligation to return benefits received. - -**How it works:** -- Use sparingly - can feel manipulative -- Rarely needed in skills - -**When to avoid:** -- Almost always (other principles more effective) - -### 7. Liking -**What it is:** Preference for cooperating with those we like. - -**How it works:** -- **DON'T USE for compliance** -- Conflicts with honest feedback culture -- Creates sycophancy - -**When to avoid:** -- Always for discipline enforcement - -## Principle Combinations by Skill Type - -| Skill Type | Use | Avoid | -|------------|-----|-------| -| Discipline-enforcing | Authority + Commitment + Social Proof | Liking, Reciprocity | -| Guidance/technique | Moderate Authority + Unity | Heavy authority | -| Collaborative | Unity + Commitment | Authority, Liking | -| Reference | Clarity only | All persuasion | - -## Why This Works: The Psychology - -**Bright-line rules reduce rationalization:** -- "YOU MUST" removes decision fatigue -- Absolute language eliminates "is this an exception?" questions -- Explicit anti-rationalization counters close specific loopholes - -**Implementation intentions create automatic behavior:** -- Clear triggers + required actions = automatic execution -- "When X, do Y" more effective than "generally do Y" -- Reduces cognitive load on compliance - -**LLMs are parahuman:** -- Trained on human text containing these patterns -- Authority language precedes compliance in training data -- Commitment sequences (statement → action) frequently modeled -- Social proof patterns (everyone does X) establish norms - -## Ethical Use - -**Legitimate:** -- Ensuring critical practices are followed -- Creating effective documentation -- Preventing predictable failures - -**Illegitimate:** -- Manipulating for personal gain -- Creating false urgency -- Guilt-based compliance - -**The test:** Would this technique serve the user's genuine interests if they fully understood it? - -## Research Citations - -**Cialdini, R. B. 
(2021).** *Influence: The Psychology of Persuasion (New and Expanded).* Harper Business. -- Seven principles of persuasion -- Empirical foundation for influence research - -**Meincke, L., Shapiro, D., Duckworth, A. L., Mollick, E., Mollick, L., & Cialdini, R. (2025).** Call Me A Jerk: Persuading AI to Comply with Objectionable Requests. University of Pennsylvania. -- Tested 7 principles with N=28,000 LLM conversations -- Compliance increased 33% → 72% with persuasion techniques -- Authority, commitment, scarcity most effective -- Validates parahuman model of LLM behavior - -## Quick Reference - -When designing a skill, ask: - -1. **What type is it?** (Discipline vs. guidance vs. reference) -2. **What behavior am I trying to change?** -3. **Which principle(s) apply?** (Usually authority + commitment for discipline) -4. **Am I combining too many?** (Don't use all seven) -5. **Is this ethical?** (Serves user's genuine interests?) diff --git a/.claude/status_lines/lazy_status.py b/.claude/status_lines/lazy_status.py index 6f0bed7..eec076c 100644 --- a/.claude/status_lines/lazy_status.py +++ b/.claude/status_lines/lazy_status.py @@ -8,11 +8,15 @@ - MCP memory configuration presence (workspace .mcp.json or template) - basic runtime availability hints (npx present) """ + from __future__ import annotations -import json, os, shutil +import json +import os +import shutil from pathlib import Path from datetime import datetime + def detect_mcp_config() -> dict: cwd = Path.cwd() workspace_cfg = cwd / ".mcp.json" @@ -30,11 +34,18 @@ def detect_mcp_config() -> dict: pass return {"configured": configured, "has_memory": has_memory, "npx": npx_ok} + def main() -> None: model = os.getenv("ENRICHMENT_MODEL") mcp = detect_mcp_config() - payload = {"lazy": "ok", "model": model or "default", "mcp": mcp, "time": datetime.now().strftime("%H:%M")} + payload = { + "lazy": "ok", + "model": model or "default", + "mcp": mcp, + "time": datetime.now().strftime("%H:%M"), + } 
print(json.dumps(payload, separators=(",", ":"))) + if __name__ == "__main__": main() diff --git a/.githooks/README.md b/.githooks/README.md new file mode 100644 index 0000000..f82a5fc --- /dev/null +++ b/.githooks/README.md @@ -0,0 +1,123 @@ +# Git Hooks for LAZY-DEV Framework + +This directory contains custom git hooks for the LAZY-DEV framework. + +## Setup + +After cloning the repository, run: + +```bash +git config core.hooksPath .githooks +``` + +This tells git to use hooks from this directory instead of `.git/hooks/`. + +## Available Hooks + +### pre-commit + +**Purpose**: Automatically format all staged files before commit. + +**What it does**: +- Formats Python files with Black + Ruff +- Formats JS/TS/JSON/YAML/Markdown with Prettier +- Re-stages formatted files automatically +- Prevents unformatted code from being committed + +**Languages supported**: +- Python (.py) → Black + Ruff +- JavaScript/TypeScript (.js, .jsx, .ts, .tsx) → Prettier +- JSON (.json) → Prettier +- YAML (.yml, .yaml) → Prettier +- Markdown (.md) → Prettier + +**Requirements**: +- Python 3.11+ +- Black (`pip install black`) +- Ruff (`pip install ruff`) +- Node.js (for Prettier via npx) + +**Behavior**: +1. Gets list of staged files +2. Formats each file type with appropriate formatter +3. Re-adds formatted files to staging area +4. Commit proceeds with formatted code + +**Skip the hook** (if needed): +```bash +git commit --no-verify -m "message" +``` + +**Manual test**: +```bash +# Stage a file +git add some_file.py + +# Run hook manually +.githooks/pre-commit + +# Check if file was formatted +git diff --cached some_file.py +``` + +## Why Version-Controlled Hooks? 
+ +**Benefits**: +- ✅ Shared across entire team +- ✅ Version controlled (changes tracked) +- ✅ No manual setup per developer (just `git config`) +- ✅ Consistent formatting enforcement + +**Traditional `.git/hooks/`**: +- ❌ Not version controlled +- ❌ Each developer must install manually +- ❌ Easy to forget or skip + +## Adding New Hooks + +1. Create hook script in `.githooks/` +2. Make it executable: `chmod +x .githooks/<hook-name>` +3. Document in this README +4. Commit to repository + +## Troubleshooting + +**Hook not running?** +```bash +# Verify hooks path is configured +git config core.hooksPath +# Should output: .githooks + +# If not set: +git config core.hooksPath .githooks +``` + +**Hook failing?** +```bash +# Check hook is executable +ls -la .githooks/pre-commit + +# Make executable if needed +chmod +x .githooks/pre-commit + +# Test hook manually +.githooks/pre-commit +``` + +**Formatters not found?** +```bash +# Install Python formatters +pip install black ruff + +# Prettier is installed via npx (auto-downloads) +npx -y prettier --version +``` + +## Integration with CI + +The pre-commit hook provides **local enforcement**. CI provides **final enforcement**: + +- **Local** (pre-commit): Fast feedback, prevents most issues +- **CI** (GitHub Actions): Safety net, catches anything that bypassed local checks + +Both use the same formatters, ensuring consistency. diff --git a/.githooks/pre-commit b/.githooks/pre-commit new file mode 100644 index 0000000..cfd543f --- /dev/null +++ b/.githooks/pre-commit @@ -0,0 +1,132 @@ +#!/usr/bin/env python +""" +Pre-commit hook for LAZY-DEV Framework. 
+ +Automatically formats all staged files before commit: +- Python files: Black + Ruff +- JavaScript/TypeScript: Prettier (if available) +- Prevents unformatted code from being committed + +Installation: + git config core.hooksPath .githooks +""" + +import subprocess +import sys +from pathlib import Path + + +def get_staged_files(): + """Get list of staged files.""" + result = subprocess.run( + ["git", "diff", "--cached", "--name-only", "--diff-filter=ACM"], + capture_output=True, + text=True, + ) + if result.returncode != 0: + return [] + return [f for f in result.stdout.strip().split("\n") if f] + + +def format_python_files(files): + """Format Python files with Black and Ruff.""" + py_files = [f for f in files if f.endswith(".py")] + if not py_files: + return True + + print(f"Formatting {len(py_files)} Python file(s)...") + + # Run Black + try: + result = subprocess.run( + [sys.executable, "-m", "black", "--quiet"] + py_files, + capture_output=True, + text=True, + timeout=30, + ) + if result.returncode != 0: + print(f"Black formatting failed: {result.stderr}") + return False + except (subprocess.SubprocessError, FileNotFoundError): + print("Warning: Black not available, skipping Python formatting") + + # Run Ruff format + try: + result = subprocess.run( + [sys.executable, "-m", "ruff", "format"] + py_files, + capture_output=True, + text=True, + timeout=30, + ) + if result.returncode != 0: + print(f"Ruff formatting failed: {result.stderr}") + return False + except (subprocess.SubprocessError, FileNotFoundError): + print("Warning: Ruff not available, skipping Python formatting") + + # Re-stage formatted files + subprocess.run(["git", "add"] + py_files) + print(f"[OK] Formatted {len(py_files)} Python file(s)") + return True + + +def format_js_files(files): + """Format JavaScript/TypeScript files with Prettier.""" + js_files = [ + f + for f in files + if f.endswith((".js", ".jsx", ".ts", ".tsx", ".json", ".yml", ".yaml", ".md")) + ] + if not js_files: + return True + 
+ print(f"Formatting {len(js_files)} JS/TS/JSON/Markdown file(s)...") + + try: + result = subprocess.run( + ["npx", "-y", "prettier", "--write", "--loglevel", "error"] + js_files, + capture_output=True, + text=True, + timeout=60, + ) + if result.returncode != 0: + print(f"Prettier formatting failed: {result.stderr}") + return False + + # Re-stage formatted files + subprocess.run(["git", "add"] + js_files) + print(f"[OK] Formatted {len(js_files)} file(s) with Prettier") + except (subprocess.SubprocessError, FileNotFoundError): + print("Warning: Prettier not available, skipping JS/TS/Markdown formatting") + + return True + + +def main(): + """Run pre-commit checks and formatting.""" + print("Running pre-commit formatting...") + + # Get staged files + staged_files = get_staged_files() + if not staged_files: + print("No staged files to format") + return 0 + + print(f"Checking {len(staged_files)} staged file(s)...") + + # Format Python files + if not format_python_files(staged_files): + print("[FAIL] Python formatting failed") + return 1 + + # Format JS/TS/JSON/Markdown files + if not format_js_files(staged_files): + print("[FAIL] JS/TS formatting failed") + return 1 + + print("[SUCCESS] All files formatted successfully") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2fa07d0..b28968f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -74,7 +74,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] - python-version: ['3.11', '3.12'] + python-version: ['3.11', '3.12', '3.13'] steps: - name: Checkout code @@ -86,12 +86,54 @@ jobs: python-version: ${{ matrix.python-version }} cache: 'pip' + - name: Set up Node.js (for MCP Memory test) + uses: actions/setup-node@v4 + with: + node-version: '18' + - name: Install dependencies run: | python -m pip install --upgrade pip pip install pytest pytest-cov pip install -r requirements.txt + - name: 
Test hook execution (cross-platform) + run: | + echo "Testing hooks can execute on ${{ matrix.os }}" + python .claude/hooks/session_start.py 2>&1 || echo "Hook executed (JSON error expected)" + python .claude/hooks/user_prompt_submit.py 2>&1 || echo "Hook executed (JSON error expected)" + python .claude/hooks/post_tool_use_format.py 2>&1 || echo "Hook executed (JSON error expected)" + echo "✅ All hooks executable on ${{ matrix.os }}" + shell: bash + + - name: Test quality scripts (cross-platform) + run: | + echo "Testing quality scripts on ${{ matrix.os }}" + python scripts/format.py --help + python scripts/lint.py --help + python scripts/type_check.py --help + python scripts/test_runner.py --help + echo "✅ All scripts work on ${{ matrix.os }}" + shell: bash + + - name: Test MCP Memory server (cross-platform) + run: | + echo "Testing MCP Memory on ${{ matrix.os }}" + timeout 5 npx -y @modelcontextprotocol/server-memory || echo "✅ MCP server works on ${{ matrix.os }}" + shell: bash + continue-on-error: true + + - name: Test skills structure + run: | + count=$(find .claude/skills -name "SKILL.md" | wc -l) + echo "Found $count skills" + if [ $count -ne 22 ]; then + echo "ERROR: Expected 22 skills, found $count" + exit 1 + fi + echo "✅ All 22 skills present on ${{ matrix.os }}" + shell: bash + - name: Run tests (if tests exist) run: | if [ -d "tests" ]; then diff --git a/.gitignore b/.gitignore index a007940..4a2ea90 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,7 @@ temp_mcp_docs/ # Claude Code runtime data and logs .claude/data/ -logs/ +.claude/memory/memory.jsonl # Python virtual environment .venv/ @@ -11,3 +11,6 @@ __pycache__/ *.pyo *.pyd .Python + +# Ruff cache +.ruff_cache/ diff --git a/CLAUDE.md b/CLAUDE.md index b87ce31..5930060 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -20,6 +20,49 @@ Automate mundane tasks (formatting, commits, PRs) while enforcing discipline (qu Commands are your primary interface to LAZY_DEV. 
Think of them as orchestrators that coordinate agents, skills, and quality checks. +### Project Initialization Commands + +#### `/lazy init-project` + +**When to use:** Bootstrap a new project with complete documentation suite from a project idea. + +**Input:** Project description or enhanced prompt file. + +**What it does:** +- Takes project description/prompt +- Invokes project-planner skill → PROJECT-OVERVIEW.md + SPECIFICATIONS.md +- Invokes tech-stack-architect skill → TECH-STACK.md + ARCHITECTURE.md +- Creates .meta/last-sync.json for tracking +- Stages files for git review + +**Examples:** +```bash +# From description +/lazy init-project "Build a SaaS task management platform with real-time collaboration" + +# From file +/lazy init-project --file project_brief.md + +# Minimal mode (skip architecture) +/lazy init-project "Simple REST API" --minimal +``` + +**Triggers:** +- `project-planner` skill - generates overview and specs +- `tech-stack-architect` skill - selects stack and designs architecture + +**Output:** +``` +./project-management/ +├── PROJECT-OVERVIEW.md +├── SPECIFICATIONS.md +├── TECH-STACK.md +├── ARCHITECTURE.md +└── .meta/last-sync.json +``` + +--- + ### Planning Commands #### `/lazy plan` @@ -316,6 +359,37 @@ Story review incomplete - 2 CRITICAL, 3 WARNING, 1 SUGGESTION issues found. --- +#### `/lazy question` + +**When to use:** Ask questions about the codebase or general technical topics. + +**Input:** Question as string. + +**What it does:** +- Analyzes question to determine if it's codebase-specific or general knowledge +- **For codebase questions**: Searches files with Grep/Glob, reads relevant code, answers with file:line citations +- **For general questions**: Delegates to research agent for documentation/web lookup +- **IMPORTANT**: Creates NO files, NO documentation, NO commits - only answers questions + +**Examples:** +```bash +# Codebase questions +/lazy question "where is user authentication handled?" 
+/lazy question "how does the payment processor work?" +/lazy question "what files implement the REST API?" + +# General questions (uses research agent) +/lazy question "what is the difference between JWT and session tokens?" +/lazy question "how to implement OAuth2 in Python?" +/lazy question "best practices for API versioning?" +``` + +**Output:** +- Inline answer with citations (codebase) or sources (general) +- No artifact creation + +--- + #### `/lazy memory-graph` **When to use:** Manually persist durable facts to the project's knowledge graph. @@ -371,165 +445,197 @@ Story review incomplete - 2 CRITICAL, 3 WARNING, 1 SUGGESTION issues found. ## Skills -Skills are reusable patterns that enhance commands and agents. They're automatically injected or manually invoked based on context. +Skills are reusable patterns that enhance commands and agents. **Skills are MODEL-INVOKED** - I (Claude) autonomously decide when to use them based on your request and the skill's description. -### Auto-Triggered Skills +### How Skills Work (Anthropic Best Practice) -These activate automatically during command execution: +**Autonomous Activation:** +- Skills are NOT user-invoked commands (you don't type `/skill-name`) +- I load skills automatically when relevant to your task +- Decision based on: skill description matching your request context +- No explicit triggering needed - it happens naturally -#### `brainstorming` +**Manual Testing:** +- Use Skill tool to manually invoke: `Skill(command="skill-name")` +- Useful for testing or explicit skill activation -**Triggers:** During `/lazy plan` when multiple design approaches exist. +**Key Principle:** "The description field is critical for skill discovery" - Anthropic -**What it does:** Generates 3-5 implementation options with pros/cons, recommends one with rationale. +### All Available Skills (22 Total) -**Output:** Table with Option | Pros | Cons | Effort | Risk - ---- +Skills are organized by type. 
Each skill's description tells me when to activate it. -#### `memory-graph` +#### Planning & Design Skills -**Triggers:** When UserPromptSubmit hook detects durable facts (ownership, decisions, endpoints). +**`brainstorming`** (.claude/skills/brainstorming/SKILL.md) +- **Description:** Structured ideation for options, trade-offs, and a clear decision +- **When I use it:** User mentions "brainstorm", "options", "approaches", "design choices", or when planning features with multiple viable implementations +- **Output:** Table with Option | Pros | Cons | Effort | Risk + recommendation -**What it does:** Persists facts to project's knowledge graph via MCP Memory. +**`task-slicer`** (.claude/skills/task-slicer/SKILL.md) +- **Description:** Split features into atomic 2–4h tasks with independent tests and minimal dependencies +- **When I use it:** Breaking down user stories into implementable tasks during `/lazy plan` +- **Output:** 3-10 tasks with descriptions, files, dependencies, estimates -**Disable:** Set `LAZYDEV_DISABLE_MEMORY_SKILL=1` +**`ac-expander`** (.claude/skills/ac-expander/SKILL.md) +- **Description:** Turn vague Acceptance Criteria into measurable checks and test assertions +- **When I use it:** Story creation or when ACs need clarification +- **Output:** Expanded, testable acceptance criteria -**Examples of auto-detection:** -- "service:api owned by Alice" -- "repo:backend endpoint:https://api.example.com" -- "decision: use Postgres for better transactions" +**`story-traceability`** (.claude/skills/story-traceability/SKILL.md) +- **Description:** Ensure Acceptance Criteria map to Tasks and Tests for PR-per-story workflow +- **When I use it:** Story review to verify completeness +- **Output:** Traceability matrix linking ACs → Tasks → Tests --- -#### `output-style-selector` +#### Development Skills -**Triggers:** During UserPromptSubmit hook on every command. 
+**`test-driven-development`** (.claude/skills/test-driven-development/SKILL.md) +- **Description:** Enforce RED→GREEN→REFACTOR micro-cycles and keep diffs minimal +- **When I use it:** Implementing features when TDD is required or requested +- **Output:** Test-first workflow with small incremental changes -**What it does:** Selects appropriate output format (table, list, code, prose) based on command context. +**`diff-scope-minimizer`** (.claude/skills/diff-scope-minimizer/SKILL.md) +- **Description:** Keep changes narrowly scoped with a tiny patch plan and stop criteria +- **When I use it:** Preventing scope creep during implementation +- **Output:** Minimal diff strategy with clear boundaries -**Styles:** -- `table-based` - For comparisons and matrices -- `list-based` - For action items and steps -- `code-first` - For technical implementations -- `prose` - For explanations and reports +**`code-review-request`** (.claude/skills/code-review-request/SKILL.md) +- **Description:** Request and process code review efficiently with a simple rubric and patch plan +- **When I use it:** Complex or security-sensitive changes need review +- **Output:** Review checklist and feedback ---- - -#### `context-packer` - -**Triggers:** Before sub-agent calls and during UserPromptSubmit enrichment. - -**What it does:** Summarizes relevant context (files, symbols, commits) to reduce token usage. - -**Output:** 10-20 line brief with: -- Key file paths -- Important symbols/functions -- Last 3 relevant commits -- Pointers to exact lines (not full files) +**`regression-testing`** (.claude/skills/regression-testing/SKILL.md) +- **Description:** Use after bug fixes to evaluate need for regression tests and implement them. Triggers when bugs are fixed to prevent future regressions. Claude decides if regression tests add value based on bug severity, code complexity, and existing coverage. 
+- **When I use it:** After bug fixes are implemented, to decide if regression tests would be valuable +- **Output:** Evaluation decision (add/skip test) + regression test implementation if valuable --- -### Manual-Triggered Skills +#### Quality & Review Skills -These are invoked explicitly by commands or when specific patterns are detected: +**`security-audit`** (.claude/skills/security-audit/SKILL.md) +- **Description:** Triggers for authentication, payments, user input, and API endpoints to check OWASP risks. Auto-evaluates security need and provides actionable fixes, not checklists. +- **When I use it:** Auth, login, payment, API endpoints, user input, SQL queries, file uploads +- **Output:** Risk-level assessment + specific security issues with fast fixes -#### `story-traceability` +**`breaking-change-detector`** (.claude/skills/breaking-change-detector/SKILL.md) +- **Description:** Detects backward-incompatible changes to public APIs, function signatures, endpoints, and data schemas before they break production. Suggests migration paths. +- **When I use it:** Modifying public APIs, endpoints, function signatures, data models +- **Output:** Breaking changes detected + backward-compatible fixes + version bump recommendation -**When to use:** Map acceptance criteria to tasks to tests. +**`error-handling-completeness`** (.claude/skills/error-handling-completeness/SKILL.md) +- **Description:** Evaluates if error handling is sufficient for new code - checks try-catch coverage, logging, user messages, retry logic. Focuses on external calls and user-facing code. +- **When I use it:** API calls, database operations, file I/O, async code, type conversions +- **Output:** Missing error handling + specific fixes with code examples -**Used by:** `project-manager` agent during story creation, `reviewer-story` agent during review. - -**What it does:** Ensures every AC has corresponding tasks, every task has tests, creates traceability matrix. 
+**`performance-budget-checker`** (.claude/skills/performance-budget-checker/SKILL.md) +- **Description:** Detects performance anti-patterns like N+1 queries, nested loops, large file operations, and inefficient algorithms. Suggests fast fixes before issues reach production. +- **When I use it:** Database queries, loops, file operations, API calls, list operations +- **Output:** Performance issues + complexity analysis + optimization fixes --- -#### `task-slicer` - -**When to use:** Break large features into atomic, estimable tasks. +#### Context & Optimization Skills -**Used by:** `project-manager` agent during story creation. +**`context-packer`** (.claude/skills/context-packer/SKILL.md) +- **Description:** Build a compact, high-signal context brief (files, symbols, recent commits) instead of pasting large code blocks +- **When I use it:** Before sub-agent calls, large codebases, token optimization needed +- **Output:** 10-20 line brief with pointers, not full files -**What it does:** -- Analyzes feature scope -- Creates tasks of 2-4 hours each -- Ensures tasks are independent and testable -- Assigns estimates (S/M/L) +**`output-style-selector`** (.claude/skills/output-style-selector/SKILL.md) +- **Description:** Automatically choose the best output style (tables, bullets, YAML, HTML, concise) to improve scanability and save tokens +- **When I use it:** Every prompt to select optimal format +- **Output:** Style declaration (table-based, bullet-points, etc.) --- -#### `git-worktrees` - -**When to use:** Work on multiple stories in parallel with isolated environments. +#### Integration & Workflow Skills -**Used by:** Manually when needed. 
+**`gh-issue-sync`** (.claude/skills/gh-issue-sync/SKILL.md) +- **Description:** Create or update GitHub issue for the story and sub-issues for tasks +- **When I use it:** Story creation with GitHub integration +- **Output:** GitHub issues created/updated -**What it does:** -- Creates separate working directories for each story -- Maintains isolated dependencies -- Prevents branch switching conflicts +**`git-worktrees`** (.claude/skills/git-worktrees/SKILL.md) +- **Description:** Use Git worktrees to isolate tasks and keep diffs small and parallelizable +- **When I use it:** User wants parallel development or branch isolation +- **Output:** Worktree setup instructions -**Example:** -```bash -# Create worktree for new story -git worktree add ../project-US-3.4 feat/US-3.4-oauth2 +**`memory-graph`** (.claude/skills/memory-graph/SKILL.md) +- **Description:** Persistent memory graph skill using the MCP Memory server +- **When I use it:** Durable facts detected (ownership, decisions, endpoints) or explicit memory requests +- **Output:** MCP Memory tool calls to persist/retrieve knowledge +- **Disable:** Set `LAZYDEV_DISABLE_MEMORY_SKILL=1` -# Work in isolation -cd ../project-US-3.4 -/lazy code TASK-001 - -# Cleanup when done -git worktree remove ../project-US-3.4 -``` +**`finishing-a-development-branch`** (.claude/skills/finishing-a-development-branch/SKILL.md) +- **Description:** Use when implementation is complete, all tests pass, and you need to decide how to integrate the work - guides completion of development work by presenting structured options for merge, PR, or cleanup +- **When I use it:** Task/story completion, ready for integration +- **Output:** Options for merge, PR creation, or cleanup --- -#### `test-driven-development` +#### Project Initialization Skills -**When to use:** TDD workflow enforcement. +**`project-planner`** (.claude/skills/project-planner/SKILL.md) +- **Description:** Generates project overview and specifications from project prompt. 
Creates PROJECT-OVERVIEW.md with vision/goals/features and SPECIFICATIONS.md with functional/non-functional requirements. +- **When I use it:** Starting new projects, creating project documentation +- **Output:** PROJECT-OVERVIEW.md + SPECIFICATIONS.md -**Used by:** `coder` agent when tests are required. +**`tech-stack-architect`** (.claude/skills/tech-stack-architect/SKILL.md) +- **Description:** Selects appropriate tech stack and designs system architecture. Creates TECH-STACK.md with technology choices/rationale and ARCHITECTURE.md with diagrams. +- **When I use it:** After project planning, when designing technical architecture +- **Output:** TECH-STACK.md + ARCHITECTURE.md with mermaid diagrams -**What it does:** Enforces RED → GREEN → REFACTOR cycle: -1. Write failing test (RED) -2. Minimal implementation (GREEN) -3. Improve code quality (REFACTOR) +**`project-docs-sync`** (.claude/skills/project-docs-sync/SKILL.md) +- **Description:** Automatically syncs project documentation when significant changes occur. Conservative auto-trigger on tech stack changes, architecture refactors, or requirement updates. +- **When I use it:** Auto-triggers on PostToolUse when project-management/ files change +- **Output:** Sync report with updated documentation -**Enabled when:** -- Project has test framework (pytest.ini, jest.config.js) -- TDD mentioned in CLAUDE.md or README -- `LAZYDEV_ENFORCE_TDD=1` environment variable set +**`agent-selector`** (.claude/skills/agent-selector/SKILL.md) +- **Description:** Analyzes user prompts and recommends the best specialized agent for the task (tester, research, reviewer, refactor, documentation, cleanup, coder). Auto-triggers on UserPromptSubmit to route work efficiently. 
+- **When I use it:** Auto-triggers on every prompt to intelligently delegate to specialized agents +- **Output:** Agent recommendation with rationale --- -#### `code-review-checklist` +### Skill Best Practices (From Anthropic) -**When to use:** Comprehensive review criteria. +**1. Effective Descriptions:** +- Include BOTH what the skill does AND when to use it +- Use specific trigger words users would mention +- Example: "Use when facing 3+ independent failures..." (not just "helps with failures") -**Used by:** `reviewer` and `reviewer-story` agents. +**2. Keep Skills Focused:** +- One skill = one capability +- Split broad skills (e.g., "document processing") into focused ones -**Checks:** -- Code quality (readability, maintainability) -- Correctness (meets ACs, edge cases) -- Security (OWASP Top 10) -- Testing (coverage, edge cases) -- Documentation (public APIs) +**3. File Organization:** +- Personal skills: `~/.claude/skills/skill-name/` (user-specific) +- Project skills: `.claude/skills/skill-name/` (team-shared via git) +- Supporting files: reference.md, examples.md, scripts/, templates/ +**4. YAML Frontmatter Requirements:** +```yaml --- +name: skill-name # lowercase, hyphens, max 64 chars (REQUIRED) +description: What it does and when to use it (REQUIRED, max 1024 chars) +allowed-tools: Read, Write, Bash # Optional: restrict tool access +--- +``` -#### `security-scanner` - -**When to use:** OWASP Top 10 validation. - -**Used by:** Review agents for security-sensitive code (auth, payment, data handling). +**5. Manual Testing:** +```bash +# Use Skill tool to manually trigger +Skill(command="skill-name") +``` -**Checks:** -- Input validation -- SQL injection prevention -- XSS prevention -- Authentication/authorization -- Secrets management (no hardcoded keys) -- Proper error messages (no data leaks) +**6. 
How I Decide to Use Skills:** +- Match your request keywords against skill descriptions +- Evaluate relevance to current task +- Load skill content when match threshold met +- Apply skill instructions to your request --- @@ -711,6 +817,47 @@ git worktree remove ../project-US-3.5 --- +### Project Initialization Workflow + +**Scenario:** Start a new project from scratch with complete documentation. + +```bash +# 1. Initialize project documentation +/lazy init-project "Build an AI-powered code review SaaS platform" + +# ✓ project-planner skill generates: +# - PROJECT-OVERVIEW.md (vision, goals, features) +# - SPECIFICATIONS.md (requirements, APIs, data models) +# ✓ tech-stack-architect skill generates: +# - TECH-STACK.md (React, Node.js, PostgreSQL + rationale) +# - ARCHITECTURE.md (microservices, mermaid diagrams) +# ✓ Files staged for git review + +# 2. Review and customize (optional) +# Open PROJECT-OVERVIEW.md, TECH-STACK.md in editor +# Make adjustments if needed + +# 3. Commit initial documentation +git commit -m "docs: initialize project documentation for code review platform" + +# 4. Start feature planning +/lazy plan "User authentication with GitHub OAuth" +# ✓ Uses PROJECT-OVERVIEW.md and TECH-STACK.md as context +# ✓ Creates US-1.1-user-authentication/ + +# 5. Begin implementation +/lazy code @US-1.1.md +``` + +**Timeline:** ~2-3 minutes for full init + +**Artifacts:** +- 4 documentation files (40-50KB total) +- Ready for feature planning +- Consistent technical foundation + +--- + ### Review Failure and Fix Workflow **Scenario:** Story review finds issues that need fixing with new debug report feature. 

diff --git a/README.md b/README.md
index 3274b4e..eb6550e 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # LAZY_DEV Framework
 
-**Version**: 2.2.0 | **License**: MIT | **Status**: Production-Ready
+**Version**: 2.2.0 | **License**: MIT | **Status**: Production-Ready | **Platforms**: Linux, macOS, Windows
 
 [![CI](https://github.com/MacroMan5/claude-code-workflow-plugins/workflows/CI/badge.svg)](https://github.com/MacroMan5/claude-code-workflow-plugins/actions)
 [![CodeQL](https://github.com/MacroMan5/claude-code-workflow-plugins/workflows/CodeQL%20Security%20Analysis/badge.svg)](https://github.com/MacroMan5/claude-code-workflow-plugins/actions)
@@ -9,6 +9,7 @@
 [![Python 3.11+](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/)
 [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
 [![Claude Code](https://img.shields.io/badge/Claude%20Code-Plugin-blueviolet.svg)](https://docs.claude.com/en/docs/claude-code)
+[![Cross-Platform](https://img.shields.io/badge/platform-Linux%20%7C%20macOS%20%7C%20Windows-brightgreen.svg)](./docs/troubleshooting/CROSS_PLATFORM.md)
 
 ---
 
@@ -98,6 +99,14 @@ Automate the mundane (formatting, commits, PRs) while enforcing discipline (qual
 
 ## Installation
 
+### Cross-Platform Support ✅
+
+LAZY_DEV works seamlessly on **Linux, macOS, and Windows**. All hooks, scripts, and tools are cross-platform by design.
+
+**Platform-specific guides:**
+- **All Platforms**: [CROSS_PLATFORM.md](./docs/troubleshooting/CROSS_PLATFORM.md) - Complete compatibility guide
+- **Windows Users**: [WINDOWS_SETUP.md](./docs/troubleshooting/WINDOWS_SETUP.md) - Windows-specific setup
+
 ### Prerequisites
 
 - Python 3.11+
@@ -106,6 +115,7 @@ Automate the mundane (formatting, commits, PRs) while enforcing discipline (qual
 - git
 - gh CLI (for PR creation)
 - Node.js (for MCP Memory, optional)
+- **Windows only**: Git Bash or WSL (for bash scripts)
 
 ### Step 1: Install Framework
 
diff --git a/docs/troubleshooting/CROSS_PLATFORM.md b/docs/troubleshooting/CROSS_PLATFORM.md
new file mode 100644
index 0000000..f140fad
--- /dev/null
+++ b/docs/troubleshooting/CROSS_PLATFORM.md
@@ -0,0 +1,348 @@
+# Cross-Platform Compatibility Guide
+
+**LAZY_DEV Framework** is designed to work seamlessly across **Linux, macOS, and Windows**.
+
+## Platform Support Matrix
+
+| Component | Linux | macOS | Windows | Notes |
+|-----------|-------|-------|---------|-------|
+| **Python Hooks** | ✅ | ✅ | ✅ | Uses pathlib.Path (cross-platform) |
+| **Quality Scripts** | ✅ | ✅ | ✅ | Python-based, platform-agnostic |
+| **MCP Memory** | ✅ | ✅ | ✅ | Node.js-based |
+| **Git Operations** | ✅ | ✅ | ✅ | Standard git commands |
+| **Bash Scripts** | ✅ | ✅ | ⚠️ | Windows requires Git Bash/WSL |
+| **Commands** | ✅ | ✅ | ✅ | Claude Code CLI |
+| **Skills** | ✅ | ✅ | ✅ | Markdown-based |
+| **Agents** | ✅ | ✅ | ✅ | Markdown-based |
+
+## Key Design Principles
+
+### 1. 
Path Handling ✅ + +**All paths use cross-platform methods:** + +- **Python**: Uses `pathlib.Path` (not `os.path`) + ```python + from pathlib import Path + project_root = Path.cwd() + hook_path = project_root / ".claude" / "hooks" / "session_start.py" + ``` + +- **Settings**: Uses relative paths (work on all platforms) + ```json + { + "command": "python .claude/hooks/session_start.py" + } + ``` + +- **Forward slashes**: Work on all platforms in Python and most shells + ```bash + python .claude/hooks/session_start.py # Works everywhere + ``` + +### 2. No Platform-Specific Code ✅ + +**Verified**: No `sys.platform`, `os.name`, or platform checks in hooks or scripts. + +All functionality works identically across platforms without conditional logic. + +### 3. Standard Tools ✅ + +**Only standard cross-platform tools:** +- Python 3.11+ (available on all platforms) +- Node.js (available on all platforms) +- Git (available on all platforms) +- npm/npx (comes with Node.js) + +### 4. Shell Script Fallbacks ⚠️ + +**Bash scripts** (used for status line) require: +- **Linux/macOS**: Native bash (already installed) +- **Windows**: Git Bash (comes with Git for Windows) or WSL + +Alternative: The status line will fall back to minimal JSON if bash is unavailable. 
+ +## Installation by Platform + +### Linux (Ubuntu/Debian) + +```bash +# Install prerequisites +sudo apt update +sudo apt install python3.11 python3-pip git nodejs npm + +# Install framework +cp -r LAZY_DEV/.claude/ .claude/ + +# Set environment variables +export ENRICHMENT_MODEL=claude-3-5-haiku +export MEMORY_FILE_PATH=.claude/memory/memory.jsonl + +# Add to ~/.bashrc or ~/.zshrc for persistence +echo 'export ENRICHMENT_MODEL=claude-3-5-haiku' >> ~/.bashrc +``` + +### macOS + +```bash +# Install prerequisites (using Homebrew) +brew install python@3.11 git node + +# Install framework +cp -r LAZY_DEV/.claude/ .claude/ + +# Set environment variables +export ENRICHMENT_MODEL=claude-3-5-haiku +export MEMORY_FILE_PATH=.claude/memory/memory.jsonl + +# Add to ~/.zshrc or ~/.bash_profile for persistence +echo 'export ENRICHMENT_MODEL=claude-3-5-haiku' >> ~/.zshrc +``` + +### Windows + +```powershell +# Install prerequisites +# - Python 3.11+ from python.org +# - Git for Windows from git-scm.com +# - Node.js from nodejs.org + +# Install framework +Copy-Item -Recurse LAZY_DEV\.claude .claude + +# Set environment variables (persistent) +[System.Environment]::SetEnvironmentVariable('ENRICHMENT_MODEL', 'claude-3-5-haiku', 'User') + +# Or for current session only +$env:ENRICHMENT_MODEL = "claude-3-5-haiku" +``` + +**See [WINDOWS_SETUP.md](./WINDOWS_SETUP.md) for detailed Windows instructions.** + +## Platform-Specific Considerations + +### Path Separators + +| Platform | Native | Python Handles | Settings.json | +|----------|--------|----------------|---------------| +| Linux | `/` | `/` and `\` | Use `/` or `.` | +| macOS | `/` | `/` and `\` | Use `/` or `.` | +| Windows | `\` | `/` and `\` | Use `/` or `.` | + +**Recommendation**: Always use forward slashes `/` or relative paths with `.` + +### Environment Variables + +| Platform | Set (Session) | Set (Persistent) | +|----------|---------------|------------------| +| **Linux/macOS** | `export VAR=value` | Add to `~/.bashrc` 
or `~/.zshrc` | +| **Windows (PowerShell)** | `$env:VAR = "value"` | `[System.Environment]::SetEnvironmentVariable()` | +| **Windows (CMD)** | `set VAR=value` | Use System Properties GUI | + +### Line Endings + +**Not an issue**: Python handles both `\n` (Unix) and `\r\n` (Windows) automatically. + +Git should be configured to handle line endings: +```bash +# Set once per machine +git config --global core.autocrlf input # Linux/macOS +git config --global core.autocrlf true # Windows +``` + +### Shell Scripts + +**Status line** (`.claude/status_lines/lazy_status.sh`) requires bash: + +| Platform | Bash Availability | +|----------|-------------------| +| Linux | ✅ Built-in | +| macOS | ✅ Built-in | +| Windows | ⚠️ Requires Git Bash or WSL | + +**Fallback**: If bash is unavailable, status line returns minimal JSON. + +## Testing Cross-Platform + +### Quick Test (All Platforms) + +```bash +# 1. Test Python hooks +python .claude/hooks/session_start.py +# Expected: JSON decode error (normal when run manually) + +# 2. Test scripts +python scripts/format.py --help +python scripts/lint.py --help + +# 3. Test MCP Memory +npx -y @modelcontextprotocol/server-memory +# Expected: "Knowledge Graph MCP Server running on stdio" +# Press Ctrl+C to stop + +# 4. Count skills +find .claude/skills -name "SKILL.md" | wc -l +# Expected: 17 + +# 5. 
Test in Claude Code +/help +# Should list lazy commands +``` + +### Platform-Specific Tests + +**Linux/macOS**: +```bash +# Test bash status line +bash .claude/status_lines/lazy_status.sh +# Should output JSON with status +``` + +**Windows** (PowerShell): +```powershell +# Test paths work with backslashes +Test-Path .claude\hooks\session_start.py +# Should return: True + +# Test environment variable +echo $env:ENRICHMENT_MODEL +# Should show: claude-3-5-haiku +``` + +## CI/CD Recommendations + +### GitHub Actions Matrix + +```yaml +name: Cross-Platform Tests + +on: [push, pull_request] + +jobs: + test: + strategy: + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + python-version: ['3.11', '3.12', '3.13'] + + runs-on: ${{ matrix.os }} + + steps: + - uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Set up Node.js + uses: actions/setup-node@v3 + with: + node-version: '18' + + - name: Test hooks execute + run: python .claude/hooks/session_start.py || true + + - name: Test scripts have help + run: | + python scripts/format.py --help + python scripts/lint.py --help + + - name: Test MCP Memory + run: npx -y @modelcontextprotocol/server-memory & + timeout-minutes: 1 + + - name: Count skills + shell: bash + run: | + count=$(find .claude/skills -name "SKILL.md" | wc -l) + if [ $count -ne 17 ]; then exit 1; fi +``` + +## Common Cross-Platform Issues + +### Issue 1: Bash Not Found (Windows) + +**Symptom**: Status line doesn't work on Windows. + +**Solution**: Install Git for Windows (includes Git Bash). + +**Workaround**: Status line falls back to minimal JSON automatically. + +### Issue 2: Path Separators + +**Symptom**: Paths with backslashes don't work in settings.json. 
+ +**Solution**: Use forward slashes or relative paths with `.` +```json +✅ "command": "python .claude/hooks/session_start.py" +❌ "command": "python .claude\\hooks\\session_start.py" +``` + +### Issue 3: Python Not in PATH + +**Symptom**: `python: command not found` + +**Solution by Platform**: +- **Linux**: Use `python3` or create symlink +- **macOS**: Use `python3` or install via Homebrew +- **Windows**: Check "Add to PATH" during Python installation + +### Issue 4: Different Python Command + +Some platforms use `python3` instead of `python`. + +**Fix**: Create alias or use `python3` explicitly in settings.json: +```json +{ + "command": "python3 .claude/hooks/session_start.py" +} +``` + +## Benefits of Cross-Platform Design + +✅ **Consistent Experience**: Same commands, same behavior across platforms +✅ **Team Collaboration**: Team members can use different OSes +✅ **CI/CD**: Run tests on all platforms with GitHub Actions +✅ **Maintainability**: No platform-specific code branches +✅ **Portability**: Move projects between machines easily + +## Verification Checklist + +After installation, verify cross-platform compatibility: + +- [ ] Python executes: `python --version` or `python3 --version` +- [ ] Node.js works: `node --version` +- [ ] Hooks execute: `python .claude/hooks/session_start.py` +- [ ] Scripts work: `python scripts/format.py --help` +- [ ] MCP starts: `npx -y @modelcontextprotocol/server-memory` +- [ ] All 17 skills present: `find .claude/skills -name "SKILL.md" | wc -l` +- [ ] Environment variable set: Check `ENRICHMENT_MODEL` +- [ ] Commands listed: `/help` in Claude Code + +## Platform-Specific Guides + +- **Windows**: See [WINDOWS_SETUP.md](./WINDOWS_SETUP.md) +- **Linux**: Standard installation works out of the box +- **macOS**: Standard installation works out of the box + +## Contributing Cross-Platform Code + +When contributing, ensure: + +1. **Use pathlib.Path** for all path operations (not `os.path`) +2. 
**Test on multiple platforms** (use GitHub Actions) +3. **Avoid platform checks** (no `if sys.platform == "win32"`) +4. **Use forward slashes** in example paths +5. **Document platform-specific requirements** clearly + +## Version + +**Document Version**: 1.0.0 +**Last Updated**: 2025-10-30 +**Framework Version**: 2.2.0 +**Tested Platforms**: Linux (Ubuntu 22.04), macOS (Sonoma 14.x), Windows (10/11) + +--- + +**LAZY_DEV Framework** - Write once, run everywhere! 🌍 diff --git a/docs/troubleshooting/WINDOWS_SETUP.md b/docs/troubleshooting/WINDOWS_SETUP.md new file mode 100644 index 0000000..5330801 --- /dev/null +++ b/docs/troubleshooting/WINDOWS_SETUP.md @@ -0,0 +1,299 @@ +# Windows Setup Guide for LAZY_DEV Framework + +This guide covers Windows-specific setup and troubleshooting for the LAZY_DEV Framework. + +## Prerequisites Verification + +### 1. Python 3.11+ +```powershell +python --version +# Should show: Python 3.11.x or higher +``` + +### 2. Node.js (for MCP Memory) +```powershell +node --version +# Should show: v18.x or higher +npx --version +# Should show npx version +``` + +### 3. Git Bash or WSL +The framework uses bash scripts for some operations. You need either: +- Git Bash (comes with Git for Windows) +- WSL (Windows Subsystem for Linux) + +Test: +```bash +bash --version +``` + +## Installation Steps + +### Step 1: Copy Framework Files +```powershell +# From the LAZY_DEV repository +Copy-Item -Recurse LAZY_DEV\.claude .claude +``` + +### Step 2: Fix Hook Paths (IMPORTANT for Windows) + +The `.claude/settings.json` file needs to use relative paths instead of `$CLAUDE_PROJECT_DIR`. 
+ +**Correct format:** +```json +{ + "hooks": { + "UserPromptSubmit": [ + { + "hooks": [ + { + "type": "command", + "command": "python .claude/hooks/user_prompt_submit.py" + } + ] + } + ] + } +} +``` + +**Incorrect format (will fail on Windows):** +```json +{ + "command": "python $CLAUDE_PROJECT_DIR/.claude/hooks/user_prompt_submit.py" +} +``` + +### Step 3: Set Environment Variables + +**PowerShell (current session):** +```powershell +$env:ENRICHMENT_MODEL = "claude-3-5-haiku" +$env:MEMORY_FILE_PATH = ".claude/memory/memory.jsonl" +``` + +**PowerShell (persistent):** +```powershell +[System.Environment]::SetEnvironmentVariable('ENRICHMENT_MODEL', 'claude-3-5-haiku', 'User') +[System.Environment]::SetEnvironmentVariable('MEMORY_FILE_PATH', '.claude/memory/memory.jsonl', 'User') +``` + +**Command Prompt:** +```cmd +set ENRICHMENT_MODEL=claude-3-5-haiku +set MEMORY_FILE_PATH=.claude/memory/memory.jsonl +``` + +### Step 4: Create Memory Directory +```powershell +New-Item -ItemType Directory -Force -Path .claude\memory +``` + +Or in bash: +```bash +mkdir -p .claude/memory +``` + +### Step 5: Verify Installation + +Test all components: + +**1. Test Hooks:** +```powershell +python .claude/hooks/user_prompt_submit.py +# Should show: JSON decode error (expected when run manually) + +python .claude/hooks/post_tool_use_format.py +# Should show: JSON decode error (expected when run manually) +``` + +**2. Test Scripts:** +```powershell +python scripts/format.py --help +python scripts/lint.py --help +python scripts/type_check.py --help +python scripts/test_runner.py --help +``` + +**3. Test MCP Memory:** +```bash +cd .claude/memory +npx -y @modelcontextprotocol/server-memory +# Should show: "Knowledge Graph MCP Server running on stdio" +# Press Ctrl+C to stop +``` + +**4. List Skills:** +```bash +find .claude/skills -name "SKILL.md" | wc -l +# Should show: 17 +``` + +**5. 
Test in Claude Code:** +``` +/help +# Should list lazy commands +``` + +## Common Issues and Solutions + +### Issue 1: Environment Variable Not Expanding + +**Symptom:** +``` +python: can't open file 'C:\path\$CLAUDE_PROJECT_DIR\.claude\hooks\...' +``` + +**Solution:** +Replace all `$CLAUDE_PROJECT_DIR` with relative paths in `.claude/settings.json`: +- Change: `python $CLAUDE_PROJECT_DIR/.claude/hooks/session_start.py` +- To: `python .claude/hooks/session_start.py` + +### Issue 2: Python Not Found + +**Symptom:** +``` +'python' is not recognized as an internal or external command +``` + +**Solution:** +1. Install Python 3.11+ from [python.org](https://www.python.org/downloads/) +2. During installation, check "Add Python to PATH" +3. Restart terminal +4. Verify: `python --version` + +### Issue 3: Node.js/NPX Not Found + +**Symptom:** +``` +'node' is not recognized as an internal or external command +``` + +**Solution:** +1. Install Node.js v18+ from [nodejs.org](https://nodejs.org/) +2. Restart terminal +3. Verify: `node --version` and `npx --version` + +### Issue 4: Bash Scripts Not Running + +**Symptom:** +``` +bash: command not found +``` + +**Solution:** +Install Git for Windows (includes Git Bash): +1. Download from [git-scm.com](https://git-scm.com/download/win) +2. During installation, select "Use Git and optional Unix tools from Command Prompt" +3. Restart terminal + +### Issue 5: Memory Directory Missing + +**Symptom:** +``` +ls: cannot access '.claude/memory/': No such file or directory +``` + +**Solution:** +```powershell +New-Item -ItemType Directory -Force -Path .claude\memory +``` + +### Issue 6: Hooks Not Executing in Claude Code + +**Symptom:** +Hooks don't seem to run when using Claude Code commands. + +**Solution:** +1. Verify hooks are enabled: `/hooks list` +2. Check Claude Code logs for errors +3. Ensure Python is in PATH: `python --version` +4. 
Restart Claude Code CLI + +## Path Handling Notes + +### Forward Slashes vs Backslashes + +- Python scripts: Use forward slashes `/` or `os.path.join()` +- PowerShell: Backslashes `\` work, but forward slashes `/` also work +- Bash scripts: Always use forward slashes `/` + +### Relative Paths + +All hook commands should use relative paths from the project root: +- ✅ `python .claude/hooks/session_start.py` +- ❌ `python $CLAUDE_PROJECT_DIR/.claude/hooks/session_start.py` +- ❌ `python C:\absolute\path\.claude\hooks\session_start.py` + +### Git Bash Path Translation + +Git Bash automatically translates Windows paths: +- Windows: `C:\Users\username\project` +- Git Bash: `/c/Users/username/project` + +This is handled automatically, no action needed. + +## Testing Checklist + +After installation, verify: + +- [ ] Python 3.11+ installed: `python --version` +- [ ] Node.js installed: `node --version` +- [ ] Git Bash or WSL available: `bash --version` +- [ ] Environment variables set: `echo $env:ENRICHMENT_MODEL` +- [ ] Memory directory created: `Test-Path .claude\memory` +- [ ] Hooks use relative paths in `.claude/settings.json` +- [ ] Hooks execute without Python errors: `python .claude/hooks/session_start.py` +- [ ] Scripts have help text: `python scripts/format.py --help` +- [ ] MCP Memory server runs: `npx -y @modelcontextprotocol/server-memory` +- [ ] 17 skills present: `find .claude/skills -name "SKILL.md" | wc -l` +- [ ] Commands listed in Claude Code: `/help` + +## Performance Tips + +### Use PowerShell 7+ + +PowerShell 7 (Core) is faster and more Unix-like than Windows PowerShell 5.1: + +```powershell +# Install PowerShell 7 +winget install Microsoft.PowerShell +``` + +### Use WSL2 for Better Performance + +WSL2 provides better performance for bash operations: + +```powershell +# Enable WSL2 +wsl --install +``` + +Then run Claude Code from within WSL2. 

+
+## Additional Resources
+
+- [LAZY_DEV Framework README](../../README.md)
+- [CLAUDE.md Guide](../../CLAUDE.md)
+- [Workflow Documentation](../plugins/WORKFLOW.md)
+- [Memory System](../plugins/MEMORY.md)
+
+## Tested Environment
+
+This guide was verified on:
+- **OS**: Windows 10/11
+- **Python**: 3.13
+- **Node.js**: v22.15.0
+- **Git**: Git Bash 2.x
+- **Terminal**: PowerShell, Git Bash
+
+## Version
+
+**Document Version**: 1.0.0
+**Last Updated**: 2025-10-30
+**Framework Version**: 2.2.0
+
+---
+
+**Need help?** Open an issue at [github.com/MacroMan5/claude-code-workflow-plugins](https://github.com/MacroMan5/claude-code-workflow-plugins/issues)
diff --git a/scripts/format.py b/scripts/format.py
index d1777fc..e6870a0 100644
--- a/scripts/format.py
+++ b/scripts/format.py
@@ -105,23 +105,23 @@ def format_path(target: Path, session_id: Optional[str]) -> int:
     ensure_path(target)
     step_results: list[StepResult] = []
 
-    print(f"📝 Running Black on {target}...")
-    black_result = run_subprocess(["black", str(target)])
+    print(f"Running Black on {target}...")
+    black_result = run_subprocess([sys.executable, "-m", "black", str(target)])
     step_results.append(black_result)
     if black_result.exit_code != 0:
-        print(f"❌ Black failed:\n{black_result.stderr}", file=sys.stderr)
+        print(f"Black failed:\n{black_result.stderr}", file=sys.stderr)
         write_log(step_results, target, session_id)
         return black_result.exit_code
 
-    print(f"📝 Running Ruff format on {target}...")
-    ruff_result = run_subprocess(["ruff", "format", str(target)])
+    print(f"Running Ruff format on {target}...")
+    ruff_result = run_subprocess([sys.executable, "-m", "ruff", "format", str(target)])
     step_results.append(ruff_result)
     if ruff_result.exit_code != 0:
-        print(f"❌ Ruff format failed:\n{ruff_result.stderr}", file=sys.stderr)
+        print(f"Ruff format failed:\n{ruff_result.stderr}", file=sys.stderr)
         write_log(step_results, target, session_id)
         return ruff_result.exit_code
 
-    print("✅ Formatting complete")
+    print("Formatting complete")
     write_log(step_results, target, session_id)
     return 0
 
@@ -146,10 +146,10 @@ def main(argv: list[str] | None = None) -> int:
     try:
         return format_path(target, args.session_id)
     except FileNotFoundError as exc:
-        print(f"❌ {exc}", file=sys.stderr)
+        print(f"Error: {exc}", file=sys.stderr)
         return 1
     except KeyboardInterrupt:
-        print("⚠️ Formatting interrupted", file=sys.stderr)
+        print("Warning: Formatting interrupted", file=sys.stderr)
         return 130