diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index fdb602c..de7e2b2 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -2,7 +2,7 @@ name: Upload Python Package to PyPI on: release: - types: [created] + types: [ created ] jobs: pypi-publish: diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index df21491..b4ffa30 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -11,20 +11,20 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: [ "3.8", "3.9", "3.10", "3.11" ] steps: - - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - pip install pytest-cov - pip install -e . - - name: Run tests - run: | - pytest tests/ \ No newline at end of file + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install pytest-cov + pip install -e . + - name: Run tests + run: | + pytest tests/ \ No newline at end of file diff --git a/README.md b/README.md index 50e297b..40fa883 100644 --- a/README.md +++ b/README.md @@ -7,12 +7,12 @@ / /___ | (_) || (_| || __/ / /\/\ \ / /_// \____/ \___/ \__,_| \___| \/ \//___,' -Ver. 0.0.2 -```` +Ver. 
0.0.2b +``` # CodeMD -🚀 Transform code repositories into markdown-formatted strings ready for LLM prompting +🚀 Transform code files and repositories into markdown-formatted strings ready for LLM prompting [![Tests](https://github.com/dotpyu/codemd/actions/workflows/tests.yml/badge.svg)](https://github.com/dotpyu/codemd/actions/workflows/tests.yml) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) @@ -21,28 +21,32 @@ Ver. 0.0.2 ## 📝 Overview -CodeMD helps you convert your entire codebase into a format that's optimal for code-related prompts with Large Language Models (LLMs) like GPT-4, Claude, and others. It automatically processes your code files and outputs them in a clean, markdown-formatted structure that's perfect for LLM interactions. +CodeMD helps you convert your code files or entire codebase into a format that's optimal for code-related prompts with Large Language Models (LLMs) like GPT-4, Claude, and others. It automatically processes your code files and outputs them in a clean, markdown-formatted structure that's perfect for LLM interactions. 
## ✨ Features -- 🔍 **Smart Directory Scanning**: Recursively scans directories for code files -- 🎯 **Flexible Configuration**: +- 🔍 **Flexible Processing**: + - Single file processing + - Recursive directory scanning +- 🎯 **Configurable Options**: - Configurable file extensions - File and pattern exclusion support - Custom .gitignore support -- 📊 **Intelligent Output**: +- 📊 **Smart Output**: - Markdown-formatted code blocks - - Preserved directory structure - - Repository structure visualization + - Optional directory structure visualization - Token count estimation (with tiktoken) + - Configurable output display - 📋 **Convenience**: - Simple command-line interface - Direct copy-to-clipboard support - Multiple output options -### 🎉 Recent Updates +### 🎉 Recent Updates (0.0.2b) -- ⭐ **NEW**: Repository structure visualization (disable with `--no-structure`) +- ⭐ **NEW**: Single file processing support +- ⭐ **NEW**: Configurable output display (use `--print` to show output) +- ⭐ **NEW**: Repository structure visualization (auto-disabled for single files, or use `--no-structure`) - ⭐ **NEW**: Automatic .gitignore support - Uses project's .gitignore by default - Custom .gitignore files via `--gitignore` @@ -65,13 +69,27 @@ pip install -e . ### Command Line Interface -**Basic Usage:** +**Single File Processing:** ```bash -codemd /path/to/your/code +# Process a single file (no output by default) +codemd /path/to/script.py + +# Process and display output +codemd /path/to/script.py --print + +# Save to file +codemd /path/to/script.py -o output.md ``` -**Custom Extensions and Output:** +**Directory Processing:** ```bash +# Basic directory scanning (no output by default) +codemd /path/to/your/code + +# Show output in terminal +codemd /path/to/your/code --print + +# Custom extensions and output file codemd /path/to/your/code -e py,java,sql -o output.md ``` @@ -106,6 +124,7 @@ Contributions are welcome! Feel free to open issues or submit pull requests. 
Distributed under the Apache 2.0 License. See `LICENSE` for more information. --- +
Made with ❤️ by Peilin -
+ \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index b4cf90b..9302cf6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "codemd" -version = "0.0.2" +version = "0.0.2b" authors = [ { name = "Peilin Yu", email = "peilin_yu@brown.edu" }, ] diff --git a/setup.py b/setup.py index 5f76d85..131f746 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,5 @@ import os + from setuptools import setup, find_packages with open(os.path.join(os.path.dirname(__file__), "README.md"), encoding="utf-8") as f: @@ -7,10 +8,9 @@ with open("requirements.txt", encoding="utf-8") as f: requirements = [line.strip() for line in f if line.strip() and not line.startswith("#")] - setup( name="codemd", - version="0.0.2", + version="0.0.2b", author="Peilin Yu", author_email="peilin_yu@brown.edu", description="Transform code repositories into markdown-formatted strings ready for LLM prompting", @@ -43,4 +43,4 @@ "Bug Reports": "https://github.com/dotpyu/codemd/issues", "Source": "https://github.com/dotpyu/codemd", }, -) \ No newline at end of file +) diff --git a/src/codemd/__init__.py b/src/codemd/__init__.py index 763546a..1675598 100644 --- a/src/codemd/__init__.py +++ b/src/codemd/__init__.py @@ -1,5 +1,5 @@ from .cli import main from .scanner import CodeScanner -__version__ = "0.0.2" -__all__ = ["CodeScanner", "main"] \ No newline at end of file +__version__ = "0.0.2b" +__all__ = ["CodeScanner", "main"] diff --git a/src/codemd/cli.py b/src/codemd/cli.py index feece11..21e6441 100644 --- a/src/codemd/cli.py +++ b/src/codemd/cli.py @@ -1,19 +1,19 @@ import argparse +import platform +import subprocess import sys from pathlib import Path from typing import Set, Tuple -import subprocess -import platform from .scanner import CodeScanner try: import tiktoken + TIKTOKEN_AVAILABLE = True except ImportError: TIKTOKEN_AVAILABLE = False - BANNER = r""" ___ _ ___ / __\ ___ __| | ___ /\/\ / \ @@ -24,13 +24,18 @@ EPILOG = 
""" Examples: - # Basic usage (prints to stdout) + # Basic usage (file or directory, no output by default) codemd /path/to/code + codemd /path/to/file.py + + # Print output to stdout + codemd /path/to/code --print + codemd /path/to/file.py --print - # Custom extensions (prints to stdout) + # Custom extensions codemd /path/to/code -e py,java,sql - # Save to file instead of printing + # Save to file codemd /path/to/code -o output.md # Exclude patterns and specific files @@ -39,7 +44,7 @@ # Non-recursive scan with custom output codemd /path/to/code --no-recursive -o custom.md - # Disable structure output + # Disable structure output (auto-disabled for single files) codemd /path/to/code --no-structure # Use specific gitignore files @@ -47,6 +52,9 @@ # Disable gitignore processing codemd /path/to/code --ignore-gitignore + + # Process single file and print output + codemd /path/to/script.py --print -o script.md """ @@ -54,13 +62,13 @@ def parse_arguments() -> argparse.Namespace: """Parse command line arguments.""" parser = argparse.ArgumentParser( prog='codemd', - description='Transform code repositories into markdown-formatted strings ready for LLM prompting', + description='Transform code repositories or files into markdown-formatted strings ready for LLM prompting', formatter_class=argparse.RawDescriptionHelpFormatter, epilog=EPILOG ) - parser.add_argument('directory', type=str, help='Directory to scan') - parser.add_argument('-e', '--extensions', type=str, default='py,java,js,cpp,c,h,hpp', + parser.add_argument('path', type=str, help='File or directory to scan') + parser.add_argument('-e', '--extensions', type=str, default=None, help='Comma-separated list of file extensions to include (without dots)') parser.add_argument('--exclude-patterns', type=str, default='', help='Comma-separated list of patterns to exclude (e.g., test_,debug_)') @@ -74,6 +82,8 @@ def parse_arguments() -> argparse.Namespace: help='Enable verbose output') parser.add_argument('--no-structure', 
def str_to_set(s: "str | None") -> "Set[str] | None":
    """Convert a comma-separated string into a set of trimmed, non-empty items.

    Args:
        s: Comma-separated values such as ``"py, js"``, or ``None``.

    Returns:
        ``None`` when ``s`` is ``None`` (passed through unchanged); otherwise
        the set of items with surrounding whitespace removed and empty
        entries dropped. An empty or all-separator string yields an empty set.
    """
    # The annotations are quoted so the PEP 604 unions are never evaluated at
    # runtime; they only inform readers and static type checkers.
    if s is None:
        return None
    # Strip each item once, then discard the ones that end up empty.
    stripped_items = (item.strip() for item in s.split(','))
    return {item for item in stripped_items if item}
def scan_file(self, file_path: "str | Path") -> str:
    """Render a single file as a markdown heading followed by a fenced code block.

    The scanner instance (``self``) is not consulted here, so none of its
    extension or exclusion settings are applied to the file.

    Args:
        file_path: Location of the file to read, as a ``Path`` or a plain
            string. The file is decoded as UTF-8.

    Returns:
        A string made of a ``# <file name>`` heading, an opening fence tagged
        with the file's suffix (without the leading dot), the file's text, a
        closing fence, and a trailing newline.

    Raises:
        ValueError: If ``file_path`` is not an existing regular file, or if
            the file cannot be read or decoded. In the latter case the
            underlying error is attached as ``__cause__``.
    """
    # Accept plain strings as well as Path objects.
    file_path = Path(file_path)
    if not file_path.is_file():
        raise ValueError(f"Path {file_path} is not a file")

    # Only the read can fail for I/O or decoding reasons; keep the try body
    # minimal and chain the original error so the traceback stays useful.
    try:
        content = file_path.read_text(encoding='utf-8')
    except (OSError, UnicodeError) as e:
        raise ValueError(f"Error processing {file_path}: {str(e)}") from e

    fence_language = file_path.suffix.lstrip('.')
    sections = [
        f"# {file_path.name}",
        "```" + fence_language,
        content,
        "```",
        "",
    ]
    return "\n".join(sections)
extensions: {', '.join(sorted(extensions))}") if exclude_patterns: print(f"Excluding patterns: {', '.join(sorted(exclude_patterns))}") if exclude_extensions: print(f"Excluding extensions: {', '.join(sorted(exclude_extensions))}") - merged_content = scanner.scan_directory( - args.directory, - recursive=not args.no_recursive - ) + if path.is_file(): + merged_content = scanner.scan_file(path) + else: + merged_content = scanner.scan_directory( + args.path, + recursive=not args.no_recursive + ) if args.output is not None: with open(args.output, 'w', encoding='utf-8') as f: f.write(merged_content) print(f"\nSuccess! Output written to: {args.output}") print(f"Total characters: {len(merged_content)}") - else: + elif args.print: print(merged_content) - + else: + print(f"Total characters: {len(merged_content)}") + print("Use --print to display the output or -o to save to a file") except Exception as e: print(f"Error: {str(e)}", file=sys.stderr) diff --git a/tests/test_scanner.py b/tests/test_scanner.py index 9f22d0a..54c6568 100644 --- a/tests/test_scanner.py +++ b/tests/test_scanner.py @@ -1,7 +1,9 @@ -import pytest +import tempfile from pathlib import Path + +import pytest + from codemd import CodeScanner -import tempfile class TestCodeScanner: @@ -176,4 +178,4 @@ def test_scan_directory_encoding_error(self, temp_dir): assert '# main.py' in content assert 'bad.py' in content - assert b'\x80invalid' not in content.encode() \ No newline at end of file + assert b'\x80invalid' not in content.encode()