diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index fdb602c..de7e2b2 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -2,7 +2,7 @@ name: Upload Python Package to PyPI
on:
release:
- types: [created]
+ types: [ created ]
jobs:
pypi-publish:
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index df21491..b4ffa30 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -11,20 +11,20 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
- python-version: ["3.8", "3.9", "3.10", "3.11"]
+ python-version: [ "3.8", "3.9", "3.10", "3.11" ]
steps:
- - uses: actions/checkout@v2
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v2
- with:
- python-version: ${{ matrix.python-version }}
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip
- pip install -r requirements.txt
- pip install pytest-cov
- pip install -e .
- - name: Run tests
- run: |
- pytest tests/
\ No newline at end of file
+      - uses: actions/checkout@v4  # v2 was built for the retired Node 12 runtime
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5  # v2 was built for the retired Node 12 runtime
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+          pip install pytest-cov
+          pip install -e .
+      - name: Run tests
+        run: |
+          pytest tests/
diff --git a/README.md b/README.md
index 50e297b..40fa883 100644
--- a/README.md
+++ b/README.md
@@ -7,12 +7,12 @@
/ /___ | (_) || (_| || __/ / /\/\ \ / /_//
\____/ \___/ \__,_| \___| \/ \//___,'
-Ver. 0.0.2
-````
+Ver. 0.0.2b
+```
# CodeMD
-🚀 Transform code repositories into markdown-formatted strings ready for LLM prompting
+🚀 Transform code files and repositories into markdown-formatted strings ready for LLM prompting
[](https://github.com/dotpyu/codemd/actions/workflows/tests.yml)
[](https://opensource.org/licenses/Apache-2.0)
@@ -21,28 +21,32 @@ Ver. 0.0.2
## 📝 Overview
-CodeMD helps you convert your entire codebase into a format that's optimal for code-related prompts with Large Language Models (LLMs) like GPT-4, Claude, and others. It automatically processes your code files and outputs them in a clean, markdown-formatted structure that's perfect for LLM interactions.
+CodeMD helps you convert your code files or entire codebase into a format that's optimal for code-related prompts with Large Language Models (LLMs) like GPT-4, Claude, and others. It automatically processes your code files and outputs them in a clean, markdown-formatted structure that's perfect for LLM interactions.
## ✨ Features
-- 🔍 **Smart Directory Scanning**: Recursively scans directories for code files
-- 🎯 **Flexible Configuration**:
+- 🔍 **Flexible Processing**:
+ - Single file processing
+ - Recursive directory scanning
+- 🎯 **Configurable Options**:
- Configurable file extensions
- File and pattern exclusion support
- Custom .gitignore support
-- 📊 **Intelligent Output**:
+- 📊 **Smart Output**:
- Markdown-formatted code blocks
- - Preserved directory structure
- - Repository structure visualization
+ - Optional directory structure visualization
- Token count estimation (with tiktoken)
+ - Configurable output display
- 📋 **Convenience**:
- Simple command-line interface
- Direct copy-to-clipboard support
- Multiple output options
-### 🎉 Recent Updates
+### 🎉 Recent Updates (0.0.2b)
-- ⭐ **NEW**: Repository structure visualization (disable with `--no-structure`)
+- ⭐ **NEW**: Single file processing support
+- ⭐ **NEW**: Configurable output display (use `--print` to show output)
+- ⭐ **NEW**: Repository structure visualization (automatically disabled for single files; disable it manually with `--no-structure`)
- ⭐ **NEW**: Automatic .gitignore support
- Uses project's .gitignore by default
- Custom .gitignore files via `--gitignore`
@@ -65,13 +69,27 @@ pip install -e .
### Command Line Interface
-**Basic Usage:**
+**Single File Processing:**
```bash
-codemd /path/to/your/code
+# Process a single file (the markdown is not printed by default)
+codemd /path/to/script.py
+
+# Process and display output
+codemd /path/to/script.py --print
+
+# Save to file
+codemd /path/to/script.py -o output.md
```
-**Custom Extensions and Output:**
+**Directory Processing:**
```bash
+# Basic directory scanning (the markdown is not printed by default)
+codemd /path/to/your/code
+
+# Show output in terminal
+codemd /path/to/your/code --print
+
+# Custom extensions and output file
codemd /path/to/your/code -e py,java,sql -o output.md
```
@@ -106,6 +124,7 @@ Contributions are welcome! Feel free to open issues or submit pull requests.
Distributed under the Apache 2.0 License. See `LICENSE` for more information.
---
+
Made with ❤️ by Peilin
-
+
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index b4cf90b..9302cf6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
[project]
name = "codemd"
-version = "0.0.2"
+version = "0.0.2b"  # NOTE(review): PEP 440 reads "0.0.2b" as beta pre-release 0.0.2b0, which sorts BEFORE 0.0.2 - confirm this is intended
authors = [
{ name = "Peilin Yu", email = "peilin_yu@brown.edu" },
]
diff --git a/setup.py b/setup.py
index 5f76d85..131f746 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,5 @@
import os
+
from setuptools import setup, find_packages
with open(os.path.join(os.path.dirname(__file__), "README.md"), encoding="utf-8") as f:
@@ -7,10 +8,9 @@
with open("requirements.txt", encoding="utf-8") as f:
requirements = [line.strip() for line in f if line.strip() and not line.startswith("#")]
-
setup(
name="codemd",
- version="0.0.2",
+ version="0.0.2b",
author="Peilin Yu",
author_email="peilin_yu@brown.edu",
description="Transform code repositories into markdown-formatted strings ready for LLM prompting",
@@ -43,4 +43,4 @@
"Bug Reports": "https://github.com/dotpyu/codemd/issues",
"Source": "https://github.com/dotpyu/codemd",
},
-)
\ No newline at end of file
+)
diff --git a/src/codemd/__init__.py b/src/codemd/__init__.py
index 763546a..1675598 100644
--- a/src/codemd/__init__.py
+++ b/src/codemd/__init__.py
@@ -1,5 +1,5 @@
from .cli import main
from .scanner import CodeScanner
-__version__ = "0.0.2"
-__all__ = ["CodeScanner", "main"]
\ No newline at end of file
+__version__ = "0.0.2b"
+__all__ = ["CodeScanner", "main"]
diff --git a/src/codemd/cli.py b/src/codemd/cli.py
index feece11..21e6441 100644
--- a/src/codemd/cli.py
+++ b/src/codemd/cli.py
@@ -1,19 +1,19 @@
import argparse
+import platform
+import subprocess
import sys
from pathlib import Path
from typing import Set, Tuple
-import subprocess
-import platform
from .scanner import CodeScanner
try:
import tiktoken
+
TIKTOKEN_AVAILABLE = True
except ImportError:
TIKTOKEN_AVAILABLE = False
-
BANNER = r"""
___ _ ___
/ __\ ___ __| | ___ /\/\ / \
@@ -24,13 +24,18 @@
EPILOG = """
Examples:
- # Basic usage (prints to stdout)
+ # Basic usage (file or directory, no output by default)
codemd /path/to/code
+ codemd /path/to/file.py
+
+ # Print output to stdout
+ codemd /path/to/code --print
+ codemd /path/to/file.py --print
- # Custom extensions (prints to stdout)
+ # Custom extensions
codemd /path/to/code -e py,java,sql
- # Save to file instead of printing
+ # Save to file
codemd /path/to/code -o output.md
# Exclude patterns and specific files
@@ -39,7 +44,7 @@
# Non-recursive scan with custom output
codemd /path/to/code --no-recursive -o custom.md
- # Disable structure output
+ # Disable structure output (auto-disabled for single files)
codemd /path/to/code --no-structure
# Use specific gitignore files
@@ -47,6 +52,9 @@
# Disable gitignore processing
codemd /path/to/code --ignore-gitignore
+
+ # Process single file and print output
+ codemd /path/to/script.py --print -o script.md
"""
@@ -54,13 +62,13 @@ def parse_arguments() -> argparse.Namespace:
"""Parse command line arguments."""
parser = argparse.ArgumentParser(
prog='codemd',
- description='Transform code repositories into markdown-formatted strings ready for LLM prompting',
+ description='Transform code repositories or files into markdown-formatted strings ready for LLM prompting',
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=EPILOG
)
- parser.add_argument('directory', type=str, help='Directory to scan')
- parser.add_argument('-e', '--extensions', type=str, default='py,java,js,cpp,c,h,hpp',
+ parser.add_argument('path', type=str, help='File or directory to scan')
+ parser.add_argument('-e', '--extensions', type=str, default=None,
help='Comma-separated list of file extensions to include (without dots)')
parser.add_argument('--exclude-patterns', type=str, default='',
help='Comma-separated list of patterns to exclude (e.g., test_,debug_)')
@@ -74,6 +82,8 @@ def parse_arguments() -> argparse.Namespace:
help='Enable verbose output')
parser.add_argument('--no-structure', action='store_true',
help='Disable repository structure output')
+ parser.add_argument('--print', action='store_true',
+ help='Print the markdown output (disabled by default)')
parser.add_argument(
'--gitignore',
@@ -90,8 +100,10 @@ def parse_arguments() -> argparse.Namespace:
return parser.parse_args()
+
def str_to_set(s: str) -> Set[str]:
"""Convert comma-separated string to set of strings."""
+ if s is None: return None  # -e now defaults to None; pass it through unchanged (NOTE(review): contradicts the `s: str` / `Set[str]` hints - consider Optional)
return {item.strip() for item in s.split(',') if item.strip()}
@@ -203,19 +215,16 @@ def format_token_info(token_count: int, model_name: str) -> str:
def main() -> int:
print(BANNER)
- print("Version 0.0.2")
- print("Transform your code into LLM-ready prompts\n")
+ print("Version 0.0.2b")
+ print("Transform your code into LLM-ready prompts and automatically copy them to your clipboard!\n")
try:
args = parse_arguments()
- directory = Path(args.directory)
+ path = Path(args.path)
output_file = Path(args.output) if args.output else None
- if not directory.exists():
- print(f"Error: Directory '{directory}' does not exist", file=sys.stderr)
- return 1
- if not directory.is_dir():
- print(f"Error: '{directory}' is not a directory", file=sys.stderr)
+ if not path.exists():
+ print(f"Error: Path '{path}' does not exist", file=sys.stderr)
return 1
extensions = str_to_set(args.extensions)
@@ -230,16 +239,18 @@ def main() -> int:
ignore_gitignore=args.ignore_gitignore
)
- scanner.no_structure = args.no_structure
-
+ scanner.no_structure = args.no_structure or path.is_file()
try:
- content = scanner.scan_directory(
- directory,
- recursive=not args.no_recursive
- )
+ if path.is_file():
+ content = scanner.scan_file(path)
+ else:
+ content = scanner.scan_directory(
+ path,
+ recursive=not args.no_recursive
+ )
except Exception as e:
- print(f"Error scanning directory: {str(e)}", file=sys.stderr)
+ print(f"Error scanning path: {str(e)}", file=sys.stderr)
return 1
files = content.count('```') // 2
@@ -262,7 +273,8 @@ def main() -> int:
if args.verbose:
print(f"\nProcessed {files} files ({chars:,} characters)")
print(token_info + "\n")
- print(content)
+ if args.print: # Only print content if --print flag is set
+ print(content)
print(token_info)
prompt_for_copy(content)
diff --git a/src/codemd/scanner.py b/src/codemd/scanner.py
index ebfadc9..28f1904 100644
--- a/src/codemd/scanner.py
+++ b/src/codemd/scanner.py
@@ -1,9 +1,54 @@
import argparse
import sys
from pathlib import Path
-import pathspec
from typing import Set, Optional, List
+import pathspec
+
+DEFAULT_CODE_EXTENSIONS = {  # dot-less extensions CodeScanner uses when given no explicit set
+    # Systems Programming
+    'c', 'h',  # C
+    'cpp', 'hpp', 'cc',  # C++
+    'rs',  # Rust ('.rlib' is a compiled library artifact, not source text)
+    'go',  # Go
+
+    # Web Development
+    'js', 'jsx', 'ts', 'tsx',  # JavaScript/TypeScript
+    'html', 'htm',  # HTML
+    'css', 'scss', 'sass',  # CSS and preprocessors
+    'php',  # PHP
+    'vue',  # Web Frameworks
+
+    # General Purpose
+    'py',  # Python
+    'java',  # Java
+    'cs',  # C#
+    'rb',  # Ruby
+    'kt', 'kts',  # Kotlin
+    'swift',  # Swift
+    'scala',  # Scala
+
+    # Shell/Scripts
+    'sh', 'bash',  # Shell scripts
+    'ps1',  # PowerShell
+    'bat', 'cmd',  # Windows Batch
+
+    # Data/Config
+    'sql',  # SQL
+    'r', 'R',  # R
+    'json', 'yaml', 'yml',  # Data formats
+    'xml',  # XML
+    'toml',  # TOML
+
+    # Others
+    'pl', 'pm',  # Perl
+    'dart',  # Dart
+    'hs',  # Haskell
+    'lua',  # Lua
+    'ml', 'mli',  # OCaml
+}
+
+
class CodeScanner:
def __init__(self,
@@ -13,7 +58,7 @@ def __init__(self,
gitignore_files: Optional[List[str]] = None,
ignore_gitignore: bool = False):
"""Initialize the CodeScanner with optional file extensions to filter."""
- self.extensions = extensions or {'py', 'java', 'js', 'cpp', 'c', 'h', 'hpp'}
+ self.extensions = extensions or DEFAULT_CODE_EXTENSIONS
self.exclude_patterns = exclude_patterns or set()
self.exclude_extensions = exclude_extensions or set()
self.no_structure = False
@@ -155,16 +200,34 @@ def scan_directory(self, directory: str, recursive: bool = True) -> str:
return "\n".join(merged_content)
+    def scan_file(self, file_path: Path) -> str:
+        """Render one file as a markdown heading plus a fenced code block.
+
+        Accepts a ``str`` or ``Path``. Raises ``ValueError`` if the path is
+        not a regular file or cannot be read as UTF-8 text.
+        """
+        file_path = Path(file_path)  # tolerate str input, like scan_directory
+        if not file_path.is_file():
+            raise ValueError(f"Path {file_path} is not a file")
+        try:
+            content = file_path.read_text(encoding='utf-8')
+        except (OSError, UnicodeError) as e:
+            # Chain the cause so the underlying I/O or decoding error stays visible.
+            raise ValueError(f"Error processing {file_path}: {str(e)}") from e
+        # The fence info string is the extension without its dot (e.g. "py").
+        fence = "```" + file_path.suffix.lstrip('.')
+        parts = [f"# {file_path.name}", fence, content, "```", ""]
+        return "\n".join(parts)
def parse_arguments():
parser = argparse.ArgumentParser(
- description='Scan directory for code files and create a markdown-formatted output'
+ description='Scan file or directory and create a markdown-formatted output'
)
parser.add_argument(
- 'directory',
- help='Directory to scan'
+ 'path',
+ help='File or directory to scan'
)
parser.add_argument(
'-e', '--extensions',
@@ -183,7 +246,7 @@ def parse_arguments():
)
parser.add_argument(
'-o', '--output',
- help='Output file path (defaults to print out unless specified)',
+ help='Output file path (if not specified, no output is printed)',
default=None,
)
parser.add_argument(
@@ -191,6 +254,11 @@ def parse_arguments():
action='store_true',
help='Disable recursive directory scanning'
)
+ parser.add_argument(
+ '--print',
+ action='store_true',
+ help='Print the markdown output'
+ )
return parser.parse_args()
@@ -198,7 +266,6 @@ def parse_arguments():
args = parse_arguments()
extensions = {ext.strip() for ext in args.extensions.split(',') if ext.strip()}
-
exclude_patterns = {pat.strip() for pat in args.exclude_patterns.split(',') if pat.strip()}
exclude_extensions = {ext.strip() for ext in args.exclude_extensions.split(',') if ext.strip()}
@@ -209,26 +276,35 @@ def parse_arguments():
exclude_extensions=exclude_extensions
)
- print(f"Scanning directory: {args.directory}")
+ path = Path(args.path)
+ if not path.exists():
+ raise ValueError(f"Path does not exist: {path}")
+
+ print(f"Scanning {'file' if path.is_file() else 'directory'}: {args.path}")
print(f"Including extensions: {', '.join(sorted(extensions))}")
if exclude_patterns:
print(f"Excluding patterns: {', '.join(sorted(exclude_patterns))}")
if exclude_extensions:
print(f"Excluding extensions: {', '.join(sorted(exclude_extensions))}")
- merged_content = scanner.scan_directory(
- args.directory,
- recursive=not args.no_recursive
- )
+ if path.is_file():
+ merged_content = scanner.scan_file(path)
+ else:
+ merged_content = scanner.scan_directory(
+ args.path,
+ recursive=not args.no_recursive
+ )
if args.output is not None:
with open(args.output, 'w', encoding='utf-8') as f:
f.write(merged_content)
print(f"\nSuccess! Output written to: {args.output}")
print(f"Total characters: {len(merged_content)}")
- else:
+ elif args.print:
print(merged_content)
-
+ else:
+ print(f"Total characters: {len(merged_content)}")
+ print("Use --print to display the output or -o to save to a file")
except Exception as e:
print(f"Error: {str(e)}", file=sys.stderr)
diff --git a/tests/test_scanner.py b/tests/test_scanner.py
index 9f22d0a..54c6568 100644
--- a/tests/test_scanner.py
+++ b/tests/test_scanner.py
@@ -1,7 +1,9 @@
-import pytest
+import tempfile
from pathlib import Path
+
+import pytest
+
from codemd import CodeScanner
-import tempfile
class TestCodeScanner:
@@ -176,4 +178,4 @@ def test_scan_directory_encoding_error(self, temp_dir):
assert '# main.py' in content
assert 'bad.py' in content
- assert b'\x80invalid' not in content.encode()
\ No newline at end of file
+ assert b'\x80invalid' not in content.encode()