|
1 | 1 | """ Functions to ingest and analyze a codebase directory or single file. """ |
2 | 2 |
|
| 3 | +import locale |
| 4 | +import os |
| 5 | +import platform |
3 | 6 | from fnmatch import fnmatch |
4 | 7 | from pathlib import Path |
5 | 8 | from typing import Any |
|
16 | 19 | from gitingest.notebook_utils import process_notebook |
17 | 20 | from gitingest.query_parser import ParsedQuery |
18 | 21 |
|
# Module-level locale initialisation (runs once, at import time).
# An empty locale string asks the C library to use the user's default
# settings taken from the environment.
try:
    locale.setlocale(locale.LC_ALL, "")
except locale.Error:
    # The environment requested a locale this system cannot provide; fall back
    # to the "C" locale instead of failing the import.
    locale.setlocale(locale.LC_ALL, "C")
| 26 | + |
| 27 | + |
| 28 | +def _normalize_path(path: Path) -> Path: |
| 29 | + """ |
| 30 | + Normalize path for cross-platform compatibility. |
| 31 | +
|
| 32 | + Parameters |
| 33 | + ---------- |
| 34 | + path : Path |
| 35 | + The Path object to normalize. |
| 36 | +
|
| 37 | + Returns |
| 38 | + ------- |
| 39 | + Path |
| 40 | + The normalized path with platform-specific separators and resolved components. |
| 41 | + """ |
| 42 | + return Path(os.path.normpath(str(path))) |
| 43 | + |
| 44 | + |
| 45 | +def _normalize_path_str(path: str | Path) -> str: |
| 46 | + """ |
| 47 | + Convert path to string with forward slashes for consistent output. |
| 48 | +
|
| 49 | + Parameters |
| 50 | + ---------- |
| 51 | + path : str | Path |
| 52 | + The path to convert, can be string or Path object. |
| 53 | +
|
| 54 | + Returns |
| 55 | + ------- |
| 56 | + str |
| 57 | + The normalized path string with forward slashes as separators. |
| 58 | + """ |
| 59 | + return str(path).replace(os.sep, "/") |
| 60 | + |
| 61 | + |
| 62 | +def _get_encoding_list() -> list[str]: |
| 63 | + """ |
| 64 | + Get list of encodings to try, prioritized for the current platform. |
| 65 | +
|
| 66 | + Returns |
| 67 | + ------- |
| 68 | + list[str] |
| 69 | + List of encoding names to try in priority order, starting with the |
| 70 | + platform's default encoding followed by common fallback encodings. |
| 71 | + """ |
| 72 | + encodings = ["utf-8", "utf-8-sig"] |
| 73 | + if platform.system() == "Windows": |
| 74 | + encodings.extend(["cp1252", "iso-8859-1"]) |
| 75 | + return encodings + [locale.getpreferredencoding()] |
| 76 | + |
19 | 77 |
|
20 | 78 | def _should_include(path: Path, base_path: Path, include_patterns: set[str]) -> bool: |
21 | 79 | """ |
@@ -107,9 +165,13 @@ def _is_safe_symlink(symlink_path: Path, base_path: Path) -> bool: |
107 | 165 | `True` if the symlink points within the base directory, `False` otherwise. |
108 | 166 | """ |
109 | 167 | try: |
110 | | - target_path = symlink_path.resolve() |
111 | | - base_resolved = base_path.resolve() |
112 | | - # It's "safe" if target_path == base_resolved or is inside base_resolved |
| 168 | + if platform.system() == "Windows": |
| 169 | + if not os.path.islink(str(symlink_path)): |
| 170 | + return False |
| 171 | + |
| 172 | + target_path = _normalize_path(symlink_path.resolve()) |
| 173 | + base_resolved = _normalize_path(base_path.resolve()) |
| 174 | + |
113 | 175 | return base_resolved in target_path.parents or target_path == base_resolved |
114 | 176 | except (OSError, ValueError): |
115 | 177 | # If there's any error resolving the paths, consider it unsafe |
@@ -162,10 +224,22 @@ def _read_file_content(file_path: Path) -> str: |
162 | 224 | """ |
163 | 225 | try: |
164 | 226 | if file_path.suffix == ".ipynb": |
165 | | - return process_notebook(file_path) |
| 227 | + try: |
| 228 | + return process_notebook(file_path) |
| 229 | + except Exception as e: |
| 230 | + return f"Error processing notebook: {e}" |
| 231 | + |
| 232 | + for encoding in _get_encoding_list(): |
| 233 | + try: |
| 234 | + with open(file_path, encoding=encoding) as f: |
| 235 | + return f.read() |
| 236 | + except UnicodeDecodeError: |
| 237 | + continue |
| 238 | + except OSError as e: |
| 239 | + return f"Error reading file: {e}" |
| 240 | + |
| 241 | + return "Error: Unable to decode file with available encodings" |
166 | 242 |
|
167 | | - with open(file_path, encoding="utf-8", errors="ignore") as f: |
168 | | - return f.read() |
169 | 243 | except (OSError, InvalidNotebookError) as e: |
170 | 244 | return f"Error reading file: {e}" |
171 | 245 |
|
@@ -531,10 +605,10 @@ def _extract_files_content( |
531 | 605 | content = node["content"] |
532 | 606 |
|
533 | 607 | relative_path = Path(node["path"]).relative_to(query.local_path) |
534 | | - |
| 608 | + # Store paths with forward slashes |
535 | 609 | files.append( |
536 | 610 | { |
537 | | - "path": str(relative_path), |
| 611 | + "path": _normalize_path_str(relative_path), |
538 | 612 | "content": content, |
539 | 613 | "size": node["size"], |
540 | 614 | }, |
@@ -572,7 +646,8 @@ def _create_file_content_string(files: list[dict[str, Any]]) -> str: |
572 | 646 | continue |
573 | 647 |
|
574 | 648 | output += separator |
575 | | - output += f"File: {file['path']}\n" |
| 649 | + # Use forward slashes in output paths |
| 650 | + output += f"File: {_normalize_path_str(file['path'])}\n" |
576 | 651 | output += separator |
577 | 652 | output += f"{file['content']}\n\n" |
578 | 653 |
|
@@ -815,11 +890,13 @@ def run_ingest_query(query: ParsedQuery) -> tuple[str, str, str]: |
815 | 890 | ValueError |
816 | 891 | If the specified path cannot be found or if the file is not a text file. |
817 | 892 | """ |
818 | | - path = query.local_path / query.subpath.lstrip("/") |
| 893 | + subpath = _normalize_path(Path(query.subpath.strip("/"))).as_posix() |
| 894 | + path = _normalize_path(query.local_path / subpath) |
| 895 | + |
819 | 896 | if not path.exists(): |
820 | 897 | raise ValueError(f"{query.slug} cannot be found") |
821 | 898 |
|
822 | 899 | if query.type and query.type == "blob": |
823 | | - return _ingest_single_file(path, query) |
| 900 | + return _ingest_single_file(_normalize_path(path.resolve()), query) |
824 | 901 |
|
825 | | - return _ingest_directory(path, query) |
| 902 | + return _ingest_directory(_normalize_path(path.resolve()), query) |
0 commit comments