Skip to content

Commit a2d9dfa

Browse files
Improvement: Make the CLI work on Windows (#161)
* Improvement: Make the CLI work on Windows * Fix temporary-file creation and add a test * Add an error message when Git is missing * Update CI to test Windows and macOS --------- Co-authored-by: Romain Courtois <romain@coderamp.io>
1 parent 11d3f39 commit a2d9dfa

File tree

6 files changed

+161
-16
lines changed

6 files changed

+161
-16
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ jobs:
1212
strategy:
1313
fail-fast: true
1414
matrix:
15-
os: [ubuntu-latest, macos-latest]
15+
os: [ubuntu-latest, macos-latest, windows-latest]
1616
python-version: ["3.10", "3.11", "3.12", "3.13"]
1717

1818
steps:

setup.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
1+
from pathlib import Path
2+
13
from setuptools import find_packages, setup
24

5+
this_directory = Path(__file__).parent
6+
long_description = (this_directory / "README.md").read_text(encoding="utf-8")
7+
38
setup(
49
name="gitingest",
510
version="0.1.2",
@@ -19,7 +24,7 @@
1924
author="Romain Courtois",
2025
author_email="romain@coderamp.io",
2126
description="CLI tool to analyze and create text dumps of codebases for LLMs",
22-
long_description=open("README.md").read(),
27+
long_description=long_description,
2328
long_description_content_type="text/markdown",
2429
url="https://github.com/cyclotruc/gitingest",
2530
classifiers=[

src/gitingest/config.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
""" Configuration file for the project. """
22

3+
import tempfile
34
from pathlib import Path
45

56
MAX_FILE_SIZE = 10 * 1024 * 1024 # 10 MB
@@ -8,4 +9,5 @@
89
MAX_TOTAL_SIZE_BYTES = 500 * 1024 * 1024 # 500 MB
910

1011
OUTPUT_FILE_PATH = "digest.txt"
11-
TMP_BASE_PATH = Path("/tmp/gitingest")
12+
13+
TMP_BASE_PATH = Path(tempfile.gettempdir()) / "gitingest"

src/gitingest/query_ingestion.py

Lines changed: 89 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
""" Functions to ingest and analyze a codebase directory or single file. """
22

3+
import locale
4+
import os
5+
import platform
36
from fnmatch import fnmatch
47
from pathlib import Path
58
from typing import Any
@@ -16,6 +19,61 @@
1619
from gitingest.notebook_utils import process_notebook
1720
from gitingest.query_parser import ParsedQuery
1821

22+
# Runs once, at import time: switch the process-wide locale to the user's
# environment default (the empty string selects it).
# NOTE(review): presumably done so that `locale.getpreferredencoding()`, used by
# `_get_encoding_list`, reflects the user's environment -- confirm.
try:
    locale.setlocale(locale.LC_ALL, "")
except locale.Error:
    # The environment names a locale this system does not support;
    # fall back to the portable "C" locale instead of failing the import.
    locale.setlocale(locale.LC_ALL, "C")
26+
27+
28+
def _normalize_path(path: Path) -> Path:
    """
    Collapse redundant pieces of a path so it compares consistently across platforms.

    Parameters
    ----------
    path : Path
        The path to clean up.

    Returns
    -------
    Path
        A new path built from `os.path.normpath`: repeated separators and `.` segments are
        dropped and `..` segments are folded lexically, using the platform's own separator.
        The file system is not consulted, so symlinks are not followed.
    """
    collapsed = os.path.normpath(str(path))
    return Path(collapsed)
43+
44+
45+
def _normalize_path_str(path: str | Path) -> str:
46+
"""
47+
Convert path to string with forward slashes for consistent output.
48+
49+
Parameters
50+
----------
51+
path : str | Path
52+
The path to convert, can be string or Path object.
53+
54+
Returns
55+
-------
56+
str
57+
The normalized path string with forward slashes as separators.
58+
"""
59+
return str(path).replace(os.sep, "/")
60+
61+
62+
def _get_encoding_list() -> list[str]:
    """
    Get the list of encodings to try when decoding a text file, in priority order.

    Returns
    -------
    list[str]
        Encoding names without duplicates (compared case-insensitively), ordered as:

        1. ``"utf-8-sig"`` then ``"utf-8"``,
        2. the locale's preferred encoding, when it is not already listed,
        3. on Windows only, the single-byte fallbacks ``"cp1252"`` and ``"iso-8859-1"``.
    """
    # "utf-8-sig" must come before "utf-8": plain UTF-8 also decodes BOM-prefixed data
    # successfully but leaves the BOM in the text as U+FEFF, so the BOM-aware codec
    # would never be reached if it came second.
    encodings = ["utf-8-sig", "utf-8"]

    # "iso-8859-1" maps every byte value and therefore never fails to decode;
    # anything placed after it would be unreachable, so the fallbacks go last.
    fallbacks = ["cp1252", "iso-8859-1"] if platform.system() == "Windows" else []

    preferred = locale.getpreferredencoding()
    already_listed = {name.lower() for name in encodings + fallbacks}
    if preferred.lower() not in already_listed:
        encodings.append(preferred)

    return encodings + fallbacks
76+
1977

2078
def _should_include(path: Path, base_path: Path, include_patterns: set[str]) -> bool:
2179
"""
@@ -107,9 +165,13 @@ def _is_safe_symlink(symlink_path: Path, base_path: Path) -> bool:
107165
`True` if the symlink points within the base directory, `False` otherwise.
108166
"""
109167
try:
110-
target_path = symlink_path.resolve()
111-
base_resolved = base_path.resolve()
112-
# It's "safe" if target_path == base_resolved or is inside base_resolved
168+
if platform.system() == "Windows":
169+
if not os.path.islink(str(symlink_path)):
170+
return False
171+
172+
target_path = _normalize_path(symlink_path.resolve())
173+
base_resolved = _normalize_path(base_path.resolve())
174+
113175
return base_resolved in target_path.parents or target_path == base_resolved
114176
except (OSError, ValueError):
115177
# If there's any error resolving the paths, consider it unsafe
@@ -162,10 +224,22 @@ def _read_file_content(file_path: Path) -> str:
162224
"""
163225
try:
164226
if file_path.suffix == ".ipynb":
165-
return process_notebook(file_path)
227+
try:
228+
return process_notebook(file_path)
229+
except Exception as e:
230+
return f"Error processing notebook: {e}"
231+
232+
for encoding in _get_encoding_list():
233+
try:
234+
with open(file_path, encoding=encoding) as f:
235+
return f.read()
236+
except UnicodeDecodeError:
237+
continue
238+
except OSError as e:
239+
return f"Error reading file: {e}"
240+
241+
return "Error: Unable to decode file with available encodings"
166242

167-
with open(file_path, encoding="utf-8", errors="ignore") as f:
168-
return f.read()
169243
except (OSError, InvalidNotebookError) as e:
170244
return f"Error reading file: {e}"
171245

@@ -531,10 +605,10 @@ def _extract_files_content(
531605
content = node["content"]
532606

533607
relative_path = Path(node["path"]).relative_to(query.local_path)
534-
608+
# Store paths with forward slashes
535609
files.append(
536610
{
537-
"path": str(relative_path),
611+
"path": _normalize_path_str(relative_path),
538612
"content": content,
539613
"size": node["size"],
540614
},
@@ -572,7 +646,8 @@ def _create_file_content_string(files: list[dict[str, Any]]) -> str:
572646
continue
573647

574648
output += separator
575-
output += f"File: {file['path']}\n"
649+
# Use forward slashes in output paths
650+
output += f"File: {_normalize_path_str(file['path'])}\n"
576651
output += separator
577652
output += f"{file['content']}\n\n"
578653

@@ -815,11 +890,13 @@ def run_ingest_query(query: ParsedQuery) -> tuple[str, str, str]:
815890
ValueError
816891
If the specified path cannot be found or if the file is not a text file.
817892
"""
818-
path = query.local_path / query.subpath.lstrip("/")
893+
subpath = _normalize_path(Path(query.subpath.strip("/"))).as_posix()
894+
path = _normalize_path(query.local_path / subpath)
895+
819896
if not path.exists():
820897
raise ValueError(f"{query.slug} cannot be found")
821898

822899
if query.type and query.type == "blob":
823-
return _ingest_single_file(path, query)
900+
return _ingest_single_file(_normalize_path(path.resolve()), query)
824901

825-
return _ingest_directory(path, query)
902+
return _ingest_directory(_normalize_path(path.resolve()), query)

src/gitingest/repository_clone.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
""" This module contains functions for cloning a Git repository to a local path. """
22

33
import asyncio
4+
import os
45
from dataclasses import dataclass
6+
from pathlib import Path
57

68
from gitingest.utils import async_timeout
79

@@ -61,6 +63,8 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]:
6163
------
6264
ValueError
6365
If the 'url' or 'local_path' parameters are missing, or if the repository is not found.
66+
OSError
67+
If there is an error creating the parent directory structure.
6468
"""
6569
# Extract and validate query parameters
6670
url: str = config.url
@@ -74,6 +78,13 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]:
7478
if not local_path:
7579
raise ValueError("The 'local_path' parameter is required.")
7680

81+
# Create parent directory if it doesn't exist
82+
parent_dir = Path(local_path).parent
83+
try:
84+
os.makedirs(parent_dir, exist_ok=True)
85+
except OSError as e:
86+
raise OSError(f"Failed to create parent directory {parent_dir}: {e}") from e
87+
7788
# Check if the repository exists
7889
if not await _check_repo_exists(url):
7990
raise ValueError("Repository not found, make sure it is public")
@@ -182,8 +193,24 @@ async def _run_git_command(*args: str) -> tuple[bytes, bytes]:
182193
Raises
183194
------
184195
RuntimeError
185-
If the Git command exits with a non-zero status.
196+
If Git is not installed or if the Git command exits with a non-zero status.
186197
"""
198+
# Check if Git is installed
199+
try:
200+
version_proc = await asyncio.create_subprocess_exec(
201+
"git",
202+
"--version",
203+
stdout=asyncio.subprocess.PIPE,
204+
stderr=asyncio.subprocess.PIPE,
205+
)
206+
_, stderr = await version_proc.communicate()
207+
if version_proc.returncode != 0:
208+
error_message = stderr.decode().strip() if stderr else "Git command not found"
209+
raise RuntimeError(f"Git is not installed or not accessible: {error_message}")
210+
except FileNotFoundError as exc:
211+
raise RuntimeError("Git is not installed. Please install Git before proceeding.") from exc
212+
213+
# Execute the requested Git command
187214
proc = await asyncio.create_subprocess_exec(
188215
*args,
189216
stdout=asyncio.subprocess.PIPE,

tests/test_repository_clone.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import asyncio
99
import os
10+
from pathlib import Path
1011
from unittest.mock import AsyncMock, patch
1112

1213
import pytest
@@ -362,3 +363,36 @@ async def test_clone_branch_with_slashes(tmp_path):
362363
clone_config.url,
363364
clone_config.local_path,
364365
)
366+
367+
368+
@pytest.mark.asyncio
async def test_clone_repo_creates_parent_directory(tmp_path: Path) -> None:
    """
    Test that `clone_repo` builds missing parent directories for the clone target.

    Given a `local_path` whose ancestor directories do not exist yet:
    When `clone_repo` is called,
    Then the ancestors should be created and `git clone` invoked with the expected arguments.
    """
    target = tmp_path.joinpath("deep", "nested", "path", "repo")
    config = CloneConfig(url="https://github.com/user/repo", local_path=str(target))

    with (
        patch("gitingest.repository_clone._check_repo_exists", return_value=True),
        patch("gitingest.repository_clone._run_git_command", new_callable=AsyncMock) as git_mock,
    ):
        await clone_repo(config)

        # The directory that will contain the clone must exist afterwards
        assert target.parent.exists()

        # Exactly one shallow, single-branch clone into the nested target
        git_mock.assert_called_once_with(
            "git",
            "clone",
            "--depth=1",
            "--single-branch",
            config.url,
            str(target),
        )

0 commit comments

Comments
 (0)