Skip to content

Commit 58dbe2c

Browse files
refactor: code cleanup, utils extraction, and test improvements (#141)
* Refactor code for lifespan, template usage, and improved tests - Move background tasks and rate-limit handler into utils.py - Reference TEMPLATES from config instead of inline Jinja2Templates - Adopt Given/When/Then docstrings for test clarity - Parametrize some tests and consolidate code across query_parser tests - Add pytest.warns context handler to test_parse_repo_source_with_failed_git_command
1 parent 1836809 commit 58dbe2c

File tree

13 files changed

+703
-399
lines changed

13 files changed

+703
-399
lines changed

src/config.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
from pathlib import Path
44

5+
from fastapi.templating import Jinja2Templates
6+
57
MAX_FILE_SIZE = 10 * 1024 * 1024 # 10 MB
68
MAX_DIRECTORY_DEPTH = 20 # Maximum depth of directory traversal
79
MAX_FILES = 10_000 # Maximum number of files to process
@@ -20,3 +22,5 @@
2022
{"name": "Tldraw", "url": "https://github.com/tldraw/tldraw"},
2123
{"name": "ApiAnalytics", "url": "https://github.com/tom-draper/api-analytics"},
2224
]
25+
26+
templates = Jinja2Templates(directory="templates")

src/gitingest/query_parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,7 @@ async def _configure_branch_and_subpath(remaining_parts: list[str], url: str) ->
227227
# Fetch the list of branches from the remote repository
228228
branches: list[str] = await fetch_remote_branch_list(url)
229229
except RuntimeError as e:
230-
warnings.warn(f"Warning: Failed to fetch branch list: {e}")
230+
warnings.warn(f"Warning: Failed to fetch branch list: {e}", RuntimeWarning)
231231
return remaining_parts.pop(0)
232232

233233
branch = []

src/main.py

Lines changed: 3 additions & 136 deletions
Original file line numberDiff line numberDiff line change
@@ -1,157 +1,27 @@
11
""" Main module for the FastAPI application. """
22

3-
import asyncio
43
import os
5-
import shutil
6-
import time
7-
from contextlib import asynccontextmanager
8-
from pathlib import Path
94

105
from api_analytics.fastapi import Analytics
116
from dotenv import load_dotenv
127
from fastapi import FastAPI, Request
13-
from fastapi.responses import FileResponse, HTMLResponse, Response
8+
from fastapi.responses import FileResponse, HTMLResponse
149
from fastapi.staticfiles import StaticFiles
15-
from fastapi.templating import Jinja2Templates
16-
from slowapi import _rate_limit_exceeded_handler
1710
from slowapi.errors import RateLimitExceeded
1811
from starlette.middleware.trustedhost import TrustedHostMiddleware
1912

20-
from config import DELETE_REPO_AFTER, TMP_BASE_PATH
13+
from config import templates
2114
from routers import download, dynamic, index
2215
from server_utils import limiter
16+
from utils import lifespan, rate_limit_exception_handler
2317

2418
# Load environment variables from .env file
2519
load_dotenv()
2620

27-
28-
async def remove_old_repositories():
29-
"""
30-
Background task that runs periodically to clean up old repository directories.
31-
32-
This task:
33-
- Scans the TMP_BASE_PATH directory every 60 seconds
34-
- Removes directories older than DELETE_REPO_AFTER seconds
35-
- Before deletion, logs repository URLs to history.txt if a matching .txt file exists
36-
- Handles errors gracefully if deletion fails
37-
38-
The repository URL is extracted from the first .txt file in each directory,
39-
assuming the filename format: "owner-repository.txt"
40-
"""
41-
while True:
42-
try:
43-
if not TMP_BASE_PATH.exists():
44-
await asyncio.sleep(60)
45-
continue
46-
47-
current_time = time.time()
48-
49-
for folder in TMP_BASE_PATH.iterdir():
50-
if not folder.is_dir():
51-
continue
52-
53-
# Skip if folder is not old enough
54-
if current_time - folder.stat().st_ctime <= DELETE_REPO_AFTER:
55-
continue
56-
57-
await process_folder(folder)
58-
59-
except Exception as e:
60-
print(f"Error in remove_old_repositories: {e}")
61-
62-
await asyncio.sleep(60)
63-
64-
65-
async def process_folder(folder: Path) -> None:
66-
"""
67-
Process a single folder for deletion and logging.
68-
69-
Parameters
70-
----------
71-
folder : Path
72-
The path to the folder to be processed.
73-
"""
74-
# Try to log repository URL before deletion
75-
try:
76-
txt_files = [f for f in folder.iterdir() if f.suffix == ".txt"]
77-
78-
# Extract owner and repository name from the filename
79-
if txt_files and "-" in (filename := txt_files[0].stem):
80-
owner, repo = filename.split("-", 1)
81-
repo_url = f"{owner}/{repo}"
82-
with open("history.txt", mode="a", encoding="utf-8") as history:
83-
history.write(f"{repo_url}\n")
84-
85-
except Exception as e:
86-
print(f"Error logging repository URL for {folder}: {e}")
87-
88-
# Delete the folder
89-
try:
90-
shutil.rmtree(folder)
91-
except Exception as e:
92-
print(f"Error deleting {folder}: {e}")
93-
94-
95-
@asynccontextmanager
96-
async def lifespan(_: FastAPI):
97-
"""
98-
Lifecycle manager for the FastAPI application.
99-
Handles startup and shutdown events.
100-
101-
Parameters
102-
----------
103-
_ : FastAPI
104-
The FastAPI application instance (unused).
105-
106-
Yields
107-
-------
108-
None
109-
Yields control back to the FastAPI application while the background task runs.
110-
"""
111-
task = asyncio.create_task(remove_old_repositories())
112-
113-
yield
114-
# Cancel the background task on shutdown
115-
task.cancel()
116-
try:
117-
await task
118-
except asyncio.CancelledError:
119-
pass
120-
121-
12221
# Initialize the FastAPI application with lifespan
12322
app = FastAPI(lifespan=lifespan)
12423
app.state.limiter = limiter
12524

126-
127-
async def rate_limit_exception_handler(request: Request, exc: Exception) -> Response:
128-
"""
129-
Custom exception handler for rate-limiting errors.
130-
131-
Parameters
132-
----------
133-
request : Request
134-
The incoming HTTP request.
135-
exc : Exception
136-
The exception raised, expected to be RateLimitExceeded.
137-
138-
Returns
139-
-------
140-
Response
141-
A response indicating that the rate limit has been exceeded.
142-
143-
Raises
144-
------
145-
exc
146-
If the exception is not a RateLimitExceeded error, it is re-raised.
147-
"""
148-
if isinstance(exc, RateLimitExceeded):
149-
# Delegate to the default rate limit handler
150-
return _rate_limit_exceeded_handler(request, exc)
151-
# Re-raise other exceptions
152-
raise exc
153-
154-
15525
# Register the custom exception handler for rate limits
15626
app.add_exception_handler(RateLimitExceeded, rate_limit_exception_handler)
15727

@@ -174,9 +44,6 @@ async def rate_limit_exception_handler(request: Request, exc: Exception) -> Resp
17444
# Add middleware to enforce allowed hosts
17545
app.add_middleware(TrustedHostMiddleware, allowed_hosts=allowed_hosts)
17646

177-
# Set up template rendering
178-
templates = Jinja2Templates(directory="templates")
179-
18047

18148
@app.get("/health")
18249
async def health_check() -> dict[str, str]:

src/query_processor.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,14 @@
33
from functools import partial
44

55
from fastapi import Request
6-
from fastapi.templating import Jinja2Templates
76
from starlette.templating import _TemplateResponse
87

9-
from config import EXAMPLE_REPOS, MAX_DISPLAY_SIZE
8+
from config import EXAMPLE_REPOS, MAX_DISPLAY_SIZE, templates
109
from gitingest.query_ingestion import run_ingest_query
1110
from gitingest.query_parser import ParsedQuery, parse_query
1211
from gitingest.repository_clone import CloneConfig, clone_repo
1312
from server_utils import Colors, log_slider_to_size
1413

15-
templates = Jinja2Templates(directory="templates")
16-
1714

1815
async def process_query(
1916
request: Request,

src/routers/dynamic.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,12 @@
22

33
from fastapi import APIRouter, Form, Request
44
from fastapi.responses import HTMLResponse
5-
from fastapi.templating import Jinja2Templates
65

6+
from config import templates
77
from query_processor import process_query
88
from server_utils import limiter
99

1010
router = APIRouter()
11-
templates = Jinja2Templates(directory="templates")
1211

1312

1413
@router.get("/{full_path:path}")

src/routers/index.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,12 @@
22

33
from fastapi import APIRouter, Form, Request
44
from fastapi.responses import HTMLResponse
5-
from fastapi.templating import Jinja2Templates
65

7-
from config import EXAMPLE_REPOS
6+
from config import EXAMPLE_REPOS, templates
87
from query_processor import process_query
98
from server_utils import limiter
109

1110
router = APIRouter()
12-
templates = Jinja2Templates(directory="templates")
1311

1412

1513
@router.get("/", response_class=HTMLResponse)

src/utils.py

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
""" Utility functions for the FastAPI server. """
2+
3+
import asyncio
4+
import shutil
5+
import time
6+
from contextlib import asynccontextmanager
7+
from pathlib import Path
8+
9+
from fastapi import FastAPI, Request
10+
from fastapi.responses import Response
11+
from slowapi import _rate_limit_exceeded_handler
12+
from slowapi.errors import RateLimitExceeded
13+
14+
from config import DELETE_REPO_AFTER, TMP_BASE_PATH
15+
16+
17+
async def rate_limit_exception_handler(request: Request, exc: Exception) -> Response:
18+
"""
19+
Custom exception handler for rate-limiting errors.
20+
21+
Parameters
22+
----------
23+
request : Request
24+
The incoming HTTP request.
25+
exc : Exception
26+
The exception raised, expected to be RateLimitExceeded.
27+
28+
Returns
29+
-------
30+
Response
31+
A response indicating that the rate limit has been exceeded.
32+
33+
Raises
34+
------
35+
exc
36+
If the exception is not a RateLimitExceeded error, it is re-raised.
37+
"""
38+
if isinstance(exc, RateLimitExceeded):
39+
# Delegate to the default rate limit handler
40+
return _rate_limit_exceeded_handler(request, exc)
41+
# Re-raise other exceptions
42+
raise exc
43+
44+
45+
@asynccontextmanager
46+
async def lifespan(_: FastAPI):
47+
"""
48+
Lifecycle manager for handling startup and shutdown events for the FastAPI application.
49+
50+
Parameters
51+
----------
52+
_ : FastAPI
53+
The FastAPI application instance (unused).
54+
55+
Yields
56+
-------
57+
None
58+
Yields control back to the FastAPI application while the background task runs.
59+
"""
60+
task = asyncio.create_task(_remove_old_repositories())
61+
62+
yield
63+
# Cancel the background task on shutdown
64+
task.cancel()
65+
try:
66+
await task
67+
except asyncio.CancelledError:
68+
pass
69+
70+
71+
async def _remove_old_repositories():
72+
"""
73+
Periodically remove old repository folders.
74+
75+
Background task that runs periodically to clean up old repository directories.
76+
77+
This task:
78+
- Scans the TMP_BASE_PATH directory every 60 seconds
79+
- Removes directories older than DELETE_REPO_AFTER seconds
80+
- Before deletion, logs repository URLs to history.txt if a matching .txt file exists
81+
- Handles errors gracefully if deletion fails
82+
83+
The repository URL is extracted from the first .txt file in each directory,
84+
assuming the filename format: "owner-repository.txt"
85+
"""
86+
while True:
87+
try:
88+
if not TMP_BASE_PATH.exists():
89+
await asyncio.sleep(60)
90+
continue
91+
92+
current_time = time.time()
93+
94+
for folder in TMP_BASE_PATH.iterdir():
95+
if folder.is_dir():
96+
continue
97+
98+
# Skip if folder is not old enough
99+
if current_time - folder.stat().st_ctime <= DELETE_REPO_AFTER:
100+
continue
101+
102+
await _process_folder(folder)
103+
104+
except Exception as e:
105+
print(f"Error in _remove_old_repositories: {e}")
106+
107+
await asyncio.sleep(60)
108+
109+
110+
async def _process_folder(folder: Path) -> None:
111+
"""
112+
Process a single folder for deletion and logging.
113+
114+
Parameters
115+
----------
116+
folder : Path
117+
The path to the folder to be processed.
118+
"""
119+
# Try to log repository URL before deletion
120+
try:
121+
txt_files = [f for f in folder.iterdir() if f.suffix == ".txt"]
122+
123+
# Extract owner and repository name from the filename
124+
if txt_files and "-" in (filename := txt_files[0].stem):
125+
owner, repo = filename.split("-", 1)
126+
repo_url = f"{owner}/{repo}"
127+
128+
with open("history.txt", mode="a", encoding="utf-8") as history:
129+
history.write(f"{repo_url}\n")
130+
131+
except Exception as e:
132+
print(f"Error logging repository URL for {folder}: {e}")
133+
134+
# Delete the folder
135+
try:
136+
shutil.rmtree(folder)
137+
except Exception as e:
138+
print(f"Error deleting {folder}: {e}")

0 commit comments

Comments
 (0)