Skip to content

Commit 349ef38

Browse files
bokelley and claude committed
feat: support authoritative_location redirects in fetch_adagents
- Implements authoritative_location redirect following per AdCP spec
- When adagents.json contains authoritative_location instead of authorized_agents, fetches the referenced URL for actual data
- Includes loop detection and max depth limit (5) for security
- Validates HTTPS requirement for authoritative_location URLs
- Tests for happy path, HTTPS validation, loop detection, max depth

Cherry-picked from PR #118 (bokelley/issue-114)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 263665f commit 349ef38

File tree

2 files changed

+209
-19
lines changed

2 files changed

+209
-19
lines changed

src/adcp/adagents.py

Lines changed: 91 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,10 @@ def verify_agent_authorization(
280280
return False
281281

282282

283+
# Maximum number of authoritative_location redirects to follow
284+
MAX_REDIRECT_DEPTH = 5
285+
286+
283287
async def fetch_adagents(
284288
publisher_domain: str,
285289
timeout: float = 10.0,
@@ -288,6 +292,11 @@ async def fetch_adagents(
288292
) -> dict[str, Any]:
289293
"""Fetch and parse adagents.json from publisher domain.
290294
295+
Follows authoritative_location redirects per the AdCP specification. When a
296+
publisher's adagents.json contains an authoritative_location field instead of
297+
authorized_agents, this function fetches the referenced URL to get the actual
298+
authorization data.
299+
291300
Args:
292301
publisher_domain: Domain hosting the adagents.json file
293302
timeout: Request timeout in seconds
@@ -297,11 +306,12 @@ async def fetch_adagents(
297306
If None, a new client is created for this request.
298307
299308
Returns:
300-
Parsed adagents.json data
309+
Parsed adagents.json data (resolved from authoritative_location if present)
301310
302311
Raises:
303312
AdagentsNotFoundError: If adagents.json not found (404)
304-
AdagentsValidationError: If JSON is invalid or malformed
313+
AdagentsValidationError: If JSON is invalid, malformed, or redirects
314+
exceed maximum depth or form a loop
305315
AdagentsTimeoutError: If request times out
306316
307317
Notes:
@@ -311,21 +321,74 @@ async def fetch_adagents(
311321
# Validate and normalize domain for security
312322
publisher_domain = _validate_publisher_domain(publisher_domain)
313323

314-
# Construct URL
324+
# Construct initial URL
315325
url = f"https://{publisher_domain}/.well-known/adagents.json"
316326

327+
# Track visited URLs to detect loops
328+
visited_urls: set[str] = set()
329+
330+
for depth in range(MAX_REDIRECT_DEPTH + 1):
331+
# Check for redirect loop
332+
if url in visited_urls:
333+
raise AdagentsValidationError(
334+
f"Circular redirect detected: {url} already visited"
335+
)
336+
visited_urls.add(url)
337+
338+
data = await _fetch_adagents_url(url, timeout, user_agent, client)
339+
340+
# Check if this is a redirect. A response with authoritative_location but no
341+
# authorized_agents indicates a redirect. If both are present, authorized_agents
342+
# takes precedence (response is treated as final).
343+
if "authoritative_location" in data and "authorized_agents" not in data:
344+
authoritative_url = data["authoritative_location"]
345+
346+
# Validate HTTPS requirement
347+
if not isinstance(authoritative_url, str) or not authoritative_url.startswith(
348+
"https://"
349+
):
350+
raise AdagentsValidationError(
351+
f"authoritative_location must be an HTTPS URL, got: {authoritative_url!r}"
352+
)
353+
354+
# Check if we've exceeded max depth
355+
if depth >= MAX_REDIRECT_DEPTH:
356+
raise AdagentsValidationError(
357+
f"Maximum redirect depth ({MAX_REDIRECT_DEPTH}) exceeded"
358+
)
359+
360+
# Follow the redirect
361+
url = authoritative_url
362+
continue
363+
364+
# We have the final data with authorized_agents (or both fields present,
365+
# in which case authorized_agents takes precedence)
366+
return data
367+
368+
# Unreachable: loop always exits via return or raise above
369+
raise AssertionError("Unreachable") # pragma: no cover
370+
371+
372+
async def _fetch_adagents_url(
373+
url: str,
374+
timeout: float,
375+
user_agent: str,
376+
client: httpx.AsyncClient | None,
377+
) -> dict[str, Any]:
378+
"""Fetch and parse adagents.json from a specific URL.
379+
380+
This is the core fetch logic, separated to support redirect following.
381+
"""
317382
try:
318383
# Use provided client or create a new one
319384
if client is not None:
320-
# Reuse provided client (connection pooling)
321385
response = await client.get(
322386
url,
323387
headers={"User-Agent": user_agent},
324388
timeout=timeout,
325389
follow_redirects=True,
326390
)
327391
else:
328-
# Create new client for single request
329392
async with httpx.AsyncClient() as new_client:
330393
response = await new_client.get(
331394
url,
@@ -334,9 +397,11 @@ async def fetch_adagents(
334397
follow_redirects=True,
335398
)
336399

337-
# Process response (same for both paths)
400+
# Process response
338401
if response.status_code == 404:
339-
raise AdagentsNotFoundError(publisher_domain)
402+
# Extract domain from URL for error message
403+
parsed = urlparse(url)
404+
raise AdagentsNotFoundError(parsed.netloc)
340405

341406
if response.status_code != 200:
342407
raise AdagentsValidationError(
@@ -353,22 +418,29 @@ async def fetch_adagents(
353418
if not isinstance(data, dict):
354419
raise AdagentsValidationError("adagents.json must be a JSON object")
355420

356-
if "authorized_agents" not in data:
357-
raise AdagentsValidationError("adagents.json must have 'authorized_agents' field")
358-
359-
if not isinstance(data["authorized_agents"], list):
360-
raise AdagentsValidationError("'authorized_agents' must be an array")
361-
362-
# Validate mutual exclusivity constraints
363-
try:
364-
validate_adagents(data)
365-
except ValidationError as e:
366-
raise AdagentsValidationError(f"Invalid adagents.json structure: {e}") from e
421+
# If this has authorized_agents, validate it
422+
if "authorized_agents" in data:
423+
if not isinstance(data["authorized_agents"], list):
424+
raise AdagentsValidationError("'authorized_agents' must be an array")
425+
426+
# Validate mutual exclusivity constraints
427+
try:
428+
validate_adagents(data)
429+
except ValidationError as e:
430+
raise AdagentsValidationError(
431+
f"Invalid adagents.json structure: {e}"
432+
) from e
433+
elif "authoritative_location" not in data:
434+
# Neither authorized_agents nor authoritative_location
435+
raise AdagentsValidationError(
436+
"adagents.json must have either 'authorized_agents' or 'authoritative_location'"
437+
)
367438

368439
return data
369440

370441
except httpx.TimeoutException as e:
371-
raise AdagentsTimeoutError(publisher_domain, timeout) from e
442+
parsed = urlparse(url)
443+
raise AdagentsTimeoutError(parsed.netloc, timeout) from e
372444
except httpx.RequestError as e:
373445
raise AdagentsValidationError(f"Failed to fetch adagents.json: {e}") from e
374446

tests/test_adagents.py

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,124 @@ async def test_fetch_success(self):
400400
call_args = mock_client.get.call_args
401401
assert "https://example.com/.well-known/adagents.json" in str(call_args)
402402

403+
@pytest.mark.asyncio
404+
async def test_fetch_follows_authoritative_location(self):
405+
"""Should follow authoritative_location redirect and return resolved data."""
406+
from adcp.adagents import fetch_adagents
407+
408+
# Initial response has authoritative_location redirect
409+
redirect_response_data = {
410+
"$schema": "/schemas/2.6.0/adagents.json",
411+
"authoritative_location": "https://cdn.example.com/adagents/v2/adagents.json",
412+
"last_updated": "2025-01-15T10:00:00Z",
413+
}
414+
415+
# Final resolved data at the authoritative location
416+
resolved_data = {
417+
"$schema": "/schemas/2.6.0/adagents.json",
418+
"authorized_agents": [
419+
{
420+
"url": "https://agent.example.com",
421+
"authorized_for": "All properties",
422+
"authorization_type": "property_tags",
423+
"property_tags": ["all"],
424+
}
425+
],
426+
"last_updated": "2025-01-15T10:00:00Z",
427+
}
428+
429+
# Mock client that returns different responses based on URL
430+
called_urls: list[str] = []
431+
responses = [redirect_response_data, resolved_data]
432+
433+
async def mock_get(url, **kwargs):
434+
called_urls.append(url)
435+
mock_response = MagicMock()
436+
mock_response.status_code = 200
437+
mock_response.json.return_value = responses[len(called_urls) - 1]
438+
return mock_response
439+
440+
mock_client = MagicMock()
441+
mock_client.get = mock_get
442+
443+
result = await fetch_adagents("example.com", client=mock_client)
444+
445+
assert result == resolved_data
446+
assert called_urls == [
447+
"https://example.com/.well-known/adagents.json",
448+
"https://cdn.example.com/adagents/v2/adagents.json",
449+
]
450+
451+
@pytest.mark.asyncio
452+
async def test_fetch_rejects_non_https_authoritative_location(self):
453+
"""Should reject authoritative_location that uses HTTP instead of HTTPS."""
454+
from adcp.adagents import fetch_adagents
455+
456+
redirect_response_data = {
457+
"$schema": "/schemas/2.6.0/adagents.json",
458+
"authoritative_location": "http://cdn.example.com/adagents.json", # HTTP not HTTPS
459+
"last_updated": "2025-01-15T10:00:00Z",
460+
}
461+
462+
mock_response = MagicMock()
463+
mock_response.status_code = 200
464+
mock_response.json.return_value = redirect_response_data
465+
466+
mock_client = create_mock_httpx_client(mock_response)
467+
468+
with pytest.raises(AdagentsValidationError, match="HTTPS"):
469+
await fetch_adagents("example.com", client=mock_client)
470+
471+
@pytest.mark.asyncio
472+
async def test_fetch_prevents_redirect_loop(self):
473+
"""Should detect and prevent circular redirect loops."""
474+
from adcp.adagents import fetch_adagents
475+
476+
# Circular redirect: the document at A names A itself as its authoritative_location (A -> A)
477+
redirect_data = {
478+
"$schema": "/schemas/2.6.0/adagents.json",
479+
"authoritative_location": "https://example.com/.well-known/adagents.json",
480+
"last_updated": "2025-01-15T10:00:00Z",
481+
}
482+
483+
mock_response = MagicMock()
484+
mock_response.status_code = 200
485+
mock_response.json.return_value = redirect_data
486+
487+
mock_client = create_mock_httpx_client(mock_response)
488+
489+
with pytest.raises(AdagentsValidationError, match="redirect loop|already visited"):
490+
await fetch_adagents("example.com", client=mock_client)
491+
492+
@pytest.mark.asyncio
493+
async def test_fetch_enforces_max_redirect_depth(self):
494+
"""Should enforce maximum redirect depth to prevent abuse."""
495+
from adcp.adagents import fetch_adagents
496+
497+
# Create a long chain of redirects
498+
call_count = [0]
499+
500+
async def mock_get(url, **kwargs):
501+
call_count[0] += 1
502+
mock_response = MagicMock()
503+
mock_response.status_code = 200
504+
# Always return a redirect to a new URL
505+
mock_response.json.return_value = {
506+
"$schema": "/schemas/2.6.0/adagents.json",
507+
"authoritative_location": f"https://cdn{call_count[0]}.example.com/adagents.json",
508+
"last_updated": "2025-01-15T10:00:00Z",
509+
}
510+
return mock_response
511+
512+
mock_client = MagicMock()
513+
mock_client.get = mock_get
514+
515+
with pytest.raises(AdagentsValidationError, match="redirect|depth"):
516+
await fetch_adagents("example.com", client=mock_client)
517+
518+
# Should stop after a bounded number of fetches (MAX_REDIRECT_DEPTH + 1 = 6), not go on forever
519+
assert call_count[0] <= 10
520+
403521

404522
class TestVerifyAgentForProperty:
405523
"""Test convenience wrapper for fetching and verifying in one call."""

0 commit comments

Comments
 (0)