|
| 1 | +"""JSON Schema generator for tool schemas. |
| 2 | +
|
| 3 | +Centralizes the `GenerateJsonSchema` subclass used when rendering tool input and |
| 4 | +output schemas. On top of turning pydantic's schema warnings into errors, it |
| 5 | +enforces SEP-2106: a `$ref` that is not a same-document reference (a JSON Pointer |
| 6 | +such as `#/$defs/Foo` or an `$anchor` such as `#Foo`) is an SSRF / fetch-DoS |
| 7 | +vector and MUST NOT appear in a tool schema. |
| 8 | +
|
| 9 | +See: https://modelcontextprotocol.io/seps/2106-json-schema-2020-12#security-implications |
| 10 | +""" |
| 11 | + |
| 12 | +from __future__ import annotations |
| 13 | + |
| 14 | +from typing import Any, cast |
| 15 | + |
| 16 | +from pydantic.json_schema import GenerateJsonSchema, JsonSchemaValue, JsonSchemaWarningKind |
| 17 | +from pydantic_core import CoreSchema |
| 18 | + |
| 19 | + |
class ExternalSchemaRefError(ValueError):
    """A tool schema contains a `$ref` that is not a same-document reference."""
| 22 | + |
| 23 | + |
class StrictJsonSchema(GenerateJsonSchema):
    """Render tool schemas, raising on pydantic warnings and external `$ref`s.

    Warnings (e.g. a non-serializable type) become errors so they surface at tool
    registration instead of silently producing a degenerate schema. External
    `$ref`s -- which pydantic never emits itself, but a user can inject via
    `Field(json_schema_extra=...)` -- are rejected for the same reason (SEP-2106).
    """

    def emit_warning(self, kind: JsonSchemaWarningKind, detail: str) -> None:
        """Turn a JSON schema warning into a `ValueError`.

        Args:
            kind: The warning category reported by the generator.
            detail: Human-readable description of the problem.

        Raises:
            ValueError: Always; the message includes both `kind` and `detail`.
        """
        raise ValueError(f"JSON schema warning: {kind} - {detail}")

    def generate(self, schema: CoreSchema, mode: Any = "validation") -> JsonSchemaValue:
        """Generate the JSON schema, then reject it if it holds an external `$ref`.

        Args:
            schema: The core schema to render.
            mode: Generation mode forwarded unchanged to the parent generator.

        Returns:
            The JSON schema produced by the parent generator.

        Raises:
            ExternalSchemaRefError: If the rendered schema contains a `$ref`
                that is not a same-document reference.
        """
        json_schema = super().generate(schema, mode)
        # Validate the finished document so refs injected through
        # `json_schema_extra` are caught regardless of where they were added.
        _reject_external_refs(json_schema)
        return json_schema
| 40 | + |
| 41 | + |
def _reject_external_refs(json_schema: JsonSchemaValue) -> None:
    """Raise if `json_schema` contains any `$ref` that is not same-document.

    Args:
        json_schema: The rendered JSON schema to inspect.

    Raises:
        ExternalSchemaRefError: If at least one external `$ref` is found. The
            message lists the offending references in sorted order so it is
            deterministic.
    """
    external = sorted(_find_external_refs(json_schema))
    if external:
        raise ExternalSchemaRefError(
            "Tool schema contains external $ref(s) that MUST NOT be dereferenced (SEP-2106): "
            f"{', '.join(external)}. Only same-document references (e.g. '#/$defs/Foo') are allowed."
        )
| 49 | + |
| 50 | + |
| 51 | +def _find_external_refs(node: Any) -> set[str]: |
| 52 | + external: set[str] = set() |
| 53 | + if isinstance(node, dict): |
| 54 | + mapping = cast("dict[str, Any]", node) |
| 55 | + ref = mapping.get("$ref") |
| 56 | + if isinstance(ref, str) and not ref.startswith("#"): |
| 57 | + external.add(ref) |
| 58 | + for value in mapping.values(): |
| 59 | + external |= _find_external_refs(value) |
| 60 | + elif isinstance(node, list): |
| 61 | + for item in cast("list[Any]", node): |
| 62 | + external |= _find_external_refs(item) |
| 63 | + return external |
0 commit comments