From 7033a68c1663d30a256aa87eb0f6d3c1fbaa371d Mon Sep 17 00:00:00 2001 From: JessieJanie Date: Sat, 20 Jun 2026 11:44:17 -0700 Subject: [PATCH 1/2] feat(tools): add SkimReaderTool (x402-paid clean web reader) Adds SkimReaderTool, which fetches any URL and returns clean, agent-ready Markdown plus structured metadata via Skim (skim402.com). Each call is paid automatically over the x402 protocol ($0.002 in USDC on Base) using a wallet the user controls; no API keys or signup. The private key signs USDC payment authorizations locally and is never transmitted. - lib/crewai-tools/src/crewai_tools/tools/skim_reader_tool/ (tool + README) - wires exports in tools/__init__.py and crewai_tools/__init__.py - adds the optional 'x402' extra in lib/crewai-tools/pyproject.toml - adds tests/tools/skim_reader_tool_test.py (fully mocked, offline) --- lib/crewai-tools/pyproject.toml | 5 + lib/crewai-tools/src/crewai_tools/__init__.py | 2 + .../src/crewai_tools/tools/__init__.py | 2 + .../tools/skim_reader_tool/README.md | 69 ++++++ .../tools/skim_reader_tool/__init__.py | 0 .../skim_reader_tool/skim_reader_tool.py | 221 ++++++++++++++++++ .../tests/tools/skim_reader_tool_test.py | 122 ++++++++++ 7 files changed, 421 insertions(+) create mode 100644 lib/crewai-tools/src/crewai_tools/tools/skim_reader_tool/README.md create mode 100644 lib/crewai-tools/src/crewai_tools/tools/skim_reader_tool/__init__.py create mode 100644 lib/crewai-tools/src/crewai_tools/tools/skim_reader_tool/skim_reader_tool.py create mode 100644 lib/crewai-tools/tests/tools/skim_reader_tool_test.py diff --git a/lib/crewai-tools/pyproject.toml b/lib/crewai-tools/pyproject.toml index 531edc9426..0a01e009df 100644 --- a/lib/crewai-tools/pyproject.toml +++ b/lib/crewai-tools/pyproject.toml @@ -147,6 +147,11 @@ e2b = [ "e2b~=2.20.0", "e2b-code-interpreter~=2.6.0", ] +x402 = [ + "x402[evm]>=2.0.0", + "eth-account>=0.13.0", + "requests>=2.31.0", +] [tool.uv] diff --git a/lib/crewai-tools/src/crewai_tools/__init__.py 
b/lib/crewai-tools/src/crewai_tools/__init__.py index 2f88535850..6707326d6c 100644 --- a/lib/crewai-tools/src/crewai_tools/__init__.py +++ b/lib/crewai-tools/src/crewai_tools/__init__.py @@ -188,6 +188,7 @@ from crewai_tools.tools.singlestore_search_tool.singlestore_search_tool import ( SingleStoreSearchTool, ) +from crewai_tools.tools.skim_reader_tool.skim_reader_tool import SkimReaderTool from crewai_tools.tools.snowflake_search_tool.snowflake_search_tool import ( SnowflakeConfig, SnowflakeSearchTool, @@ -311,6 +312,7 @@ "SerplyWebSearchTool", "SerplyWebpageToMarkdownTool", "SingleStoreSearchTool", + "SkimReaderTool", "SnowflakeConfig", "SnowflakeSearchTool", "SpiderTool", diff --git a/lib/crewai-tools/src/crewai_tools/tools/__init__.py b/lib/crewai-tools/src/crewai_tools/tools/__init__.py index 18bf4e5638..608cd2b3d5 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/__init__.py +++ b/lib/crewai-tools/src/crewai_tools/tools/__init__.py @@ -174,6 +174,7 @@ SerplyWebpageToMarkdownTool, ) from crewai_tools.tools.singlestore_search_tool import SingleStoreSearchTool +from crewai_tools.tools.skim_reader_tool.skim_reader_tool import SkimReaderTool from crewai_tools.tools.snowflake_search_tool import ( SnowflakeConfig, SnowflakeSearchTool, @@ -293,6 +294,7 @@ "SerplyWebSearchTool", "SerplyWebpageToMarkdownTool", "SingleStoreSearchTool", + "SkimReaderTool", "SnowflakeConfig", "SnowflakeSearchTool", "SnowflakeSearchToolInput", diff --git a/lib/crewai-tools/src/crewai_tools/tools/skim_reader_tool/README.md b/lib/crewai-tools/src/crewai_tools/tools/skim_reader_tool/README.md new file mode 100644 index 0000000000..8e88da0612 --- /dev/null +++ b/lib/crewai-tools/src/crewai_tools/tools/skim_reader_tool/README.md @@ -0,0 +1,69 @@ +# SkimReaderTool + +## Description + +[Skim](https://skim402.com) is an x402-native clean reader API for AI agents. 
+Give it a URL and it returns clean, agent-ready Markdown plus structured +metadata (title, byline, published date, language, excerpt) — nav, ads, and +boilerplate stripped out. + +`SkimReaderTool` lets a CrewAI agent read any web page (articles, docs, blog +posts, GitHub READMEs, research papers) as Markdown. Reads are paid per call +over the [x402 protocol](https://x402.org) — $0.002 in USDC on Base — using a +wallet you control. There are no API keys and no signup: the wallet is the +identity, and payment happens automatically on the HTTP 402 handshake. + +## Installation + +Install the tool with the `x402` extra, which pulls the x402 client with EVM +support: + +```shell +pip install 'crewai[tools]' +pip install 'crewai-tools[x402]' +``` + +## Requirements + +- A Base wallet private key, funded with a small amount of USDC on Base, exposed + as the `SKIM_WALLET_PRIVATE_KEY` environment variable (or passed via + `private_key=`). Use a dedicated wallet, never your personal one. The key is + used only to sign payment authorizations locally and never leaves your machine. + +## Example + +```python +from crewai_tools import SkimReaderTool + +# Reads SKIM_WALLET_PRIVATE_KEY from the environment. +tool = SkimReaderTool() + +markdown = tool.run(url="https://en.wikipedia.org/wiki/HTTP_402") +print(markdown) +``` + +Or wire it into an agent: + +```python +from crewai import Agent +from crewai_tools import SkimReaderTool + +researcher = Agent( + role="Researcher", + goal="Read and summarize web pages", + backstory="An analyst who reads primary sources before drawing conclusions.", + tools=[SkimReaderTool()], +) +``` + +## Arguments + +- `private_key` (`SecretStr`, optional): Hex private key for the paying Base + wallet (with or without `0x`). Falls back to `SKIM_WALLET_PRIVATE_KEY`. +- `base_url` (`str`, optional): Skim API base URL. Defaults to + `https://skim402.com`. +- `max_price_usd` (`float`, optional): Hard per-call price cap in USD. 
The wallet + refuses to sign for anything above this. Defaults to `0.01` (Skim is `$0.002`). +- `include_metadata` (`bool`, optional): When `True` (default), prepend a YAML + frontmatter block of the page metadata to the returned Markdown. +- `timeout` (`float`, optional): Per-request timeout in seconds. Defaults to `60`. diff --git a/lib/crewai-tools/src/crewai_tools/tools/skim_reader_tool/__init__.py b/lib/crewai-tools/src/crewai_tools/tools/skim_reader_tool/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/lib/crewai-tools/src/crewai_tools/tools/skim_reader_tool/skim_reader_tool.py b/lib/crewai-tools/src/crewai_tools/tools/skim_reader_tool/skim_reader_tool.py new file mode 100644 index 0000000000..ce682b17bf --- /dev/null +++ b/lib/crewai-tools/src/crewai_tools/tools/skim_reader_tool/skim_reader_tool.py @@ -0,0 +1,221 @@ +"""CrewAI tool for Skim — the x402-native clean reader API for AI agents. + +Skim (https://skim402.com) turns any URL into clean, agent-ready Markdown plus +structured metadata. Reads are paid per call over the x402 protocol ($0.002 in +USDC on Base) using a wallet you control — no API keys, no signup. +""" + +from __future__ import annotations + +import importlib +import os +from typing import Any + +from crewai.tools import BaseTool, EnvVar +from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, SecretStr +import requests + +from crewai_tools.security.safe_path import validate_url + + +DEFAULT_BASE_URL = "https://skim402.com" + + +def _yaml_scalar(value: Any) -> str: + """Render a metadata value as a safe single-line YAML scalar. + + Collapses internal whitespace/newlines and double-quotes the value when it + contains characters that could otherwise produce invalid or ambiguous YAML. 
+ """ + text = " ".join(str(value).split()) + needs_quoting = ( + text == "" + or text[0] in "!&*?|>%@`\"'#,[]{}:-" + or ": " in text + or text.endswith(":") + or text[0] == " " + ) + if needs_quoting: + escaped = text.replace("\\", "\\\\").replace('"', '\\"') + return f'"{escaped}"' + return text + + +_TOOL_DESCRIPTION = ( + "Fetch any URL and return clean, agent-ready Markdown via Skim (skim402.com). " + "Strips nav, ads, and boilerplate; preserves the article body plus structured " + "metadata (title, byline, published date, language, excerpt). Pays $0.002 per " + "call in USDC on Base over the x402 protocol — no API keys, no signup. Use this " + "whenever you need to read web content: articles, docs, blog posts, GitHub " + "READMEs, research papers, and similar pages." +) + + +class SkimReaderToolSchema(BaseModel): + """Input schema for :class:`SkimReaderTool`.""" + + url: str = Field( + description="The fully-qualified URL to fetch and clean (https://...)." + ) + + +class SkimReaderTool(BaseTool): + """Read any URL as clean Markdown via Skim, paying per call over x402. + + The tool lazily builds a payment-aware HTTP session the first time it runs, + using your Base wallet's private key to sign USDC authorizations on demand. + The key is used only to sign locally and never leaves your machine. + + Args: + private_key (SecretStr): Hex private key (with or without ``0x``) for the + Base wallet that pays for reads. Falls back to the + ``SKIM_WALLET_PRIVATE_KEY`` environment variable. Use a dedicated + wallet, never your personal one. + base_url (str): Skim API base URL. Defaults to ``https://skim402.com``. + max_price_usd (float): Hard per-call price cap in USD. The wallet refuses + to sign for anything above this. Defaults to ``0.01`` (Skim is + ``$0.002``). + include_metadata (bool): When ``True`` (default), prepend a YAML + frontmatter block of the page metadata to the returned Markdown. + timeout (float): Per-request timeout in seconds. Defaults to ``60``. 
def _get_session(self) -> Any:
    """Build (and cache) a requests Session that auto-pays 402 responses.

    The private key is resolved and validated *before* the optional x402 /
    eth_account modules are imported, so a missing or malformed key is always
    reported as ``ValueError`` even when those packages are not installed.

    Returns:
        The object returned by ``wrapRequestsWithPayment`` for a fresh
        ``requests.Session``. It is stored on ``self._session`` and returned
        unchanged by every later call.

    Raises:
        ValueError: If no private key is configured (neither ``private_key``
            nor ``SKIM_WALLET_PRIVATE_KEY``), or the key is not a
            64-character hex string (optionally prefixed with ``0x``).
        ImportError: If ``eth_account`` or one of the ``x402`` modules cannot
            be imported.
    """
    from decimal import Decimal
    import math

    if self._session is not None:
        return self._session

    raw_key = (
        self.private_key.get_secret_value()
        if self.private_key is not None
        else os.environ.get("SKIM_WALLET_PRIVATE_KEY")
    )
    # Keys copied into .env files or shell exports often carry stray
    # whitespace or a trailing newline; tolerate it instead of rejecting.
    key = raw_key.strip() if raw_key else ""
    if not key:
        raise ValueError(
            "Skim requires payment via x402. Provide a Base wallet funded with "
            "USDC by setting the SKIM_WALLET_PRIVATE_KEY environment variable, "
            "or by passing private_key=... to SkimReaderTool(). The key never "
            "leaves your machine — it only signs payment authorizations locally."
        )

    # Accept both "0x" and "0X" prefixes.
    normalized = key[2:] if key[:2].lower() == "0x" else key
    if len(normalized) != 64 or any(
        c not in "0123456789abcdefABCDEF" for c in normalized
    ):
        raise ValueError(
            "SKIM_WALLET_PRIVATE_KEY must be a 64-character hex string (with or "
            "without a 0x prefix)."
        )

    try:
        account_factory = importlib.import_module("eth_account").Account
        x402_client_sync = importlib.import_module("x402").x402ClientSync
        max_amount = importlib.import_module("x402.client").max_amount
        wrap_with_payment = importlib.import_module(
            "x402.http.clients.requests"
        ).wrapRequestsWithPayment
        register_exact_evm_client = importlib.import_module(
            "x402.mechanisms.evm.exact.register"
        ).register_exact_evm_client
        eth_account_signer = importlib.import_module(
            "x402.mechanisms.evm.signers"
        ).EthAccountSigner
    except ImportError as exc:
        raise ImportError(
            "SkimReaderTool needs the x402 client with EVM support. Install it "
            "with: pip install 'x402[evm]' requests eth-account"
        ) from exc

    account = account_factory.from_key("0x" + normalized)
    # USDC has 6 decimals. Floor rather than round so the signed cap can never
    # exceed max_price_usd; going through Decimal(str(...)) avoids binary-float
    # noise, so 0.01 converts to exactly 10000 atomic units.
    cap_atomic = math.floor(Decimal(str(self.max_price_usd)) * 1_000_000)
    client = x402_client_sync()
    register_exact_evm_client(
        client,
        eth_account_signer(account),
        policies=[max_amount(cap_atomic)],
    )
    self._session = wrap_with_payment(requests.Session(), client)
    return self._session
+ ) + + markdown: str = data.get("markdown") or data.get("text") or "" + + metadata = data.get("metadata") + if self.include_metadata and isinstance(metadata, dict): + meta_lines = [ + f"{k}: {_yaml_scalar(v)}" + for k, v in metadata.items() + if v is not None and v != "" + ] + if meta_lines: + markdown = "---\n" + "\n".join(meta_lines) + "\n---\n\n" + markdown + + return markdown diff --git a/lib/crewai-tools/tests/tools/skim_reader_tool_test.py b/lib/crewai-tools/tests/tools/skim_reader_tool_test.py new file mode 100644 index 0000000000..51bda861f9 --- /dev/null +++ b/lib/crewai-tools/tests/tools/skim_reader_tool_test.py @@ -0,0 +1,122 @@ +"""Tests for SkimReaderTool. + +These tests are fully mocked: no network calls and no real x402 payments are +made. The payment-aware session and the URL validator are stubbed so the tests +are deterministic and offline. +""" + +import pytest + +from crewai_tools.tools.skim_reader_tool import skim_reader_tool as skim_module +from crewai_tools.tools.skim_reader_tool.skim_reader_tool import SkimReaderTool + + +class _FakeResponse: + def __init__(self, *, status_code=200, json_data=None, text="", reason="OK"): + self.status_code = status_code + self.ok = status_code < 400 + self._json_data = json_data + self.text = text + self.reason = reason + + def json(self): + if self._json_data is None: + raise ValueError("no json") + return self._json_data + + +class _FakeSession: + def __init__(self, response): + self._response = response + self.calls = [] + + def post(self, endpoint, json=None, timeout=None): + self.calls.append({"endpoint": endpoint, "json": json, "timeout": timeout}) + return self._response + + +@pytest.fixture(autouse=True) +def _skip_url_validation(monkeypatch): + # validate_url performs DNS resolution / SSRF checks; bypass it in unit tests. 
def test_defaults_and_schema():
    """A default-constructed tool exposes the expected defaults and a ``url`` input."""
    tool = SkimReaderTool()

    expected_defaults = {
        "name": "Skim web reader",
        "base_url": "https://skim402.com",
        "max_price_usd": 0.01,
        "timeout": 60.0,
    }
    for attr, expected in expected_defaults.items():
        assert getattr(tool, attr) == expected
    # Identity check: the flag must be the real boolean, not merely truthy.
    assert tool.include_metadata is True

    schema = tool.args_schema
    assert schema is not None
    assert "url" in schema.model_fields
def test_get_session_requires_a_key(monkeypatch):
    """With no key configured, ``_get_session`` raises a helpful ``ValueError``.

    The lazily-imported x402 / eth_account modules are stubbed so the test
    does not depend on the optional ``x402`` extra being installed.
    """
    from unittest.mock import MagicMock

    monkeypatch.delenv("SKIM_WALLET_PRIVATE_KEY", raising=False)
    tool = SkimReaderTool()
    # Stub only after the tool is constructed so construction itself is
    # unaffected by the patched importer.
    monkeypatch.setattr(
        skim_module.importlib, "import_module", lambda name: MagicMock()
    )

    with pytest.raises(ValueError, match="SKIM_WALLET_PRIVATE_KEY"):
        tool._get_session()
def test_get_session_builds_wrapped_session(monkeypatch):
    """``_get_session`` wires the fake x402 pieces together and caches the result."""
    # Every lazily-imported x402 / eth_account symbol is replaced by a fake, so
    # no real client, signing, or network is involved.
    calls = {}

    class _FakeModule:
        def __init__(self, attrs):
            self.__dict__.update(attrs)

    def wrap(session, client):
        calls["wrap"] = (session, client)
        return "wrapped-session"

    def register(client, signer, policies=None):
        calls["register"] = (client, signer, policies)

    # Module name -> fake module exposing exactly the attribute the tool reads.
    fake_modules = {
        "eth_account": _FakeModule(
            {"Account": _FakeModule({"from_key": lambda k: ("account", k)})}
        ),
        "x402": _FakeModule({"x402ClientSync": lambda: "client"}),
        "x402.client": _FakeModule({"max_amount": lambda cap: ("max_amount", cap)}),
        "x402.http.clients.requests": _FakeModule({"wrapRequestsWithPayment": wrap}),
        "x402.mechanisms.evm.exact.register": _FakeModule(
            {"register_exact_evm_client": register}
        ),
        "x402.mechanisms.evm.signers": _FakeModule(
            {"EthAccountSigner": lambda acct: ("signer", acct)}
        ),
    }

    def fake_import_module(name):
        if name not in fake_modules:
            raise AssertionError(f"unexpected import: {name}")
        return fake_modules[name]

    monkeypatch.setattr(skim_module.importlib, "import_module", fake_import_module)

    tool = SkimReaderTool(private_key="0x" + "a" * 64, max_price_usd=0.01)
    session = tool._get_session()

    assert session == "wrapped-session"
    assert tool._session == "wrapped-session"
    # The payment client was registered and the requests session was wrapped.
    assert "register" in calls
    assert calls["register"][2] == [("max_amount", 10_000)]  # $0.01 -> 10000 atomic
    assert "wrap" in calls

    # A second call must reuse the cached session without importing again.
    def fail_on_import(name):
        raise AssertionError("should not re-import")

    monkeypatch.setattr(skim_module.importlib, "import_module", fail_on_import)
    assert tool._get_session() == "wrapped-session"