diff --git a/lib/crewai-tools/pyproject.toml b/lib/crewai-tools/pyproject.toml index 531edc9426..97cd1f8d2e 100644 --- a/lib/crewai-tools/pyproject.toml +++ b/lib/crewai-tools/pyproject.toml @@ -26,6 +26,11 @@ Documentation = "https://docs.crewai.com" [project.optional-dependencies] +skim = [ + "x402[evm]>=2.0.0", + "eth-account>=0.13.0", + "requests>=2.31.0", +] scrapfly-sdk = [ "scrapfly-sdk>=0.8.19", ] diff --git a/lib/crewai-tools/src/crewai_tools/__init__.py b/lib/crewai-tools/src/crewai_tools/__init__.py index 2f88535850..6707326d6c 100644 --- a/lib/crewai-tools/src/crewai_tools/__init__.py +++ b/lib/crewai-tools/src/crewai_tools/__init__.py @@ -188,6 +188,7 @@ from crewai_tools.tools.singlestore_search_tool.singlestore_search_tool import ( SingleStoreSearchTool, ) +from crewai_tools.tools.skim_reader_tool.skim_reader_tool import SkimReaderTool from crewai_tools.tools.snowflake_search_tool.snowflake_search_tool import ( SnowflakeConfig, SnowflakeSearchTool, @@ -311,6 +312,7 @@ "SerplyWebSearchTool", "SerplyWebpageToMarkdownTool", "SingleStoreSearchTool", + "SkimReaderTool", "SnowflakeConfig", "SnowflakeSearchTool", "SpiderTool", diff --git a/lib/crewai-tools/src/crewai_tools/tools/__init__.py b/lib/crewai-tools/src/crewai_tools/tools/__init__.py index 18bf4e5638..608cd2b3d5 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/__init__.py +++ b/lib/crewai-tools/src/crewai_tools/tools/__init__.py @@ -174,6 +174,7 @@ SerplyWebpageToMarkdownTool, ) from crewai_tools.tools.singlestore_search_tool import SingleStoreSearchTool +from crewai_tools.tools.skim_reader_tool.skim_reader_tool import SkimReaderTool from crewai_tools.tools.snowflake_search_tool import ( SnowflakeConfig, SnowflakeSearchTool, @@ -293,6 +294,7 @@ "SerplyWebSearchTool", "SerplyWebpageToMarkdownTool", "SingleStoreSearchTool", + "SkimReaderTool", "SnowflakeConfig", "SnowflakeSearchTool", "SnowflakeSearchToolInput", diff --git a/lib/crewai-tools/src/crewai_tools/tools/skim_reader_tool/README.md 
b/lib/crewai-tools/src/crewai_tools/tools/skim_reader_tool/README.md new file mode 100644 index 0000000000..0c47a5e681 --- /dev/null +++ b/lib/crewai-tools/src/crewai_tools/tools/skim_reader_tool/README.md @@ -0,0 +1,67 @@ +# SkimReaderTool + +## Description + +[Skim](https://skim402.com) is the x402-native clean reader API for AI agents. +Give it any URL and it returns clean, agent-ready Markdown — nav, ads, and +boilerplate stripped — plus structured metadata (title, byline, published date, +language, excerpt). + +`SkimReaderTool` is pay-per-call over the [x402](https://x402.org) protocol: +each read costs **$0.002 in USDC on Base**, paid automatically by a wallet you +control. There is no signup and there are no API keys — your wallet is your +identity. The private key never leaves your machine; it only signs an EIP-3009 +USDC authorization locally. + +## Installation + +Install the x402 client (with EVM support) alongside `crewai[tools]`: + +``` +pip install "x402[evm]" 'crewai[tools]' +``` + +Fund a dedicated Base wallet with a small amount of USDC (about $1 covers ~500 +reads) and expose its private key to the tool: + +``` +export SKIM_WALLET_PRIVATE_KEY=0xYOUR_BASE_WALLET_PRIVATE_KEY +``` + +Use a fresh wallet, never your personal one. Step-by-step wallet setup +instructions are available on the Skim site: https://skim402.com. + +## Example + +```python +from crewai_tools import SkimReaderTool + +tool = SkimReaderTool() # reads SKIM_WALLET_PRIVATE_KEY from the environment +tool.run(url="https://en.wikipedia.org/wiki/HTTP_402") +``` + +Drop it into any agent's tool list: + +```python +from crewai import Agent +from crewai_tools import SkimReaderTool + +researcher = Agent( + role="Research Analyst", + goal="Read and summarize web articles accurately", + backstory="You turn messy web pages into clean, citable notes.", + tools=[SkimReaderTool()], +) +``` + +## Arguments + +- `private_key`: Optional. Hex private key (with or without `0x`) for the Base + wallet that pays for reads. 
Defaults to the `SKIM_WALLET_PRIVATE_KEY` + environment variable. Use a dedicated wallet, never your personal one. +- `base_url`: Optional. Skim API base URL. Defaults to `https://skim402.com`. +- `max_price_usd`: Optional. Hard per-call price cap in USD. The wallet refuses + to sign for anything above this. Defaults to `0.01` (Skim is `$0.002`/call). +- `include_metadata`: Optional. When `True` (default), prepend a YAML + frontmatter block of the page metadata to the returned Markdown. +- `timeout`: Optional. Per-request timeout in seconds. Defaults to `60`. diff --git a/lib/crewai-tools/src/crewai_tools/tools/skim_reader_tool/__init__.py b/lib/crewai-tools/src/crewai_tools/tools/skim_reader_tool/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/lib/crewai-tools/src/crewai_tools/tools/skim_reader_tool/skim_reader_tool.py b/lib/crewai-tools/src/crewai_tools/tools/skim_reader_tool/skim_reader_tool.py new file mode 100644 index 0000000000..c7a86cff64 --- /dev/null +++ b/lib/crewai-tools/src/crewai_tools/tools/skim_reader_tool/skim_reader_tool.py @@ -0,0 +1,210 @@ +"""CrewAI tool for Skim — the x402-native clean reader API for AI agents. + +Exposes :class:`SkimReaderTool`, a CrewAI ``BaseTool`` that fetches any URL and +returns clean, agent-ready Markdown plus structured metadata. Each call is paid +automatically over the x402 protocol ($0.002 in USDC on Base) using a wallet you +control. The private key never leaves your machine — it only signs an EIP-3009 +USDC authorization locally. +""" + +from __future__ import annotations + +import os +from typing import Any, Optional + +from crewai.tools import BaseTool, EnvVar +from pydantic import BaseModel, Field, PrivateAttr, SecretStr + +DEFAULT_BASE_URL = "https://skim402.com" + + +def _yaml_scalar(value: Any) -> str: + """Render a metadata value as a safe single-line YAML scalar. 
+ + Collapses internal whitespace/newlines and double-quotes the value when it + contains characters that could otherwise produce invalid or ambiguous YAML. + """ + text = " ".join(str(value).split()) + needs_quoting = ( + text == "" + or text[0] in "!&*?|>%@`\"'#,[]{}:-" + or ": " in text + or text.endswith(":") + or text[0] in " " + ) + if needs_quoting: + escaped = text.replace("\\", "\\\\").replace('"', '\\"') + return f'"{escaped}"' + return text + + +_TOOL_DESCRIPTION = ( + "Fetch any URL and return clean, agent-ready Markdown via Skim (skim402.com). " + "Strips nav, ads, and boilerplate; preserves the article body plus structured " + "metadata (title, byline, published date, language, excerpt). Pays $0.002 per " + "call in USDC on Base over the x402 protocol — no API keys, no signup. Use this " + "whenever you need to read web content: articles, docs, blog posts, GitHub " + "READMEs, research papers, and similar pages." +) + + +class SkimReaderToolSchema(BaseModel): + """Input schema for :class:`SkimReaderTool`.""" + + url: str = Field( + description="The fully-qualified URL to fetch and clean (https://...).", + ) + + +class SkimReaderTool(BaseTool): + """Read any URL as clean Markdown via Skim, paying per call over x402. + + The tool lazily builds a payment-aware HTTP session the first time it runs, + using your Base wallet's private key to sign USDC authorizations on demand. + + Args: + private_key (str): Optional. Hex private key (with or without ``0x``) for + the Base wallet that pays for reads. Falls back to the + ``SKIM_WALLET_PRIVATE_KEY`` environment variable. Use a dedicated + wallet, never your personal one. + base_url (str): Optional. Skim API base URL. Defaults to + ``https://skim402.com``. + max_price_usd (float): Optional. Hard per-call price cap in USD. The + wallet refuses to sign for anything above this. Defaults to ``0.01`` + (Skim is ``$0.002``). + include_metadata (bool): Optional. 
class SkimReaderTool(BaseTool):
    """Read any URL as clean Markdown via Skim, paying per call over x402.

    The tool lazily builds a payment-aware HTTP session the first time it runs,
    using your Base wallet's private key to sign USDC authorizations on demand.

    Args:
        private_key (str): Optional. Hex private key (with or without ``0x``) for
            the Base wallet that pays for reads. Falls back to the
            ``SKIM_WALLET_PRIVATE_KEY`` environment variable. Use a dedicated
            wallet, never your personal one.
        base_url (str): Optional. Skim API base URL. Defaults to
            ``https://skim402.com``.
        max_price_usd (float): Optional. Hard per-call price cap in USD. The
            wallet refuses to sign for anything above this. Defaults to ``0.01``
            (Skim is ``$0.002``).
        include_metadata (bool): Optional. When ``True`` (default), prepend a YAML
            frontmatter block of the page metadata to the returned Markdown.
        timeout (float): Optional. Per-request timeout in seconds. Defaults to
            ``60``.

    Example:
        from crewai_tools import SkimReaderTool

        tool = SkimReaderTool()  # reads SKIM_WALLET_PRIVATE_KEY from the env
        tool.run(url="https://en.wikipedia.org/wiki/HTTP_402")
    """

    name: str = "Skim web reader"
    description: str = _TOOL_DESCRIPTION
    args_schema: type[BaseModel] = SkimReaderToolSchema

    base_url: str = DEFAULT_BASE_URL
    max_price_usd: float = 0.01
    include_metadata: bool = True
    timeout: float = 60.0
    # Kept out of serialized output and repr so the secret is not leaked there.
    private_key: Optional[SecretStr] = Field(default=None, exclude=True, repr=False)

    # Payment-aware session, built on first use by ``_get_session``.
    _session: Any = PrivateAttr(default=None)
    package_dependencies: list[str] = Field(
        default_factory=lambda: ["x402[evm]", "eth-account", "requests"]
    )
    env_vars: list[EnvVar] = Field(
        default_factory=lambda: [
            EnvVar(
                name="SKIM_WALLET_PRIVATE_KEY",
                description=(
                    "Hex private key for the Base wallet that pays for Skim reads "
                    "over x402. Needed unless private_key is passed to the "
                    "constructor. The key never leaves your machine."
                ),
                required=False,
            ),
        ]
    )

    def _get_session(self) -> Any:
        """Build (and cache) a requests Session that auto-pays 402 responses.

        The session is created once and stored on ``self._session``; later
        calls return the cached object without re-reading the key.

        Returns:
            The object returned by ``wrapRequestsWithPayment`` for a fresh
            ``requests.Session``.

        Raises:
            ImportError: If ``requests``, ``eth_account`` or the ``x402``
                modules cannot be imported.
            ValueError: If no private key is configured, or the key is not a
                64-character hex string.
        """
        if self._session is not None:
            return self._session

        # Optional dependencies are imported lazily so the tool can be
        # constructed and listed without them installed.
        try:
            import requests
            from eth_account import Account
            from x402 import x402ClientSync
            from x402.client import max_amount
            from x402.http.clients.requests import wrapRequestsWithPayment
            from x402.mechanisms.evm.exact.register import register_exact_evm_client
            from x402.mechanisms.evm.signers import EthAccountSigner
        except ImportError as exc:  # pragma: no cover - import-guard
            raise ImportError(
                "SkimReaderTool needs the x402 client with EVM support. Install it "
                'with: pip install "x402[evm]" requests eth-account'
            ) from exc

        raw_key = (
            self.private_key.get_secret_value()
            if self.private_key is not None
            else os.environ.get("SKIM_WALLET_PRIVATE_KEY")
        )
        # Keys pasted into shells or .env files often carry stray whitespace or
        # a trailing newline; tolerate that instead of rejecting a valid key.
        key = (raw_key or "").strip()
        if not key:
            raise ValueError(
                "Skim requires payment via x402. Provide a Base wallet funded with "
                "USDC by setting the SKIM_WALLET_PRIVATE_KEY environment variable, or "
                "by passing private_key=... to SkimReaderTool(). The key never leaves "
                "your machine — it only signs payment authorizations locally."
            )

        # Accept both "0x" and "0X" prefixes.
        normalized = key[2:] if key[:2].lower() == "0x" else key
        hex_digits = "0123456789abcdefABCDEF"
        if len(normalized) != 64 or not all(c in hex_digits for c in normalized):
            # The key may have come from the constructor rather than the
            # environment, so the message names both sources.
            raise ValueError(
                "The Skim wallet private key (SKIM_WALLET_PRIVATE_KEY or "
                "private_key=...) must be a 64-character hex string (with or "
                "without a 0x prefix)."
            )

        account = Account.from_key("0x" + normalized)
        cap_atomic = int(round(self.max_price_usd * 1_000_000))  # USDC has 6 decimals
        client = x402ClientSync()
        register_exact_evm_client(
            client,
            EthAccountSigner(account),
            policies=[max_amount(cap_atomic)],
        )
        self._session = wrapRequestsWithPayment(requests.Session(), client)
        return self._session

    def _run(self, url: str) -> str:
        """Fetch ``url`` through Skim and return its content as Markdown.

        Posts ``{"url": url, "mode": "basic"}`` to ``<base_url>/api/v1/read``
        using the session from ``_get_session``.

        Args:
            url: The page to fetch and clean.

        Returns:
            The response's ``markdown`` field (falling back to ``text``, then
            to an empty string). When ``include_metadata`` is true and the
            response has a ``metadata`` object with non-empty values, a
            ``---``-delimited frontmatter block is prepended.

        Raises:
            RuntimeError: If the request raises, the response status is an
                error, the body is not JSON, or the JSON is not an object.
            ImportError: Propagated from ``_get_session``.
            ValueError: Propagated from ``_get_session``.
        """
        session = self._get_session()
        endpoint = self.base_url.rstrip("/") + "/api/v1/read"

        try:
            res = session.post(
                endpoint,
                json={"url": url, "mode": "basic"},
                timeout=self.timeout,
            )
        except Exception as exc:  # network / payment-signing failure
            raise RuntimeError(
                f"Skim request failed: {exc}. Common causes: the wallet has no USDC "
                f"on Base, or the price exceeded max_price_usd (${self.max_price_usd})."
            ) from exc

        # Prefer the response's own ``ok`` flag; fall back to the status code
        # for response objects that do not expose it.
        if not getattr(res, "ok", res.status_code < 400):
            body = (res.text or "").strip()
            raise RuntimeError(
                f"Skim returned {res.status_code} {getattr(res, 'reason', '')}: "
                f"{body or '(no body)'}"
            )

        try:
            data = res.json()
        except ValueError as exc:
            raise RuntimeError(
                "Skim returned a non-JSON response. This usually means the request "
                f"did not reach the Skim API. Underlying error: {exc}"
            ) from exc

        # Valid JSON is not necessarily an object (it may be a list, string,
        # null, ...). Fail with the same exception type as other bad responses
        # instead of leaking an AttributeError from ``.get``.
        if not isinstance(data, dict):
            raise RuntimeError(
                "Skim returned an unexpected JSON payload: expected an object, "
                f"got {type(data).__name__}."
            )

        markdown = data.get("markdown") or data.get("text") or ""

        metadata = data.get("metadata")
        if self.include_metadata and isinstance(metadata, dict):
            # Skip missing/empty values so the frontmatter only lists real data.
            meta_lines = [
                f"{k}: {_yaml_scalar(v)}"
                for k, v in metadata.items()
                if v is not None and v != ""
            ]
            if meta_lines:
                markdown = "---\n" + "\n".join(meta_lines) + "\n---\n\n" + markdown

        return markdown
out = tool._run(url="https://example.com/a") + + assert out.startswith("---\n") + assert "title: Title" in out + assert "byline: Jane Doe" in out + assert "lang: en" in out + assert "excerpt:" not in out + assert "siteName:" not in out + assert out.rstrip().endswith("Body text.") + + call = fake.calls[0] + assert call["url"].endswith("/api/v1/read") + assert call["json"] == {"url": "https://example.com/a", "mode": "basic"} + + +def test_include_metadata_false_returns_plain_markdown(): + tool = SkimReaderTool(private_key=VALID_KEY, include_metadata=False) + tool._session = _FakeSession( + _FakeResp(payload={"markdown": "# Title", "metadata": {"title": "Title"}}) + ) + + assert tool._run(url="https://example.com/a") == "# Title" + + +def test_falls_back_to_text_when_no_markdown(): + tool = SkimReaderTool(private_key=VALID_KEY, include_metadata=False) + tool._session = _FakeSession(_FakeResp(payload={"text": "plain text"})) + + assert tool._run(url="https://example.com/a") == "plain text" + + +def test_custom_base_url_is_used(): + tool = SkimReaderTool(private_key=VALID_KEY, base_url="https://example.test/") + fake = _FakeSession(_FakeResp(payload={"markdown": "x"})) + tool._session = fake + + tool._run(url="https://example.com/a") + + assert fake.calls[0]["url"] == "https://example.test/api/v1/read" + + +def test_http_error_raises_runtime_error(): + tool = SkimReaderTool(private_key=VALID_KEY) + tool._session = _FakeSession( + _FakeResp(status=502, text="upstream boom", reason="Bad Gateway") + ) + + with pytest.raises(RuntimeError) as exc: + tool._run(url="https://example.com/a") + + assert "502" in str(exc.value) + + +def test_non_json_response_raises_runtime_error(): + tool = SkimReaderTool(private_key=VALID_KEY) + + class _BadJsonResp(_FakeResp): + def json(self): + raise ValueError("Expecting value") + + tool._session = _FakeSession(_BadJsonResp(text="oops")) + + with pytest.raises(RuntimeError): + tool._run(url="https://example.com/a") + + +def 
test_yaml_scalar_quotes_ambiguous_values(): + assert _yaml_scalar("plain title") == "plain title" + assert _yaml_scalar("key: value").startswith('"') + assert _yaml_scalar("") == '""' + assert _yaml_scalar("multi\nline\ntext") == "multi line text" + + +def test_tool_metadata_is_set(): + tool = SkimReaderTool(private_key=VALID_KEY) + assert tool.name == "Skim web reader" + assert "x402" in tool.description + assert tool.args_schema is not None diff --git a/lib/crewai-tools/tool.specs.json b/lib/crewai-tools/tool.specs.json index 795fa932c4..6c6045f730 100644 --- a/lib/crewai-tools/tool.specs.json +++ b/lib/crewai-tools/tool.specs.json @@ -23398,6 +23398,103 @@ "type": "object" } }, + { + "description": "Fetch any URL and return clean, agent-ready Markdown via Skim (skim402.com). Strips nav, ads, and boilerplate; preserves the article body plus structured metadata (title, byline, published date, language, excerpt). Pays $0.002 per call in USDC on Base over the x402 protocol \u2014 no API keys, no signup. Use this whenever you need to read web content: articles, docs, blog posts, GitHub READMEs, research papers, and similar pages.", + "env_vars": [ + { + "default": null, + "description": "Hex private key for the Base wallet that pays for Skim reads over x402. Needed unless private_key is passed to the constructor. 
The key never leaves your machine.", + "name": "SKIM_WALLET_PRIVATE_KEY", + "required": false + } + ], + "humanized_name": "Skim web reader", + "init_params_schema": { + "$defs": { + "EnvVar": { + "properties": { + "default": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Default" + }, + "description": { + "title": "Description", + "type": "string" + }, + "name": { + "title": "Name", + "type": "string" + }, + "required": { + "default": true, + "title": "Required", + "type": "boolean" + } + }, + "required": [ + "name", + "description" + ], + "title": "EnvVar", + "type": "object" + } + }, + "description": "Read any URL as clean Markdown via Skim, paying per call over x402.\n\nThe tool lazily builds a payment-aware HTTP session the first time it runs,\nusing your Base wallet's private key to sign USDC authorizations on demand.\n\nArgs:\n private_key (str): Optional. Hex private key (with or without ``0x``) for\n the Base wallet that pays for reads. Falls back to the\n ``SKIM_WALLET_PRIVATE_KEY`` environment variable. Use a dedicated\n wallet, never your personal one.\n base_url (str): Optional. Skim API base URL. Defaults to\n ``https://skim402.com``.\n max_price_usd (float): Optional. Hard per-call price cap in USD. The\n wallet refuses to sign for anything above this. Defaults to ``0.01``\n (Skim is ``$0.002``).\n include_metadata (bool): Optional. When ``True`` (default), prepend a YAML\n frontmatter block of the page metadata to the returned Markdown.\n timeout (float): Optional. Per-request timeout in seconds. 
Defaults to\n ``60``.\n\nExample:\n from crewai_tools import SkimReaderTool\n\n tool = SkimReaderTool() # reads SKIM_WALLET_PRIVATE_KEY from the env\n tool.run(url=\"https://en.wikipedia.org/wiki/HTTP_402\")", + "properties": { + "base_url": { + "default": "https://skim402.com", + "title": "Base Url", + "type": "string" + }, + "include_metadata": { + "default": true, + "title": "Include Metadata", + "type": "boolean" + }, + "max_price_usd": { + "default": 0.01, + "title": "Max Price Usd", + "type": "number" + }, + "timeout": { + "default": 60.0, + "title": "Timeout", + "type": "number" + } + }, + "required": [], + "title": "SkimReaderTool", + "type": "object" + }, + "name": "SkimReaderTool", + "package_dependencies": [ + "x402[evm]", + "eth-account", + "requests" + ], + "run_params_schema": { + "properties": { + "url": { + "description": "The fully-qualified URL to fetch and clean (https://...).", + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "SkimReaderToolSchema", + "type": "object" + } + }, { "description": "Execute SQL queries or semantic search on Snowflake data warehouse. Supports both raw SQL and natural language queries.", "env_vars": [],