From 6590ae41e827cfacc2a44cc8dd7250f6a8acf2d7 Mon Sep 17 00:00:00 2001 From: Domantas Jurkus Date: Fri, 17 Apr 2026 09:45:26 +0300 Subject: [PATCH 1/8] feat: Add AI targets + toolsets --- src/constants.ts | 11 ++++++ src/index.ts | 16 +++++++- src/tools/amazon-search-parsed-tool.ts | 3 +- src/tools/chatgpt-tool.ts | 53 ++++++++++++++++++++++++++ src/tools/google-search-parsed-tool.ts | 3 +- src/tools/index.ts | 2 + src/tools/perplexity-tool.ts | 53 ++++++++++++++++++++++++++ src/tools/reddit-post-tool.ts | 3 +- src/tools/reddit-subreddit-tool.ts | 3 +- src/tools/scrape-as-markdown-tool.ts | 4 +- src/tools/tool.ts | 8 ++++ src/types.ts | 4 ++ 12 files changed, 156 insertions(+), 7 deletions(-) create mode 100644 src/tools/chatgpt-tool.ts create mode 100644 src/tools/perplexity-tool.ts create mode 100644 src/tools/tool.ts diff --git a/src/constants.ts b/src/constants.ts index 0358be0..c622029 100644 --- a/src/constants.ts +++ b/src/constants.ts @@ -1,7 +1,18 @@ +export enum TOOLSET { + WEB = 'web', + ECOMMERCE = 'ecommerce', + SOCIAL_MEDIA = 'social_media', + AI = 'ai', +} + +// todo: utils export enum SCRAPER_API_TARGETS { GOOGLE_SEARCH = 'google_search', AMAZON_SEARCH = 'amazon_search', REDDIT_POST = 'reddit_post', REDDIT_SUBREDDIT = 'reddit_subreddit', + + CHATGPT = 'chatgpt', + PERPLEXITY = 'perplexity', } diff --git a/src/index.ts b/src/index.ts index 13ec873..2634f0c 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,7 +1,8 @@ import 'dotenv/config'; import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; -import { ScraperAPIStdioServer } from './server/sapi-stdio-server'; +import { ScraperAPIMCPServer } from './sapi-mcp-server'; +import { TOOLSET } from './constants'; if (process.env.ENABLE_MCPS_LOGGER) { import('mcps-logger/console'); @@ -23,15 +24,26 @@ const parseEnvsOrExit = (): Record => { }; }; +const resolveToolsets = (toolsets?: string): TOOLSET[] => { + if (!toolsets) { + return []; + } + + return 
toolsets.split(',').map(toolset => toolset as TOOLSET); +}; + async function main() { const transport = new StdioServerTransport(); // if there are no envs, some MCP clients will fail silently const { sapiUsername, sapiPassword } = parseEnvsOrExit(); - const sapiMcpServer = new ScraperAPIStdioServer({ + const toolsets = resolveToolsets(process.env.TOOLSETS); + + const sapiMcpServer = new ScraperAPIMCPServer({ sapiUsername, sapiPassword, + toolsets, }); await sapiMcpServer.connect(transport); diff --git a/src/tools/amazon-search-parsed-tool.ts b/src/tools/amazon-search-parsed-tool.ts index c6a2624..547cd55 100644 --- a/src/tools/amazon-search-parsed-tool.ts +++ b/src/tools/amazon-search-parsed-tool.ts @@ -2,11 +2,12 @@ import z from 'zod'; import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { ScraperAPIParams, ScrapingMCPParams } from 'types'; import { ScraperApiClient } from 'clients/scraper-api-client'; -import { SCRAPER_API_TARGETS } from '../constants'; +import { SCRAPER_API_TARGETS, TOOLSET } from '../constants'; import { removeKeyFromNestedObject } from '../utils'; import { zodGeo, zodJsRender } from '../zod/zod-types'; export class AmazonSearchParsedTool { + static toolset = TOOLSET.ECOMMERCE; static FIELDS_WITH_HIGH_CHAR_COUNT = ['suggested', 'amazons_choices', 'refinements']; static transformAutoParsedResponse = ({ obj }: { obj: object }): string => { diff --git a/src/tools/chatgpt-tool.ts b/src/tools/chatgpt-tool.ts new file mode 100644 index 0000000..461ebf2 --- /dev/null +++ b/src/tools/chatgpt-tool.ts @@ -0,0 +1,53 @@ +import z from 'zod'; +import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; +import { ScraperAPIParams, ScrapingMCPParams } from 'types'; +import { ScraperApiClient } from 'clients/scraper-api-client'; +import { SCRAPER_API_TARGETS, TOOLSET } from '../constants'; +import { zodGeo } from '../zod/zod-types'; + +export class ChatGPTTool { + static toolset = TOOLSET.AI; + static register = ({ + server, + 
sapiClient, + }: { + server: McpServer; + sapiClient: ScraperApiClient; + }) => { + server.registerTool( + 'chatgpt', + { + description: 'Search and interact with ChatGPT for AI-powered responses and conversations', + inputSchema: { + prompt: z.string().describe('Prompt to send to ChatGPT'), + search: z.boolean().describe("Activates ChatGPT's web search functionality").optional(), + geo: zodGeo, + }, + annotations: { + readOnlyHint: true, + openWorldHint: true, + }, + }, + async (scrapingParams: ScrapingMCPParams) => { + const params = { + ...scrapingParams, + target: SCRAPER_API_TARGETS.CHATGPT, + parse: true, + } satisfies ScraperAPIParams; + + const { data } = await sapiClient.scrape({ scrapingParams: params }); + + const text = JSON.stringify(data, null, 2); + + return { + content: [ + { + type: 'text', + text, + }, + ], + }; + } + ); + }; +} diff --git a/src/tools/google-search-parsed-tool.ts b/src/tools/google-search-parsed-tool.ts index b46dcb2..2ef1fdb 100644 --- a/src/tools/google-search-parsed-tool.ts +++ b/src/tools/google-search-parsed-tool.ts @@ -2,11 +2,12 @@ import z from 'zod'; import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { ScraperAPIParams, ScrapingMCPParams } from 'types'; import { ScraperApiClient } from 'clients/scraper-api-client'; -import { SCRAPER_API_TARGETS } from '../constants'; +import { SCRAPER_API_TARGETS, TOOLSET } from '../constants'; import { removeKeyFromNestedObject } from '../utils'; import { zodGeo, zodLocale, zodJsRender } from '../zod/zod-types'; export class GoogleSearchParsedTool { + static toolset = TOOLSET.WEB; static FIELDS_WITH_HIGH_CHAR_COUNT = [ 'images', 'image_data', diff --git a/src/tools/index.ts b/src/tools/index.ts index 5c11632..8a48792 100644 --- a/src/tools/index.ts +++ b/src/tools/index.ts @@ -1,5 +1,7 @@ export * from './amazon-search-parsed-tool'; +export * from './chatgpt-tool'; export * from './google-search-parsed-tool'; +export * from './perplexity-tool'; export * from 
'./reddit-subreddit-tool'; export * from './reddit-post-tool'; export * from './scrape-as-markdown-tool'; diff --git a/src/tools/perplexity-tool.ts b/src/tools/perplexity-tool.ts new file mode 100644 index 0000000..8ad8093 --- /dev/null +++ b/src/tools/perplexity-tool.ts @@ -0,0 +1,53 @@ +import z from 'zod'; +import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; +import { ScraperAPIParams, ScrapingMCPParams } from 'types'; +import { ScraperApiClient } from 'clients/scraper-api-client'; +import { SCRAPER_API_TARGETS, TOOLSET } from '../constants'; +import { zodGeo } from '../zod/zod-types'; + +export class PerplexityTool { + static toolset = TOOLSET.AI; + static register = ({ + server, + sapiClient, + }: { + server: McpServer; + sapiClient: ScraperApiClient; + }) => { + server.registerTool( + 'perplexity', + { + description: + 'Search and interact with Perplexity for AI-powered responses and conversations', + inputSchema: { + prompt: z.string().describe('Prompt to send to Perplexity'), + geo: zodGeo, + }, + annotations: { + readOnlyHint: true, + openWorldHint: true, + }, + }, + async (scrapingParams: ScrapingMCPParams) => { + const params = { + ...scrapingParams, + target: SCRAPER_API_TARGETS.PERPLEXITY, + parse: true, + } satisfies ScraperAPIParams; + + const { data } = await sapiClient.scrape({ scrapingParams: params }); + + const text = JSON.stringify(data, null, 2); + + return { + content: [ + { + type: 'text', + text, + }, + ], + }; + } + ); + }; +} diff --git a/src/tools/reddit-post-tool.ts b/src/tools/reddit-post-tool.ts index 724f775..2d07d0e 100644 --- a/src/tools/reddit-post-tool.ts +++ b/src/tools/reddit-post-tool.ts @@ -2,9 +2,10 @@ import z from 'zod'; import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { ScraperAPIParams, ScrapingMCPParams } from 'types'; import { ScraperApiClient } from 'clients/scraper-api-client'; -import { SCRAPER_API_TARGETS } from '../constants'; +import { SCRAPER_API_TARGETS, TOOLSET } 
from '../constants'; export class RedditPostTool { + static toolset = TOOLSET.SOCIAL_MEDIA; static register = ({ server, sapiClient, diff --git a/src/tools/reddit-subreddit-tool.ts b/src/tools/reddit-subreddit-tool.ts index 2b4bb9b..1acaa8b 100644 --- a/src/tools/reddit-subreddit-tool.ts +++ b/src/tools/reddit-subreddit-tool.ts @@ -2,9 +2,10 @@ import z from 'zod'; import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { ScraperAPIParams, ScrapingMCPParams } from 'types'; import { ScraperApiClient } from 'clients/scraper-api-client'; -import { SCRAPER_API_TARGETS } from '../constants'; +import { SCRAPER_API_TARGETS, TOOLSET } from '../constants'; export class RedditSubredditTool { + static toolset = TOOLSET.SOCIAL_MEDIA; static register = ({ server, sapiClient, diff --git a/src/tools/scrape-as-markdown-tool.ts b/src/tools/scrape-as-markdown-tool.ts index c6a4676..801abac 100644 --- a/src/tools/scrape-as-markdown-tool.ts +++ b/src/tools/scrape-as-markdown-tool.ts @@ -4,9 +4,11 @@ import { ScrapingMCPParams } from 'types'; import { ScraperApiClient } from '../clients/scraper-api-client'; import { NodeHtmlMarkdown } from 'node-html-markdown'; import { zodFullResponse, zodGeo, zodJsRender, zodLocale, zodTokenLimit } from '../zod/zod-types'; +import { TOOLSET } from '../constants'; export class ScrapeAsMarkdownTool { - static LARGE_CONTENT_SYMBOL_COUNT = 10_000; + static toolset = TOOLSET.WEB; + static LARGE_CONTENT_SYMBOL_COUNT = 100_000; static isResponseOverLimit = (content: string) => { return content.length > this.LARGE_CONTENT_SYMBOL_COUNT; diff --git a/src/tools/tool.ts b/src/tools/tool.ts new file mode 100644 index 0000000..72abc66 --- /dev/null +++ b/src/tools/tool.ts @@ -0,0 +1,8 @@ +import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; +import { ScraperApiClient } from '../clients/scraper-api-client'; +import { TOOLSET } from '../constants'; + +export type ToolClass = { + readonly toolset: TOOLSET; + register: (args: { 
server: McpServer; sapiClient: ScraperApiClient }) => void; +}; diff --git a/src/types.ts b/src/types.ts index 8723915..976f672 100644 --- a/src/types.ts +++ b/src/types.ts @@ -3,6 +3,8 @@ import { SCRAPER_API_TARGETS } from './constants'; export type ScrapingMCPParams = { url?: string; query?: string; + prompt?: string; + search?: boolean; geo?: string; locale?: string; jsRender?: boolean; @@ -15,6 +17,8 @@ export type ScraperAPIParams = { target?: SCRAPER_API_TARGETS; url?: string; query?: string; + prompt?: string; + search?: boolean; geo?: string; locale?: string; headless?: string; From 2777bb98782e32d5121a920a20e323962e1f1d3b Mon Sep 17 00:00:00 2001 From: Domantas Jurkus Date: Fri, 17 Apr 2026 10:12:45 +0300 Subject: [PATCH 2/8] Add client error handling + more tests --- src/__tests__/chatgpt-tool.test.ts | 73 +++++++++++++++++++ src/__tests__/mocks/amazon-search-parsed.json | 73 +++++++++++++++++++ src/__tests__/perplexity-tool.test.ts | 73 +++++++++++++++++++ src/__tests__/reddit-post-tool.test.ts | 64 ++++++---------- src/__tests__/reddit-subreddit-tool.test.ts | 64 ++++++---------- src/__tests__/scrape-as-markdown-tool.test.ts | 7 +- src/__tests__/scraper-api-client.test.ts | 10 ++- src/__tests__/screenshot-tool.test.ts | 11 ++- src/clients/scraper-api-client.ts | 2 +- src/tools/scrape-as-markdown-tool.ts | 27 +------ 10 files changed, 281 insertions(+), 123 deletions(-) create mode 100644 src/__tests__/chatgpt-tool.test.ts create mode 100644 src/__tests__/mocks/amazon-search-parsed.json create mode 100644 src/__tests__/perplexity-tool.test.ts diff --git a/src/__tests__/chatgpt-tool.test.ts b/src/__tests__/chatgpt-tool.test.ts new file mode 100644 index 0000000..3e76441 --- /dev/null +++ b/src/__tests__/chatgpt-tool.test.ts @@ -0,0 +1,73 @@ +import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; +import { ScraperApiClient } from '../clients/scraper-api-client'; +import { ChatGPTTool } from '../tools/chatgpt-tool'; +import { 
SCRAPER_API_TARGETS, TOOLSET } from '../constants'; + +jest.mock('@modelcontextprotocol/sdk/server/mcp.js'); +jest.mock('../clients/scraper-api-client'); + +describe('ChatGPTTool', () => { + let server: jest.Mocked; + let sapiClient: jest.Mocked; + + beforeEach(() => { + server = new McpServer({ name: 'test', version: '1.0' }) as jest.Mocked; + server.registerTool = jest.fn(); + sapiClient = new ScraperApiClient({ auth: 'test' }) as jest.Mocked; + }); + + it('has ai toolset', () => { + expect(ChatGPTTool.toolset).toBe(TOOLSET.AI); + }); + + it('registers with correct tool name', () => { + ChatGPTTool.register({ server, sapiClient }); + + expect(server.registerTool).toHaveBeenCalledWith( + 'chatgpt', + expect.any(Object), + expect.any(Function) + ); + }); + + it('calls scrape with CHATGPT target and parse: true', async () => { + const mockData = { response: 'Hello! How can I help you?' }; + sapiClient.scrape = jest.fn().mockResolvedValue({ data: mockData }); + + ChatGPTTool.register({ server, sapiClient }); + + const handler = (server.registerTool as jest.Mock).mock.calls[0][2]; + const result = await handler({ prompt: 'What is TypeScript?' }); + + expect(sapiClient.scrape).toHaveBeenCalledWith({ + scrapingParams: expect.objectContaining({ + prompt: 'What is TypeScript?', + target: SCRAPER_API_TARGETS.CHATGPT, + parse: true, + }), + }); + + expect(result.content).toHaveLength(1); + expect(result.content[0].type).toBe('text'); + expect(JSON.parse(result.content[0].text)).toEqual(mockData); + }); + + it('passes search parameter when provided', async () => { + const mockData = { response: 'Search results...' 
}; + sapiClient.scrape = jest.fn().mockResolvedValue({ data: mockData }); + + ChatGPTTool.register({ server, sapiClient }); + + const handler = (server.registerTool as jest.Mock).mock.calls[0][2]; + await handler({ prompt: 'Latest news', search: true }); + + expect(sapiClient.scrape).toHaveBeenCalledWith({ + scrapingParams: expect.objectContaining({ + prompt: 'Latest news', + search: true, + target: SCRAPER_API_TARGETS.CHATGPT, + parse: true, + }), + }); + }); +}); diff --git a/src/__tests__/mocks/amazon-search-parsed.json b/src/__tests__/mocks/amazon-search-parsed.json new file mode 100644 index 0000000..86b02fc --- /dev/null +++ b/src/__tests__/mocks/amazon-search-parsed.json @@ -0,0 +1,73 @@ +{ + "results": [ + { + "content": { + "results": { + "url": "https://www.amazon.com/s?k=laptop", + "page": 1, + "query": "laptop", + "results": { + "organic": [ + { + "pos": 1, + "url": "/dp/B0CX23V2ZK", + "asin": "B0CX23V2ZK", + "price": 799.99, + "title": "Laptop 15.6 inch, 16GB RAM", + "rating": 4.5, + "currency": "USD", + "is_prime": true, + "reviews_count": 1234 + }, + { + "pos": 2, + "url": "/dp/B0D1234567", + "asin": "B0D1234567", + "price": 599.99, + "title": "Budget Laptop 14 inch, 8GB RAM", + "rating": 4.2, + "currency": "USD", + "is_prime": false, + "reviews_count": 567 + } + ] + }, + "suggested": [ + { + "title": "laptop stand", + "url": "/s?k=laptop+stand" + }, + { + "title": "laptop bag", + "url": "/s?k=laptop+bag" + } + ], + "amazons_choices": [ + { + "pos": 1, + "url": "/dp/B0AMAZONCHOICE", + "asin": "B0AMAZONCHOICE", + "price": 699.99, + "title": "Amazon's Choice Laptop" + } + ], + "refinements": { + "brands": ["HP", "Dell", "Lenovo", "Apple", "ASUS"], + "price_ranges": ["$200 - $400", "$400 - $600", "$600 - $800", "$800+"] + }, + "parse_status_code": 12000 + }, + "errors": [], + "status_code": 12000, + "task_id": "7341006309614950402" + }, + "headers": {}, + "status_code": 200, + "url": "https://www.amazon.com/s?k=laptop", + "query": "laptop", + "task_id": 
"7341006309614950402", + "created_at": "2025-06-18 07:38:13", + "updated_at": "2025-06-18 07:38:15" + } + ] +} diff --git a/src/__tests__/perplexity-tool.test.ts b/src/__tests__/perplexity-tool.test.ts new file mode 100644 index 0000000..d93c511 --- /dev/null +++ b/src/__tests__/perplexity-tool.test.ts @@ -0,0 +1,73 @@ +import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; +import { ScraperApiClient } from '../clients/scraper-api-client'; +import { PerplexityTool } from '../tools/perplexity-tool'; +import { SCRAPER_API_TARGETS, TOOLSET } from '../constants'; + +jest.mock('@modelcontextprotocol/sdk/server/mcp.js'); +jest.mock('../clients/scraper-api-client'); + +describe('PerplexityTool', () => { + let server: jest.Mocked; + let sapiClient: jest.Mocked; + + beforeEach(() => { + server = new McpServer({ name: 'test', version: '1.0' }) as jest.Mocked; + server.registerTool = jest.fn(); + sapiClient = new ScraperApiClient({ auth: 'test' }) as jest.Mocked; + }); + + it('has ai toolset', () => { + expect(PerplexityTool.toolset).toBe(TOOLSET.AI); + }); + + it('registers with correct tool name', () => { + PerplexityTool.register({ server, sapiClient }); + + expect(server.registerTool).toHaveBeenCalledWith( + 'perplexity', + expect.any(Object), + expect.any(Function) + ); + }); + + it('calls scrape with PERPLEXITY target and parse: true', async () => { + const mockData = { answer: 'Perplexity response with sources', sources: ['url1', 'url2'] }; + sapiClient.scrape = jest.fn().mockResolvedValue({ data: mockData }); + + PerplexityTool.register({ server, sapiClient }); + + const handler = (server.registerTool as jest.Mock).mock.calls[0][2]; + const result = await handler({ prompt: 'What is MCP?' 
}); + + expect(sapiClient.scrape).toHaveBeenCalledWith({ + scrapingParams: expect.objectContaining({ + prompt: 'What is MCP?', + target: SCRAPER_API_TARGETS.PERPLEXITY, + parse: true, + }), + }); + + expect(result.content).toHaveLength(1); + expect(result.content[0].type).toBe('text'); + expect(JSON.parse(result.content[0].text)).toEqual(mockData); + }); + + it('passes geo parameter when provided', async () => { + const mockData = { answer: 'Local response' }; + sapiClient.scrape = jest.fn().mockResolvedValue({ data: mockData }); + + PerplexityTool.register({ server, sapiClient }); + + const handler = (server.registerTool as jest.Mock).mock.calls[0][2]; + await handler({ prompt: 'Weather today', geo: 'United States' }); + + expect(sapiClient.scrape).toHaveBeenCalledWith({ + scrapingParams: expect.objectContaining({ + prompt: 'Weather today', + geo: 'United States', + target: SCRAPER_API_TARGETS.PERPLEXITY, + parse: true, + }), + }); + }); +}); diff --git a/src/__tests__/reddit-post-tool.test.ts b/src/__tests__/reddit-post-tool.test.ts index a789298..6915cb8 100644 --- a/src/__tests__/reddit-post-tool.test.ts +++ b/src/__tests__/reddit-post-tool.test.ts @@ -1,75 +1,53 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { ScraperApiClient } from '../clients/scraper-api-client'; import { RedditPostTool } from '../tools/reddit-post-tool'; -import { ScrapingMCPParams } from '../types'; +import { SCRAPER_API_TARGETS, TOOLSET } from '../constants'; jest.mock('@modelcontextprotocol/sdk/server/mcp.js'); jest.mock('../clients/scraper-api-client'); -const MockedMcpServer = McpServer as jest.MockedClass; -const MockedScraperApiClient = ScraperApiClient as jest.MockedClass; - describe('RedditPostTool', () => { let server: jest.Mocked; let sapiClient: jest.Mocked; - let registeredHandler: (params: ScrapingMCPParams) => Promise; - const auth = 'dGVzdDp0ZXN0'; beforeEach(() => { - server = new MockedMcpServer({ name: 'test', version: '0.0.0' }) as 
jest.Mocked; - sapiClient = new MockedScraperApiClient() as jest.Mocked; - - server.registerTool = jest.fn((_name, _config, handler) => { - registeredHandler = handler as typeof registeredHandler; - return server; - }); + server = new McpServer({ name: 'test', version: '1.0' }) as jest.Mocked; + server.registerTool = jest.fn(); + sapiClient = new ScraperApiClient({ auth: 'test' }) as jest.Mocked; + }); - RedditPostTool.register({ server, sapiClient, getAuthToken: () => auth }); + it('has social_media toolset', () => { + expect(RedditPostTool.toolset).toBe(TOOLSET.SOCIAL_MEDIA); }); - it('registers a tool named "reddit_post"', () => { + it('registers with correct tool name', () => { + RedditPostTool.register({ server, sapiClient }); + expect(server.registerTool).toHaveBeenCalledWith( 'reddit_post', - expect.objectContaining({ description: expect.stringContaining('Reddit') }), + expect.any(Object), expect.any(Function) ); }); - it('returns a text content block with pretty-printed JSON', async () => { - const mockData = { title: 'Test post', comments: [{ body: 'Great post!' 
}] }; + it('calls scrape with REDDIT_POST target', async () => { + const mockData = { title: 'Test post', comments: [] }; sapiClient.scrape = jest.fn().mockResolvedValue({ data: mockData }); - const result = (await registeredHandler({ - url: 'https://www.reddit.com/r/test/comments/abc123/', - })) as { content: { type: string; text: string }[] }; - - expect(result.content).toHaveLength(1); - expect(result.content[0].type).toBe('text'); - expect(result.content[0].text).toBe(JSON.stringify(mockData, null, 2)); - }); - - it('passes reddit_post target to the scraper', async () => { - sapiClient.scrape = jest.fn().mockResolvedValue({ data: {} }); + RedditPostTool.register({ server, sapiClient }); - const postUrl = 'https://www.reddit.com/r/horseracing/comments/1nsrn3/'; - await registeredHandler({ url: postUrl }); + const handler = (server.registerTool as jest.Mock).mock.calls[0][2]; + const result = await handler({ url: 'https://reddit.com/r/test/comments/abc' }); expect(sapiClient.scrape).toHaveBeenCalledWith({ - auth, scrapingParams: expect.objectContaining({ - url: postUrl, - target: 'reddit_post', + url: 'https://reddit.com/r/test/comments/abc', + target: SCRAPER_API_TARGETS.REDDIT_POST, }), }); - }); - it('propagates scraper errors', async () => { - sapiClient.scrape = jest - .fn() - .mockRejectedValue(new Error('Scraper API request failed (401): Authentication failed.')); - - await expect( - registeredHandler({ url: 'https://www.reddit.com/r/test/comments/abc123/' }) - ).rejects.toThrow('Scraper API request failed (401): Authentication failed.'); + expect(result.content).toHaveLength(1); + expect(result.content[0].type).toBe('text'); + expect(JSON.parse(result.content[0].text)).toEqual(mockData); }); }); diff --git a/src/__tests__/reddit-subreddit-tool.test.ts b/src/__tests__/reddit-subreddit-tool.test.ts index bc763a3..a8ed224 100644 --- a/src/__tests__/reddit-subreddit-tool.test.ts +++ b/src/__tests__/reddit-subreddit-tool.test.ts @@ -1,75 +1,53 @@ import { 
McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { ScraperApiClient } from '../clients/scraper-api-client'; import { RedditSubredditTool } from '../tools/reddit-subreddit-tool'; -import { ScrapingMCPParams } from '../types'; +import { SCRAPER_API_TARGETS, TOOLSET } from '../constants'; jest.mock('@modelcontextprotocol/sdk/server/mcp.js'); jest.mock('../clients/scraper-api-client'); -const MockedMcpServer = McpServer as jest.MockedClass; -const MockedScraperApiClient = ScraperApiClient as jest.MockedClass; - describe('RedditSubredditTool', () => { let server: jest.Mocked; let sapiClient: jest.Mocked; - let registeredHandler: (params: ScrapingMCPParams) => Promise; - const auth = 'dGVzdDp0ZXN0'; beforeEach(() => { - server = new MockedMcpServer({ name: 'test', version: '0.0.0' }) as jest.Mocked; - sapiClient = new MockedScraperApiClient() as jest.Mocked; - - server.registerTool = jest.fn((_name, _config, handler) => { - registeredHandler = handler as typeof registeredHandler; - return server; - }); + server = new McpServer({ name: 'test', version: '1.0' }) as jest.Mocked; + server.registerTool = jest.fn(); + sapiClient = new ScraperApiClient({ auth: 'test' }) as jest.Mocked; + }); - RedditSubredditTool.register({ server, sapiClient, getAuthToken: () => auth }); + it('has social_media toolset', () => { + expect(RedditSubredditTool.toolset).toBe(TOOLSET.SOCIAL_MEDIA); }); - it('registers a tool named "reddit_subreddit"', () => { + it('registers with correct tool name', () => { + RedditSubredditTool.register({ server, sapiClient }); + expect(server.registerTool).toHaveBeenCalledWith( 'reddit_subreddit', - expect.objectContaining({ description: expect.stringContaining('subreddit') }), + expect.any(Object), expect.any(Function) ); }); - it('returns a text content block with pretty-printed JSON', async () => { - const mockData = { posts: [{ title: 'Top post', score: 1200 }] }; + it('calls scrape with REDDIT_SUBREDDIT target', async () => { + const 
mockData = { posts: [{ title: 'Post 1' }, { title: 'Post 2' }] }; sapiClient.scrape = jest.fn().mockResolvedValue({ data: mockData }); - const result = (await registeredHandler({ url: 'https://www.reddit.com/r/Python/' })) as { - content: { type: string; text: string }[]; - }; + RedditSubredditTool.register({ server, sapiClient }); - expect(result.content).toHaveLength(1); - expect(result.content[0].type).toBe('text'); - expect(result.content[0].text).toBe(JSON.stringify(mockData, null, 2)); - }); - - it('passes reddit_subreddit target to the scraper', async () => { - sapiClient.scrape = jest.fn().mockResolvedValue({ data: {} }); - - const subredditUrl = 'https://www.reddit.com/r/Python/'; - await registeredHandler({ url: subredditUrl }); + const handler = (server.registerTool as jest.Mock).mock.calls[0][2]; + const result = await handler({ url: 'https://reddit.com/r/programming' }); expect(sapiClient.scrape).toHaveBeenCalledWith({ - auth, scrapingParams: expect.objectContaining({ - url: subredditUrl, - target: 'reddit_subreddit', + url: 'https://reddit.com/r/programming', + target: SCRAPER_API_TARGETS.REDDIT_SUBREDDIT, }), }); - }); - - it('propagates scraper errors', async () => { - sapiClient.scrape = jest - .fn() - .mockRejectedValue(new Error('Scraper API request failed (401): Authentication failed.')); - await expect(registeredHandler({ url: 'https://www.reddit.com/r/Python/' })).rejects.toThrow( - 'Scraper API request failed (401): Authentication failed.' 
- ); + expect(result.content).toHaveLength(1); + expect(result.content[0].type).toBe('text'); + expect(JSON.parse(result.content[0].text)).toEqual(mockData); }); }); diff --git a/src/__tests__/scrape-as-markdown-tool.test.ts b/src/__tests__/scrape-as-markdown-tool.test.ts index c593408..222a74e 100644 --- a/src/__tests__/scrape-as-markdown-tool.test.ts +++ b/src/__tests__/scrape-as-markdown-tool.test.ts @@ -27,8 +27,8 @@ describe('ScrapeAsMarkdownTool', () => { const html = 'x'.repeat(ScrapeAsMarkdownTool.LARGE_CONTENT_SYMBOL_COUNT + 1000); const result = ScrapeAsMarkdownTool.transformResponse({ html }); - expect(result.markdown).toBe( - html.substring(0, ScrapeAsMarkdownTool.LARGE_CONTENT_SYMBOL_COUNT) + expect(result.markdown.length).toBe( + html.substring(0, ScrapeAsMarkdownTool.LARGE_CONTENT_SYMBOL_COUNT).length ); expect(result.isTruncated).toBe(true); }); @@ -55,13 +55,12 @@ describe('ScrapeAsMarkdownTool', () => { expect(result.isTruncated).toBe(true); }); - it('skips truncation when fullResponse is true', () => { + it('does not truncate when response is below limit', () => { const longMarkdown = 'a'.repeat(20_000); mockedNHM.translate.mockReturnValue(longMarkdown); const result = ScrapeAsMarkdownTool.transformResponse({ html: '

long

', - shouldShowFullResponse: true, }); expect(result.markdown.length).toBe(20_000); diff --git a/src/__tests__/scraper-api-client.test.ts b/src/__tests__/scraper-api-client.test.ts index ba9ffbb..3298963 100644 --- a/src/__tests__/scraper-api-client.test.ts +++ b/src/__tests__/scraper-api-client.test.ts @@ -60,12 +60,14 @@ describe('ScraperApiClient', () => { it('throws friendly message on 429', async () => { mockedAxios.request.mockRejectedValue( - createAxiosError({ status: 429, message: 'Too Many Requests' }) + createAxiosError({ + status: 429, + message: 'Too Many Requests', + data: 'Rate limit exceeded', + }) ); - await expect(client.scrape(defaultArgs)).rejects.toThrow( - 'Scraper API request failed (429): Rate limit exceeded, please wait before sending another request.' - ); + await expect(client.scrape(defaultArgs)).rejects.toThrow('Rate limit exceeded'); }); it('uses server message on 502', async () => { diff --git a/src/__tests__/screenshot-tool.test.ts b/src/__tests__/screenshot-tool.test.ts index eb281a8..e6cc4a6 100644 --- a/src/__tests__/screenshot-tool.test.ts +++ b/src/__tests__/screenshot-tool.test.ts @@ -36,10 +36,13 @@ describe('ScreenshotTool', () => { }); it('returns an image/png content block', async () => { - const base64png = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=='; + const base64png = + 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=='; sapiClient.scrape = jest.fn().mockResolvedValue({ data: base64png }); - const result = await registeredHandler({ url: 'https://example.com' }) as { content: { type: string; data: string; mimeType: string }[] }; + const result = (await registeredHandler({ url: 'https://example.com' })) as { + content: { type: string; data: string; mimeType: string }[]; + }; expect(result.content).toHaveLength(1); expect(result.content[0]).toEqual({ @@ -61,7 +64,9 @@ describe('ScreenshotTool', () => { }); it('propagates 
scraper errors', async () => { - sapiClient.scrape = jest.fn().mockRejectedValue(new Error('Scraper API request failed (401): Authentication failed.')); + sapiClient.scrape = jest + .fn() + .mockRejectedValue(new Error('Scraper API request failed (401): Authentication failed.')); await expect(registeredHandler({ url: 'https://example.com' })).rejects.toThrow( 'Scraper API request failed (401): Authentication failed.' diff --git a/src/clients/scraper-api-client.ts b/src/clients/scraper-api-client.ts index 6720ba4..ec27682 100644 --- a/src/clients/scraper-api-client.ts +++ b/src/clients/scraper-api-client.ts @@ -60,7 +60,7 @@ export class ScraperApiClient { errorMessage = 'Authentication failed.'; } if (error.response?.status === 429) { - errorMessage = 'Rate limit exceeded, please wait before sending another request.'; + errorMessage = JSON.stringify(error.response?.data); } throw new Error(`Scraper API request failed (${status}): ${errorMessage}`); diff --git a/src/tools/scrape-as-markdown-tool.ts b/src/tools/scrape-as-markdown-tool.ts index 801abac..e28510e 100644 --- a/src/tools/scrape-as-markdown-tool.ts +++ b/src/tools/scrape-as-markdown-tool.ts @@ -14,33 +14,11 @@ export class ScrapeAsMarkdownTool { return content.length > this.LARGE_CONTENT_SYMBOL_COUNT; }; - static shouldTruncateResponse = ({ - content, - shouldShowFullResponse, - }: { - content: string; - shouldShowFullResponse?: boolean; - }) => { - if (shouldShowFullResponse) { - return false; - } - - return this.isResponseOverLimit(content); - }; - static truncateResponse = ({ content, limit }: { content: string; limit: number }) => { return content.substring(0, limit); }; - static transformResponse = ({ - html, - tokenLimit, - shouldShowFullResponse, - }: { - html: string; - tokenLimit?: number; - shouldShowFullResponse?: boolean; - }) => { + static transformResponse = ({ html, tokenLimit }: { html: string; tokenLimit?: number }) => { let markdown: string; try { markdown = 
NodeHtmlMarkdown.translate(html, {}); @@ -48,7 +26,7 @@ export class ScrapeAsMarkdownTool { markdown = html; } - if (this.shouldTruncateResponse({ content: markdown, shouldShowFullResponse })) { + if (tokenLimit || this.isResponseOverLimit(markdown)) { const truncated = this.truncateResponse({ content: markdown, limit: tokenLimit || this.LARGE_CONTENT_SYMBOL_COUNT, @@ -94,7 +72,6 @@ export class ScrapeAsMarkdownTool { const { markdown, isTruncated } = this.transformResponse({ html: data, tokenLimit: scrapingParams.tokenLimit, - shouldShowFullResponse: scrapingParams.fullResponse, }); return { From 5c352f5d7439ba0dd2f771d4094a589aa65fddcf Mon Sep 17 00:00:00 2001 From: Domantas Jurkus Date: Fri, 17 Apr 2026 10:17:24 +0300 Subject: [PATCH 3/8] Remove fullResponse param + readme --- README.md | 22 +++++++++++++++++++++- src/tools/scrape-as-markdown-tool.ts | 3 +-- src/types.ts | 1 - src/zod/zod-types.ts | 5 ----- 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 429328c..603b6c2 100644 --- a/README.md +++ b/README.md @@ -96,6 +96,25 @@ The server exposes the following tools: | `amazon_search_parsed` | Scrapes Amazon Search for a given query, and returns parsed results. | Scrape Amazon Search for toothbrushes. | | `reddit_post` | Scrapes a specific Reddit post for a given query, and returns parsed results. | Scrape the following Reddit post: https://www.reddit.com/r/horseracing/comments/1nsrn3/ | | `reddit_subreddit` | Scrapes a specific Reddit subreddit for a given query, and returns parsed results. | Scrape the top 5 posts on r/Python this week. | +| `chatgpt` | Search and interact with ChatGPT for AI-powered responses and conversations. | Ask ChatGPT to explain quantum computing in simple terms. | +| `perplexity` | Search and interact with Perplexity for AI-powered responses and conversations. | Ask Perplexity what the latest trends in web development are. | + +## Toolsets + +Tools are organized into toolsets. 
You can selectively enable specific toolsets by passing a +comma-separated list via the `TOOLSETS` environment variable. When no toolsets are specified, all +tools are registered. + +``` +TOOLSETS=web,ai +``` + +| Toolset | Tools | +| -------------- | -------------------------------------------- | +| `web` | `scrape_as_markdown`, `google_search_parsed` | +| `ecommerce` | `amazon_search_parsed` | +| `social_media` | `reddit_post`, `reddit_subreddit` | +| `ai` | `chatgpt`, `perplexity` | ## Parameters @@ -107,7 +126,8 @@ The following parameters are inferred from user prompts: | `geo` | Sets the country from which the request will originate. | | `locale` | Sets the locale of the request. | | `tokenLimit` | Truncates the response content up to this limit. Useful if the context window is small. | -| `fullResponse` | Skips automatic truncation and returns full content. If context window is small, may throw warnings. | +| `prompt` | Prompt to send to AI tools (`chatgpt`, `perplexity`). | +| `search` | Activates ChatGPT's web search functionality (`chatgpt` only). 
| ## Examples diff --git a/src/tools/scrape-as-markdown-tool.ts b/src/tools/scrape-as-markdown-tool.ts index e28510e..f96fb61 100644 --- a/src/tools/scrape-as-markdown-tool.ts +++ b/src/tools/scrape-as-markdown-tool.ts @@ -3,7 +3,7 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { ScrapingMCPParams } from 'types'; import { ScraperApiClient } from '../clients/scraper-api-client'; import { NodeHtmlMarkdown } from 'node-html-markdown'; -import { zodFullResponse, zodGeo, zodJsRender, zodLocale, zodTokenLimit } from '../zod/zod-types'; +import { zodGeo, zodJsRender, zodLocale, zodTokenLimit } from '../zod/zod-types'; import { TOOLSET } from '../constants'; export class ScrapeAsMarkdownTool { @@ -57,7 +57,6 @@ export class ScrapeAsMarkdownTool { locale: zodLocale, jsRender: zodJsRender, tokenLimit: zodTokenLimit, - fullResponse: zodFullResponse, }, annotations: { readOnlyHint: true, diff --git a/src/types.ts b/src/types.ts index 976f672..8617b0f 100644 --- a/src/types.ts +++ b/src/types.ts @@ -10,7 +10,6 @@ export type ScrapingMCPParams = { jsRender?: boolean; headless?: string; tokenLimit?: number; - fullResponse?: boolean; }; export type ScraperAPIParams = { diff --git a/src/zod/zod-types.ts b/src/zod/zod-types.ts index 026e73b..507bc16 100644 --- a/src/zod/zod-types.ts +++ b/src/zod/zod-types.ts @@ -18,8 +18,3 @@ export const zodTokenLimit = z `The number of tokens to return in the response - anything above this limit will be truncated` ) .optional(); - -export const zodFullResponse = z - .boolean() - .describe(`If true, content will not be truncated`) - .optional(); From 95c329a3e79bd991555906ac9df3528ca970f1b6 Mon Sep 17 00:00:00 2001 From: Domantas Jurkus Date: Fri, 17 Apr 2026 10:43:11 +0300 Subject: [PATCH 4/8] Add toolset support --- README.md | 12 +++- src/__tests__/chatgpt-tool.test.ts | 11 ++-- src/__tests__/perplexity-tool.test.ts | 11 ++-- src/__tests__/reddit-post-tool.test.ts | 8 ++- 
src/__tests__/reddit-subreddit-tool.test.ts | 8 ++- src/server.ts | 13 +++- src/server/sapi-base-server.ts | 67 +++++++++++---------- src/server/sapi-http-server.ts | 5 +- src/server/sapi-stdio-server.ts | 13 +++- src/tools/chatgpt-tool.ts | 6 +- src/tools/perplexity-tool.ts | 6 +- src/tools/screenshot-tool.ts | 2 + src/tools/tool.ts | 6 +- 13 files changed, 112 insertions(+), 56 deletions(-) diff --git a/README.md b/README.md index 603b6c2..0e4fdab 100644 --- a/README.md +++ b/README.md @@ -102,13 +102,19 @@ The server exposes the following tools: ## Toolsets Tools are organized into toolsets. You can selectively enable specific toolsets by passing a -comma-separated list via the `TOOLSETS` environment variable. When no toolsets are specified, all -tools are registered. +comma-separated list via the `toolsets` query parameter: ``` -TOOLSETS=web,ai + "Decodo MCP Server": { + "url": "https://mcp.decodo.com/mcp?toolsets=web,ai", + "headers": { + "Authorization": "Basic " + } + } ``` +When no toolsets are specified, all tools are registered. 
+ | Toolset | Tools | | -------------- | -------------------------------------------- | | `web` | `scrape_as_markdown`, `google_search_parsed` | diff --git a/src/__tests__/chatgpt-tool.test.ts b/src/__tests__/chatgpt-tool.test.ts index 3e76441..372c99a 100644 --- a/src/__tests__/chatgpt-tool.test.ts +++ b/src/__tests__/chatgpt-tool.test.ts @@ -9,11 +9,12 @@ jest.mock('../clients/scraper-api-client'); describe('ChatGPTTool', () => { let server: jest.Mocked; let sapiClient: jest.Mocked; + const auth = 'dGVzdDp0ZXN0'; beforeEach(() => { server = new McpServer({ name: 'test', version: '1.0' }) as jest.Mocked; server.registerTool = jest.fn(); - sapiClient = new ScraperApiClient({ auth: 'test' }) as jest.Mocked; + sapiClient = new ScraperApiClient() as jest.Mocked; }); it('has ai toolset', () => { @@ -21,7 +22,7 @@ describe('ChatGPTTool', () => { }); it('registers with correct tool name', () => { - ChatGPTTool.register({ server, sapiClient }); + ChatGPTTool.register({ server, sapiClient, getAuthToken: () => auth }); expect(server.registerTool).toHaveBeenCalledWith( 'chatgpt', @@ -34,12 +35,13 @@ describe('ChatGPTTool', () => { const mockData = { response: 'Hello! How can I help you?' }; sapiClient.scrape = jest.fn().mockResolvedValue({ data: mockData }); - ChatGPTTool.register({ server, sapiClient }); + ChatGPTTool.register({ server, sapiClient, getAuthToken: () => auth }); const handler = (server.registerTool as jest.Mock).mock.calls[0][2]; const result = await handler({ prompt: 'What is TypeScript?' }); expect(sapiClient.scrape).toHaveBeenCalledWith({ + auth, scrapingParams: expect.objectContaining({ prompt: 'What is TypeScript?', target: SCRAPER_API_TARGETS.CHATGPT, @@ -56,12 +58,13 @@ describe('ChatGPTTool', () => { const mockData = { response: 'Search results...' 
}; sapiClient.scrape = jest.fn().mockResolvedValue({ data: mockData }); - ChatGPTTool.register({ server, sapiClient }); + ChatGPTTool.register({ server, sapiClient, getAuthToken: () => auth }); const handler = (server.registerTool as jest.Mock).mock.calls[0][2]; await handler({ prompt: 'Latest news', search: true }); expect(sapiClient.scrape).toHaveBeenCalledWith({ + auth, scrapingParams: expect.objectContaining({ prompt: 'Latest news', search: true, diff --git a/src/__tests__/perplexity-tool.test.ts b/src/__tests__/perplexity-tool.test.ts index d93c511..62b730f 100644 --- a/src/__tests__/perplexity-tool.test.ts +++ b/src/__tests__/perplexity-tool.test.ts @@ -9,11 +9,12 @@ jest.mock('../clients/scraper-api-client'); describe('PerplexityTool', () => { let server: jest.Mocked; let sapiClient: jest.Mocked; + const auth = 'dGVzdDp0ZXN0'; beforeEach(() => { server = new McpServer({ name: 'test', version: '1.0' }) as jest.Mocked; server.registerTool = jest.fn(); - sapiClient = new ScraperApiClient({ auth: 'test' }) as jest.Mocked; + sapiClient = new ScraperApiClient() as jest.Mocked; }); it('has ai toolset', () => { @@ -21,7 +22,7 @@ describe('PerplexityTool', () => { }); it('registers with correct tool name', () => { - PerplexityTool.register({ server, sapiClient }); + PerplexityTool.register({ server, sapiClient, getAuthToken: () => auth }); expect(server.registerTool).toHaveBeenCalledWith( 'perplexity', @@ -34,12 +35,13 @@ describe('PerplexityTool', () => { const mockData = { answer: 'Perplexity response with sources', sources: ['url1', 'url2'] }; sapiClient.scrape = jest.fn().mockResolvedValue({ data: mockData }); - PerplexityTool.register({ server, sapiClient }); + PerplexityTool.register({ server, sapiClient, getAuthToken: () => auth }); const handler = (server.registerTool as jest.Mock).mock.calls[0][2]; const result = await handler({ prompt: 'What is MCP?' 
}); expect(sapiClient.scrape).toHaveBeenCalledWith({ + auth, scrapingParams: expect.objectContaining({ prompt: 'What is MCP?', target: SCRAPER_API_TARGETS.PERPLEXITY, @@ -56,12 +58,13 @@ describe('PerplexityTool', () => { const mockData = { answer: 'Local response' }; sapiClient.scrape = jest.fn().mockResolvedValue({ data: mockData }); - PerplexityTool.register({ server, sapiClient }); + PerplexityTool.register({ server, sapiClient, getAuthToken: () => auth }); const handler = (server.registerTool as jest.Mock).mock.calls[0][2]; await handler({ prompt: 'Weather today', geo: 'United States' }); expect(sapiClient.scrape).toHaveBeenCalledWith({ + auth, scrapingParams: expect.objectContaining({ prompt: 'Weather today', geo: 'United States', diff --git a/src/__tests__/reddit-post-tool.test.ts b/src/__tests__/reddit-post-tool.test.ts index 6915cb8..3cc2a0c 100644 --- a/src/__tests__/reddit-post-tool.test.ts +++ b/src/__tests__/reddit-post-tool.test.ts @@ -9,11 +9,12 @@ jest.mock('../clients/scraper-api-client'); describe('RedditPostTool', () => { let server: jest.Mocked; let sapiClient: jest.Mocked; + const auth = 'dGVzdDp0ZXN0'; beforeEach(() => { server = new McpServer({ name: 'test', version: '1.0' }) as jest.Mocked; server.registerTool = jest.fn(); - sapiClient = new ScraperApiClient({ auth: 'test' }) as jest.Mocked; + sapiClient = new ScraperApiClient() as jest.Mocked; }); it('has social_media toolset', () => { @@ -21,7 +22,7 @@ describe('RedditPostTool', () => { }); it('registers with correct tool name', () => { - RedditPostTool.register({ server, sapiClient }); + RedditPostTool.register({ server, sapiClient, getAuthToken: () => auth }); expect(server.registerTool).toHaveBeenCalledWith( 'reddit_post', @@ -34,12 +35,13 @@ describe('RedditPostTool', () => { const mockData = { title: 'Test post', comments: [] }; sapiClient.scrape = jest.fn().mockResolvedValue({ data: mockData }); - RedditPostTool.register({ server, sapiClient }); + RedditPostTool.register({ server, 
sapiClient, getAuthToken: () => auth }); const handler = (server.registerTool as jest.Mock).mock.calls[0][2]; const result = await handler({ url: 'https://reddit.com/r/test/comments/abc' }); expect(sapiClient.scrape).toHaveBeenCalledWith({ + auth, scrapingParams: expect.objectContaining({ url: 'https://reddit.com/r/test/comments/abc', target: SCRAPER_API_TARGETS.REDDIT_POST, diff --git a/src/__tests__/reddit-subreddit-tool.test.ts b/src/__tests__/reddit-subreddit-tool.test.ts index a8ed224..8abf092 100644 --- a/src/__tests__/reddit-subreddit-tool.test.ts +++ b/src/__tests__/reddit-subreddit-tool.test.ts @@ -9,11 +9,12 @@ jest.mock('../clients/scraper-api-client'); describe('RedditSubredditTool', () => { let server: jest.Mocked; let sapiClient: jest.Mocked; + const auth = 'dGVzdDp0ZXN0'; beforeEach(() => { server = new McpServer({ name: 'test', version: '1.0' }) as jest.Mocked; server.registerTool = jest.fn(); - sapiClient = new ScraperApiClient({ auth: 'test' }) as jest.Mocked; + sapiClient = new ScraperApiClient() as jest.Mocked; }); it('has social_media toolset', () => { @@ -21,7 +22,7 @@ describe('RedditSubredditTool', () => { }); it('registers with correct tool name', () => { - RedditSubredditTool.register({ server, sapiClient }); + RedditSubredditTool.register({ server, sapiClient, getAuthToken: () => auth }); expect(server.registerTool).toHaveBeenCalledWith( 'reddit_subreddit', @@ -34,12 +35,13 @@ describe('RedditSubredditTool', () => { const mockData = { posts: [{ title: 'Post 1' }, { title: 'Post 2' }] }; sapiClient.scrape = jest.fn().mockResolvedValue({ data: mockData }); - RedditSubredditTool.register({ server, sapiClient }); + RedditSubredditTool.register({ server, sapiClient, getAuthToken: () => auth }); const handler = (server.registerTool as jest.Mock).mock.calls[0][2]; const result = await handler({ url: 'https://reddit.com/r/programming' }); expect(sapiClient.scrape).toHaveBeenCalledWith({ + auth, scrapingParams: expect.objectContaining({ url: 
'https://reddit.com/r/programming', target: SCRAPER_API_TARGETS.REDDIT_SUBREDDIT, diff --git a/src/server.ts b/src/server.ts index f2025a2..82ccea9 100644 --- a/src/server.ts +++ b/src/server.ts @@ -2,11 +2,20 @@ import 'dotenv/config'; import express from 'express'; import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js'; import { ScraperAPIHttpServer } from './server/sapi-http-server'; +import { TOOLSET } from './constants'; const app = express(); app.use(express.json()); +const resolveToolsets = (toolsets?: string): TOOLSET[] => { + if (!toolsets) { + return []; + } + + return toolsets.split(',').map(toolset => toolset as TOOLSET); +}; + app.get('/mcp', (_req, res) => { res.status(200).send('server up, use POST /mcp to see available tools'); }); @@ -28,7 +37,9 @@ app.post('/mcp', async (req, res) => { const token = parts[1]; - const server = new ScraperAPIHttpServer(); + const toolsets = resolveToolsets(req.query.toolsets as string); + + const server = new ScraperAPIHttpServer({ toolsets }); const transport = new StreamableHTTPServerTransport({ sessionIdGenerator: undefined, diff --git a/src/server/sapi-base-server.ts b/src/server/sapi-base-server.ts index 9f0a091..00b8b29 100644 --- a/src/server/sapi-base-server.ts +++ b/src/server/sapi-base-server.ts @@ -3,13 +3,17 @@ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js' import { ScraperApiClient } from '../clients/scraper-api-client'; import { AmazonSearchParsedTool, + ChatGPTTool, GoogleSearchParsedTool, + PerplexityTool, RedditPostTool, RedditSubredditTool, ScrapeAsMarkdownTool, ScreenshotTool, } from '../tools'; +import { ToolClass } from '../tools/tool'; import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js'; +import { TOOLSET } from '../constants'; export class ScraperAPIBaseServer { server: McpServer; @@ -18,14 +22,14 @@ export class ScraperAPIBaseServer { auth: string = ''; - constructor() 
{ + constructor({ toolsets = [] }: { toolsets: TOOLSET[] }) { this.server = new McpServer({ name: 'decodo', version: '1.0.3', }); this.sapiClient = new ScraperApiClient(); - this.registerTools(); + this.registerTools({ toolsets }); this.registerResources(); } @@ -38,38 +42,39 @@ export class ScraperAPIBaseServer { this.server.connect(transport); } - registerTools() { + static allTools: ToolClass[] = [ + ScrapeAsMarkdownTool, + ScreenshotTool, + GoogleSearchParsedTool, + AmazonSearchParsedTool, + RedditPostTool, + RedditSubredditTool, + ChatGPTTool, + PerplexityTool, + ]; + + registerTools({ toolsets }: { toolsets: TOOLSET[] }) { + if (toolsets.length === 0) { + this.registerAllTools(); + return; + } + const getAuthToken = () => this.auth; - // scrape - ScrapeAsMarkdownTool.register({ - server: this.server, - sapiClient: this.sapiClient, - getAuthToken, - }); + for (const toolset of toolsets) { + const tools = ScraperAPIBaseServer.allTools.filter(tool => tool.toolset === toolset); + for (const tool of tools) { + tool.register({ server: this.server, sapiClient: this.sapiClient, getAuthToken }); + } + } + } - // targets - GoogleSearchParsedTool.register({ - server: this.server, - sapiClient: this.sapiClient, - getAuthToken, - }); - AmazonSearchParsedTool.register({ - server: this.server, - sapiClient: this.sapiClient, - getAuthToken, - }); - RedditPostTool.register({ server: this.server, sapiClient: this.sapiClient, getAuthToken }); - RedditSubredditTool.register({ - server: this.server, - sapiClient: this.sapiClient, - getAuthToken, - }); - ScreenshotTool.register({ - server: this.server, - sapiClient: this.sapiClient, - getAuthToken, - }); + registerAllTools() { + const getAuthToken = () => this.auth; + + for (const tool of ScraperAPIBaseServer.allTools) { + tool.register({ server: this.server, sapiClient: this.sapiClient, getAuthToken }); + } } registerResources() { diff --git a/src/server/sapi-http-server.ts b/src/server/sapi-http-server.ts index e4b8680..f998ed8 
100644 --- a/src/server/sapi-http-server.ts +++ b/src/server/sapi-http-server.ts @@ -1,13 +1,14 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { ScraperApiClient } from '../clients/scraper-api-client'; import { ScraperAPIBaseServer } from './sapi-base-server'; +import { TOOLSET } from '../constants'; export class ScraperAPIHttpServer extends ScraperAPIBaseServer { server: McpServer; sapiClient: ScraperApiClient; - constructor() { - super(); + constructor({ toolsets = [] }: { toolsets: TOOLSET[] }) { + super({ toolsets }); } } diff --git a/src/server/sapi-stdio-server.ts b/src/server/sapi-stdio-server.ts index c177659..427e0f9 100644 --- a/src/server/sapi-stdio-server.ts +++ b/src/server/sapi-stdio-server.ts @@ -1,14 +1,23 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { ScraperApiClient } from '../clients/scraper-api-client'; import { ScraperAPIBaseServer } from './sapi-base-server'; +import { TOOLSET } from '../constants'; export class ScraperAPIStdioServer extends ScraperAPIBaseServer { server: McpServer; sapiClient: ScraperApiClient; - constructor({ sapiUsername, sapiPassword }: { sapiUsername: string; sapiPassword: string }) { - super(); + constructor({ + sapiUsername, + sapiPassword, + toolsets = [], + }: { + sapiUsername: string; + sapiPassword: string; + toolsets: TOOLSET[]; + }) { + super({ toolsets }); this.setAuthToken(Buffer.from(`${sapiUsername}:${sapiPassword}`).toString('base64')); } diff --git a/src/tools/chatgpt-tool.ts b/src/tools/chatgpt-tool.ts index 461ebf2..1a8e5a8 100644 --- a/src/tools/chatgpt-tool.ts +++ b/src/tools/chatgpt-tool.ts @@ -10,9 +10,11 @@ export class ChatGPTTool { static register = ({ server, sapiClient, + getAuthToken, }: { server: McpServer; sapiClient: ScraperApiClient; + getAuthToken: () => string; }) => { server.registerTool( 'chatgpt', @@ -35,7 +37,9 @@ export class ChatGPTTool { parse: true, } satisfies ScraperAPIParams; - const { data } = await 
sapiClient.scrape({ scrapingParams: params }); + const auth = getAuthToken(); + + const { data } = await sapiClient.scrape({ auth, scrapingParams: params }); const text = JSON.stringify(data, null, 2); diff --git a/src/tools/perplexity-tool.ts b/src/tools/perplexity-tool.ts index 8ad8093..306a34b 100644 --- a/src/tools/perplexity-tool.ts +++ b/src/tools/perplexity-tool.ts @@ -10,9 +10,11 @@ export class PerplexityTool { static register = ({ server, sapiClient, + getAuthToken, }: { server: McpServer; sapiClient: ScraperApiClient; + getAuthToken: () => string; }) => { server.registerTool( 'perplexity', @@ -35,7 +37,9 @@ export class PerplexityTool { parse: true, } satisfies ScraperAPIParams; - const { data } = await sapiClient.scrape({ scrapingParams: params }); + const auth = getAuthToken(); + + const { data } = await sapiClient.scrape({ auth, scrapingParams: params }); const text = JSON.stringify(data, null, 2); diff --git a/src/tools/screenshot-tool.ts b/src/tools/screenshot-tool.ts index 360ebbe..57b3674 100644 --- a/src/tools/screenshot-tool.ts +++ b/src/tools/screenshot-tool.ts @@ -3,8 +3,10 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { ScrapingMCPParams } from 'types'; import { ScraperApiClient } from '../clients/scraper-api-client'; import { zodGeo } from '../zod/zod-types'; +import { TOOLSET } from '../constants'; export class ScreenshotTool { + static toolset = TOOLSET.WEB; static register = ({ server, sapiClient, diff --git a/src/tools/tool.ts b/src/tools/tool.ts index 72abc66..7da0b34 100644 --- a/src/tools/tool.ts +++ b/src/tools/tool.ts @@ -4,5 +4,9 @@ import { TOOLSET } from '../constants'; export type ToolClass = { readonly toolset: TOOLSET; - register: (args: { server: McpServer; sapiClient: ScraperApiClient }) => void; + register: (args: { + server: McpServer; + sapiClient: ScraperApiClient; + getAuthToken: () => string; + }) => void; }; From 7b6ab31d30352a7e660c370a71af80f793276a5f Mon Sep 17 00:00:00 2001 From: 
Domantas Jurkus Date: Fri, 17 Apr 2026 12:23:28 +0300 Subject: [PATCH 5/8] update readme --- README.md | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 0e4fdab..e5708c8 100644 --- a/README.md +++ b/README.md @@ -84,21 +84,6 @@ this: -## Tools - -The server exposes the following tools: - -| Tool | Description | Example prompt | -| ---------------------- | ------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------- | -| `scrape_as_markdown` | Scrapes any target URL, expects a URL to be given via prompt. Returns results in Markdown. | Scrape peacock.com from a US IP address and tell me the pricing. | -| `screenshot` | Captures a screenshot of any webpage and returns it as a PNG image. | Take a screenshot of github.com from a US IP address. | -| `google_search_parsed` | Scrapes Google Search for a given query, and returns parsed results. | Scrape Google Search for shoes and tell me the top position. | -| `amazon_search_parsed` | Scrapes Amazon Search for a given query, and returns parsed results. | Scrape Amazon Search for toothbrushes. | -| `reddit_post` | Scrapes a specific Reddit post for a given query, and returns parsed results. | Scrape the following Reddit post: https://www.reddit.com/r/horseracing/comments/1nsrn3/ | -| `reddit_subreddit` | Scrapes a specific Reddit subreddit for a given query, and returns parsed results. | Scrape the top 5 posts on r/Python this week. | -| `chatgpt` | Search and interact with ChatGPT for AI-powered responses and conversations. | Ask ChatGPT to explain quantum computing in simple terms. | -| `perplexity` | Search and interact with Perplexity for AI-powered responses and conversations. | Ask Perplexity what the latest trends in web development are. | - ## Toolsets Tools are organized into toolsets. 
You can selectively enable specific toolsets by passing a @@ -122,6 +107,21 @@ When no toolsets are specified, all tools are registered. | `social_media` | `reddit_post`, `reddit_subreddit` | | `ai` | `chatgpt`, `perplexity` | +## Tools + +The server exposes the following tools: + +| Tool | Description | Example prompt | +| ---------------------- | ------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------- | +| `scrape_as_markdown` | Scrapes any target URL, expects a URL to be given via prompt. Returns results in Markdown. | Scrape peacock.com from a US IP address and tell me the pricing. | +| `screenshot` | Captures a screenshot of any webpage and returns it as a PNG image. | Take a screenshot of github.com from a US IP address. | +| `google_search_parsed` | Scrapes Google Search for a given query, and returns parsed results. | Scrape Google Search for shoes and tell me the top position. | +| `amazon_search_parsed` | Scrapes Amazon Search for a given query, and returns parsed results. | Scrape Amazon Search for toothbrushes. | +| `reddit_post` | Scrapes a specific Reddit post for a given query, and returns parsed results. | Scrape the following Reddit post: https://www.reddit.com/r/horseracing/comments/1nsrn3/ | +| `reddit_subreddit` | Scrapes a specific Reddit subreddit for a given query, and returns parsed results. | Scrape the top 5 posts on r/Python this week. | +| `chatgpt` | Search and interact with ChatGPT for AI-powered responses and conversations. | Ask ChatGPT to explain quantum computing in simple terms. | +| `perplexity` | Search and interact with Perplexity for AI-powered responses and conversations. | Ask Perplexity what the latest trends in web development are. 
| + ## Parameters The following parameters are inferred from user prompts: From 3ae5944fc7e8705f4d0d2c92783616858c32c04b Mon Sep 17 00:00:00 2001 From: Domantas Jurkus Date: Fri, 17 Apr 2026 12:31:44 +0300 Subject: [PATCH 6/8] up --- src/index.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/index.ts b/src/index.ts index 2634f0c..31d0142 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,7 +1,7 @@ import 'dotenv/config'; import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; -import { ScraperAPIMCPServer } from './sapi-mcp-server'; +import { ScraperAPIStdioServer } from 'server/sapi-stdio-server'; import { TOOLSET } from './constants'; if (process.env.ENABLE_MCPS_LOGGER) { @@ -40,7 +40,7 @@ async function main() { const toolsets = resolveToolsets(process.env.TOOLSETS); - const sapiMcpServer = new ScraperAPIMCPServer({ + const sapiMcpServer = new ScraperAPIStdioServer({ sapiUsername, sapiPassword, toolsets, From a9075847d2aa1a62a554a0a2dea10ba5bcd6bf8d Mon Sep 17 00:00:00 2001 From: Domantas Jurkus Date: Fri, 17 Apr 2026 12:38:38 +0300 Subject: [PATCH 7/8] update readme --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index e5708c8..9ec175f 100644 --- a/README.md +++ b/README.md @@ -100,11 +100,11 @@ comma-separated list via the `toolsets` query parameter: When no toolsets are specified, all tools are registered. 
-| Toolset | Tools | -| -------------- | -------------------------------------------- | -| `web` | `scrape_as_markdown`, `google_search_parsed` | -| `ecommerce` | `amazon_search_parsed` | -| `social_media` | `reddit_post`, `reddit_subreddit` | +| Toolset | Tools | +| -------------- | -------------------------------------------------------------- | +| `web` | `scrape_as_markdown`, `screenshot`, `google_search_parsed` | +| `ecommerce` | `amazon_search_parsed` | +| `social_media` | `reddit_post`, `reddit_subreddit` | | `ai` | `chatgpt`, `perplexity` | ## Tools From 0ff6d44063cf7492b112cb60feb45ac2cce13638 Mon Sep 17 00:00:00 2001 From: Domantas Jurkus Date: Mon, 20 Apr 2026 15:49:56 +0300 Subject: [PATCH 8/8] Move google search tool --- README.md | 5 +++-- src/constants.ts | 1 + src/tools/google-search-parsed-tool.ts | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 9ec175f..62d8d16 100644 --- a/README.md +++ b/README.md @@ -102,10 +102,11 @@ When no toolsets are specified, all tools are registered.
| Toolset | Tools | | -------------- | -------------------------------------------------------------- | -| `web` | `scrape_as_markdown`, `screenshot`, `google_search_parsed` | +| `web` | `scrape_as_markdown`, `screenshot` | +| `search` | `google_search_parsed` | | `ecommerce` | `amazon_search_parsed` | | `social_media` | `reddit_post`, `reddit_subreddit` | -| `ai` | `chatgpt`, `perplexity` | +| `ai` | `chatgpt`, `perplexity` | ## Tools diff --git a/src/constants.ts b/src/constants.ts index c622029..658ab37 100644 --- a/src/constants.ts +++ b/src/constants.ts @@ -1,5 +1,6 @@ export enum TOOLSET { WEB = 'web', + SEARCH = 'search', ECOMMERCE = 'ecommerce', SOCIAL_MEDIA = 'social_media', AI = 'ai', diff --git a/src/tools/google-search-parsed-tool.ts b/src/tools/google-search-parsed-tool.ts index 2ef1fdb..b3b9cc0 100644 --- a/src/tools/google-search-parsed-tool.ts +++ b/src/tools/google-search-parsed-tool.ts @@ -7,7 +7,7 @@ import { removeKeyFromNestedObject } from '../utils'; import { zodGeo, zodLocale, zodJsRender } from '../zod/zod-types'; export class GoogleSearchParsedTool { - static toolset = TOOLSET.WEB; + static toolset = TOOLSET.SEARCH; static FIELDS_WITH_HIGH_CHAR_COUNT = [ 'images', 'image_data',