From c80b6d4f6e9e4e7b7a4f6d6878095f433130cc70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B3=BD=E5=8A=A0=E6=AD=A6?= Date: Thu, 2 Apr 2026 12:41:34 +0800 Subject: [PATCH] fix(xiaohongshu): retry once on intermittent empty first paint --- src/clis/xiaohongshu/search.test.ts | 30 ++++++++++++ src/clis/xiaohongshu/search.ts | 76 +++++++++++++++++------------ 2 files changed, 74 insertions(+), 32 deletions(-) diff --git a/src/clis/xiaohongshu/search.test.ts b/src/clis/xiaohongshu/search.test.ts index 15d8799e..60aa704e 100644 --- a/src/clis/xiaohongshu/search.test.ts +++ b/src/clis/xiaohongshu/search.test.ts @@ -140,6 +140,36 @@ describe('xiaohongshu search', () => { expect(result).toHaveLength(1); expect(result[0]).toMatchObject({ rank: 1, title: 'Result A' }); }); + + it('retries once when the first pass returns empty results', async () => { + const cmd = getRegistry().get('xiaohongshu/search'); + expect(cmd?.func).toBeTypeOf('function'); + + const page = createPageMock([ + // First pass: login check + empty extraction + false, + { loginWall: false, results: [] }, + // Retry pass: login check + non-empty extraction + false, + { + loginWall: false, + results: [ + { + title: 'Retry Result', + author: 'UserR', + likes: '9', + url: 'https://www.xiaohongshu.com/search_result/69b739f00000000000000000', + author_url: '', + }, + ], + }, + ]); + + const result = (await cmd!.func!(page, { query: '测试重试', limit: 5 })) as any[]; + expect(result).toHaveLength(1); + expect(result[0]).toMatchObject({ title: 'Retry Result' }); + expect(page.goto).toHaveBeenCalledTimes(2); + }); }); describe('noteIdToDate (ObjectID timestamp parsing)', () => { diff --git a/src/clis/xiaohongshu/search.ts b/src/clis/xiaohongshu/search.ts index 506fccb7..a9c12807 100644 --- a/src/clis/xiaohongshu/search.ts +++ b/src/clis/xiaohongshu/search.ts @@ -39,28 +39,30 @@ cli({ columns: ['rank', 'title', 'author', 'likes', 'published_at', 'url'], func: async (page, kwargs) => { const keyword = encodeURIComponent(kwargs.query); - await page.goto( - `https://www.xiaohongshu.com/search_result?keyword=${keyword}&source=web_search_result_notes` - ); - await page.wait(3); - - // Early login-wall detection: XHS may show a login gate instead of - // results. Check *before* autoScroll to avoid crashing on a page - // that has no meaningful content to scroll through. - const loginCheck = await page.evaluate(` + const searchUrl = + `https://www.xiaohongshu.com/search_result?keyword=${keyword}&source=web_search_result_notes`; + + const fetchAttempt = async () => { + await page.goto(searchUrl); + await page.wait(3); + + // Early login-wall detection: XHS may show a login gate instead of + // results. Check *before* autoScroll to avoid crashing on a page + // that has no meaningful content to scroll through. + const loginCheck = await page.evaluate(` (() => /登录后查看搜索结果/.test(document.body?.innerText || ''))() `); - if (loginCheck) { - throw new AuthRequiredError( - 'www.xiaohongshu.com', - 'Xiaohongshu search results are blocked behind a login wall', - ); - } + if (loginCheck) { + throw new AuthRequiredError( + 'www.xiaohongshu.com', + 'Xiaohongshu search results are blocked behind a login wall', + ); + } - // Scroll a couple of times to load more results - await page.autoScroll({ times: 2 }); + // Scroll a couple of times to load more results + await page.autoScroll({ times: 2 }); - const payload = await page.evaluate(` + const payload = await page.evaluate(` (() => { const loginWall = /登录后查看搜索结果/.test(document.body.innerText || ''); @@ -114,20 +116,30 @@ cli({ })() `); - if (!payload || typeof payload !== 'object') return []; - - if ((payload as any).loginWall) { - throw new AuthRequiredError('www.xiaohongshu.com', 'Xiaohongshu search results are blocked behind a login wall'); + if (!payload || typeof payload !== 'object') return []; + + if ((payload as any).loginWall) { + throw new AuthRequiredError('www.xiaohongshu.com', 'Xiaohongshu search results are blocked behind a login wall'); + } + + const data: any[] = Array.isArray((payload as any).results) ? (payload as any).results : []; + return data + .filter((item: any) => item.title) + .slice(0, kwargs.limit) + .map((item: any, i: number) => ({ + rank: i + 1, + ...item, + published_at: noteIdToDate(item.url), + })); + }; + + let results = await fetchAttempt(); + if (!results.length) { + // XHS search can intermittently render blank blocks in the first paint. + // Retry once with a fresh navigation before returning empty. + await page.wait(1); + results = await fetchAttempt(); } - - const data: any[] = Array.isArray((payload as any).results) ? (payload as any).results : []; - return data - .filter((item: any) => item.title) - .slice(0, kwargs.limit) - .map((item: any, i: number) => ({ - rank: i + 1, - ...item, - published_at: noteIdToDate(item.url), - })); + return results; }, });