Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions src/clis/xiaohongshu/search.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,36 @@ describe('xiaohongshu search', () => {
expect(result).toHaveLength(1);
expect(result[0]).toMatchObject({ rank: 1, title: 'Result A' });
});

// Regression test for the single-retry behaviour of the search command:
// an empty first extraction must lead to a second navigation whose
// results are what the command finally returns.
it('retries once when the first pass returns empty results', async () => {
  const command = getRegistry().get('xiaohongshu/search');
  expect(command?.func).toBeTypeOf('function');

  // Payload for the first pass: no login wall, but nothing extracted.
  const emptyPass = { loginWall: false, results: [] };

  // Payload for the retry pass: a single well-formed note.
  const retriedNote = {
    title: 'Retry Result',
    author: 'UserR',
    likes: '9',
    url: 'https://www.xiaohongshu.com/search_result/69b739f00000000000000000',
    author_url: '',
  };
  const retryPass = { loginWall: false, results: [retriedNote] };

  // Values are listed in the order the command consumes them: each pass is
  // a login check (false = no login wall) followed by an extraction payload.
  // NOTE(review): presumably createPageMock hands these out as successive
  // page.evaluate results — confirm against the helper's definition.
  const page = createPageMock([false, emptyPass, false, retryPass]);

  const rows = (await command!.func!(page, { query: '测试重试', limit: 5 })) as any[];

  expect(rows).toHaveLength(1);
  expect(rows[0]).toMatchObject({ title: 'Retry Result' });
  // One navigation per attempt: the initial pass plus exactly one retry.
  expect(page.goto).toHaveBeenCalledTimes(2);
});
});

describe('noteIdToDate (ObjectID timestamp parsing)', () => {
Expand Down
76 changes: 44 additions & 32 deletions src/clis/xiaohongshu/search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,28 +39,30 @@ cli({
columns: ['rank', 'title', 'author', 'likes', 'published_at', 'url'],
func: async (page, kwargs) => {
const keyword = encodeURIComponent(kwargs.query);
await page.goto(
`https://www.xiaohongshu.com/search_result?keyword=${keyword}&source=web_search_result_notes`
);
await page.wait(3);

// Early login-wall detection: XHS may show a login gate instead of
// results. Check *before* autoScroll to avoid crashing on a page
// that has no meaningful content to scroll through.
const loginCheck = await page.evaluate(`
const searchUrl =
`https://www.xiaohongshu.com/search_result?keyword=${keyword}&source=web_search_result_notes`;

const fetchAttempt = async () => {
await page.goto(searchUrl);
await page.wait(3);

// Early login-wall detection: XHS may show a login gate instead of
// results. Check *before* autoScroll to avoid crashing on a page
// that has no meaningful content to scroll through.
const loginCheck = await page.evaluate(`
(() => /登录后查看搜索结果/.test(document.body?.innerText || ''))()
`);
if (loginCheck) {
throw new AuthRequiredError(
'www.xiaohongshu.com',
'Xiaohongshu search results are blocked behind a login wall',
);
}
if (loginCheck) {
throw new AuthRequiredError(
'www.xiaohongshu.com',
'Xiaohongshu search results are blocked behind a login wall',
);
}

// Scroll a couple of times to load more results
await page.autoScroll({ times: 2 });
// Scroll a couple of times to load more results
await page.autoScroll({ times: 2 });

const payload = await page.evaluate(`
const payload = await page.evaluate(`
(() => {
const loginWall = /登录后查看搜索结果/.test(document.body.innerText || '');

Expand Down Expand Up @@ -114,20 +116,30 @@ cli({
})()
`);

if (!payload || typeof payload !== 'object') return [];

if ((payload as any).loginWall) {
throw new AuthRequiredError('www.xiaohongshu.com', 'Xiaohongshu search results are blocked behind a login wall');
if (!payload || typeof payload !== 'object') return [];

if ((payload as any).loginWall) {
throw new AuthRequiredError('www.xiaohongshu.com', 'Xiaohongshu search results are blocked behind a login wall');
}

const data: any[] = Array.isArray((payload as any).results) ? (payload as any).results : [];
return data
.filter((item: any) => item.title)
.slice(0, kwargs.limit)
.map((item: any, i: number) => ({
rank: i + 1,
...item,
published_at: noteIdToDate(item.url),
}));
};

let results = await fetchAttempt();
if (!results.length) {
// XHS search can intermittently render blank blocks in the first paint.
// Retry once with a fresh navigation before returning empty.
await page.wait(1);
results = await fetchAttempt();
}

const data: any[] = Array.isArray((payload as any).results) ? (payload as any).results : [];
return data
.filter((item: any) => item.title)
.slice(0, kwargs.limit)
.map((item: any, i: number) => ({
rank: i + 1,
...item,
published_at: noteIdToDate(item.url),
}));
return results;
},
});