Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 134 additions & 0 deletions src/clis/xiaohongshu/search.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import { describe, expect, it, vi } from 'vitest';
import type { IPage } from '../../types.js';
import { getRegistry } from '../../registry.js';
import './search.js';

/**
 * Builds a stand-in `IPage` for tests in which every method is a Vitest mock.
 *
 * @param evaluateResults Values that successive `page.evaluate()` calls resolve
 *   to, queued in order (one entry per call).
 * @returns A page whose `getFormState` resolves to an empty form state,
 *   whose `screenshot` resolves to an empty string, whose list-style getters
 *   resolve to `[]`, and whose remaining methods resolve to `undefined`.
 */
function createPageMock(evaluateResults: any[]): IPage {
  // Each call hands back a brand-new mock so call counts are tracked per method.
  const resolving = (value?: unknown) => vi.fn().mockResolvedValue(value);

  const evaluate = vi.fn();
  for (let index = 0; index < evaluateResults.length; index += 1) {
    evaluate.mockResolvedValueOnce(evaluateResults[index]);
  }

  const page: IPage = {
    goto: resolving(),
    evaluate,
    snapshot: resolving(),
    click: resolving(),
    typeText: resolving(),
    pressKey: resolving(),
    scrollTo: resolving(),
    getFormState: resolving({ forms: [], orphanFields: [] }),
    wait: resolving(),
    tabs: resolving([]),
    closeTab: resolving(),
    newTab: resolving(),
    selectTab: resolving(),
    networkRequests: resolving([]),
    consoleMessages: resolving([]),
    scroll: resolving(),
    autoScroll: resolving(),
    installInterceptor: resolving(),
    getInterceptedRequests: resolving([]),
    getCookies: resolving([]),
    screenshot: resolving(''),
  };
  return page;
}

describe('xiaohongshu search', () => {
  /**
   * Looks up the registered `xiaohongshu/search` command and returns its
   * handler. Throws (failing the calling test) when the command or its `func`
   * is missing, which also narrows the type so no non-null assertions are
   * needed at the call sites.
   */
  const resolveSearchFunc = () => {
    const func = getRegistry().get('xiaohongshu/search')?.func;
    if (typeof func !== 'function') {
      throw new Error('xiaohongshu/search is not registered with a callable func');
    }
    return func;
  };

  it('throws a clear error when the search page is blocked by a login wall', async () => {
    const search = resolveSearchFunc();

    // The single evaluate() call reports a login wall and no results.
    const page = createPageMock([
      {
        loginWall: true,
        results: [],
      },
    ]);

    await expect(search(page, { query: '特斯拉', limit: 5 })).rejects.toThrow(
      'Xiaohongshu search results are blocked behind a login wall'
    );
  });

  it('returns ranked results with search_result url and author_url preserved', async () => {
    const search = resolveSearchFunc();

    const detailUrl =
      'https://www.xiaohongshu.com/search_result/68e90be80000000004022e66?xsec_token=test-token&xsec_source=';
    const authorUrl =
      'https://www.xiaohongshu.com/user/profile/635a9c720000000018028b40?xsec_token=user-token&xsec_source=pc_search';

    const page = createPageMock([
      {
        loginWall: false,
        results: [
          {
            title: '某鱼买FSD被坑了4万',
            author: '随风',
            likes: '261',
            url: detailUrl,
            author_url: authorUrl,
          },
        ],
      },
    ]);

    const result = await search(page, { query: '特斯拉', limit: 1 });

    // Should only do one goto (the search page itself), no per-note detail navigation
    expect(page.goto).toHaveBeenCalledTimes(1);

    expect(result).toEqual([
      {
        rank: 1,
        title: '某鱼买FSD被坑了4万',
        author: '随风',
        likes: '261',
        url: detailUrl,
        author_url: authorUrl,
      },
    ]);
  });

  it('filters out results with no title and respects the limit', async () => {
    const search = resolveSearchFunc();

    // The untitled entry is deliberately first: if title filtering were removed,
    // limit=1 would return it instead of "Result A" and this test would fail.
    // "Result C" is a second valid entry that the limit must cut off.
    const page = createPageMock([
      {
        loginWall: false,
        results: [
          {
            title: '',
            author: 'UserB',
            likes: '5',
            url: 'https://www.xiaohongshu.com/search_result/bbb',
            author_url: '',
          },
          {
            title: 'Result A',
            author: 'UserA',
            likes: '10',
            url: 'https://www.xiaohongshu.com/search_result/aaa',
            author_url: '',
          },
          {
            title: 'Result C',
            author: 'UserC',
            likes: '3',
            url: 'https://www.xiaohongshu.com/search_result/ccc',
            author_url: '',
          },
        ],
      },
    ]);

    const result = await search(page, { query: '测试', limit: 1 });

    // limit=1 should return only the first valid-titled result, ranked first
    expect(result).toHaveLength(1);
    expect(result).toEqual([expect.objectContaining({ rank: 1, title: 'Result A' })]);
  });
});
66 changes: 51 additions & 15 deletions src/clis/xiaohongshu/search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ cli({
{ name: 'query', required: true, positional: true, help: 'Search keyword' },
{ name: 'limit', type: 'int', default: 20, help: 'Number of results' },
],
columns: ['rank', 'title', 'author', 'likes'],
columns: ['rank', 'title', 'author', 'likes', 'url'],
func: async (page, kwargs) => {
const keyword = encodeURIComponent(kwargs.query);
await page.goto(
Expand All @@ -29,34 +29,70 @@ cli({
// Scroll a couple of times to load more results
await page.autoScroll({ times: 2 });

const data = await page.evaluate(`
const payload = await page.evaluate(`
(() => {
const notes = document.querySelectorAll('section.note-item');
const loginWall = /登录后查看搜索结果/.test(document.body.innerText || '');

const normalizeUrl = (href) => {
if (!href) return '';
if (href.startsWith('http://') || href.startsWith('https://')) return href;
if (href.startsWith('/')) return 'https://www.xiaohongshu.com' + href;
return '';
};

const cleanText = (value) => (value || '').replace(/\\s+/g, ' ').trim();

const results = [];
notes.forEach(el => {
const seen = new Set();

document.querySelectorAll('section.note-item').forEach(el => {
// Skip "related searches" sections
if (el.classList.contains('query-note-item')) return;

const titleEl = el.querySelector('.title, .note-title, a.title');
const nameEl = el.querySelector('.name, .author-name, .nick-name');
const titleEl = el.querySelector('.title, .note-title, a.title, .footer .title span');
const nameEl = el.querySelector('a.author .name, .name, .author-name, .nick-name, a.author');
const likesEl = el.querySelector('.count, .like-count, .like-wrapper .count');
const linkEl = el.querySelector('a[href*="/explore/"], a[href*="/search_result/"], a[href*="/note/"]');
// Prefer search_result link (preserves xsec_token) over generic /explore/ link
const detailLinkEl =
el.querySelector('a.cover.mask') ||
el.querySelector('a[href*="/search_result/"]') ||
el.querySelector('a[href*="/explore/"]') ||
el.querySelector('a[href*="/note/"]');
const authorLinkEl = el.querySelector('a.author, a[href*="/user/profile/"]');

const href = linkEl?.getAttribute('href') || '';
const noteId = href.match(/\\/(?:explore|note)\\/([a-zA-Z0-9]+)/)?.[1] || '';
const url = normalizeUrl(detailLinkEl?.getAttribute('href') || '');
if (!url) return;

const key = url;
if (seen.has(key)) return;
seen.add(key);

results.push({
title: (titleEl?.textContent || '').trim(),
author: (nameEl?.textContent || '').trim(),
likes: (likesEl?.textContent || '0').trim(),
url: noteId ? 'https://www.xiaohongshu.com/explore/' + noteId : '',
title: cleanText(titleEl?.textContent || ''),
author: cleanText(nameEl?.textContent || ''),
likes: cleanText(likesEl?.textContent || '0'),
url,
author_url: normalizeUrl(authorLinkEl?.getAttribute('href') || ''),
});
});
return results;

return {
loginWall,
results,
};
})()
`);

if (!Array.isArray(data)) return [];
if (!payload || typeof payload !== 'object') return [];

if ((payload as any).loginWall) {
throw new Error(
'Xiaohongshu search results are blocked behind a login wall for the current browser session. ' +
'Open https://www.xiaohongshu.com/search_result in Chrome and sign in, then retry.'
);
}

const data: any[] = Array.isArray((payload as any).results) ? (payload as any).results : [];
return data
.filter((item: any) => item.title)
.slice(0, kwargs.limit)
Expand Down