diff --git a/src/clis/xiaohongshu/search.test.ts b/src/clis/xiaohongshu/search.test.ts
new file mode 100644
index 0000000..25c9487
--- /dev/null
+++ b/src/clis/xiaohongshu/search.test.ts
@@ -0,0 +1,141 @@
+import { describe, expect, it, vi } from 'vitest';
+import type { IPage } from '../../types.js';
+import { getRegistry } from '../../registry.js';
+import './search.js';
+
+/**
+ * Builds an IPage stub for the search command. `evaluate` resolves to the
+ * entries of `evaluateResults` one per call, in order; every other method is
+ * a `vi.fn()` that resolves to a fixed empty default.
+ */
+function createPageMock(evaluateResults: unknown[]): IPage {
+  const evaluate = vi.fn();
+  for (const result of evaluateResults) {
+    evaluate.mockResolvedValueOnce(result);
+  }
+
+  return {
+    goto: vi.fn().mockResolvedValue(undefined),
+    evaluate,
+    snapshot: vi.fn().mockResolvedValue(undefined),
+    click: vi.fn().mockResolvedValue(undefined),
+    typeText: vi.fn().mockResolvedValue(undefined),
+    pressKey: vi.fn().mockResolvedValue(undefined),
+    scrollTo: vi.fn().mockResolvedValue(undefined),
+    getFormState: vi.fn().mockResolvedValue({ forms: [], orphanFields: [] }),
+    wait: vi.fn().mockResolvedValue(undefined),
+    tabs: vi.fn().mockResolvedValue([]),
+    closeTab: vi.fn().mockResolvedValue(undefined),
+    newTab: vi.fn().mockResolvedValue(undefined),
+    selectTab: vi.fn().mockResolvedValue(undefined),
+    networkRequests: vi.fn().mockResolvedValue([]),
+    consoleMessages: vi.fn().mockResolvedValue([]),
+    scroll: vi.fn().mockResolvedValue(undefined),
+    autoScroll: vi.fn().mockResolvedValue(undefined),
+    installInterceptor: vi.fn().mockResolvedValue(undefined),
+    getInterceptedRequests: vi.fn().mockResolvedValue([]),
+    getCookies: vi.fn().mockResolvedValue([]),
+    screenshot: vi.fn().mockResolvedValue(''),
+  };
+}
+
+describe('xiaohongshu search', () => {
+  it('throws a clear error when the search page is blocked by a login wall', async () => {
+    const cmd = getRegistry().get('xiaohongshu/search');
+    expect(cmd?.func).toBeTypeOf('function');
+
+    const page = createPageMock([
+      {
+        loginWall: true,
+        results: [],
+      },
+    ]);
+
+    await expect(cmd!.func!(page, { query: '特斯拉', limit: 5 })).rejects.toThrow(
+      'Xiaohongshu search results are blocked behind a login wall'
+    );
+  });
+
+  it('returns ranked results with search_result url and author_url preserved', async () => {
+    const cmd = getRegistry().get('xiaohongshu/search');
+    expect(cmd?.func).toBeTypeOf('function');
+
+    const detailUrl =
+      'https://www.xiaohongshu.com/search_result/68e90be80000000004022e66?xsec_token=test-token&xsec_source=';
+    const authorUrl =
+      'https://www.xiaohongshu.com/user/profile/635a9c720000000018028b40?xsec_token=user-token&xsec_source=pc_search';
+
+    const page = createPageMock([
+      {
+        loginWall: false,
+        results: [
+          {
+            title: '某鱼买FSD被坑了4万',
+            author: '随风',
+            likes: '261',
+            url: detailUrl,
+            author_url: authorUrl,
+          },
+        ],
+      },
+    ]);
+
+    const result = await cmd!.func!(page, { query: '特斯拉', limit: 1 });
+
+    // Should only do one goto (the search page itself), no per-note detail navigation
+    expect(page.goto).toHaveBeenCalledTimes(1);
+
+    expect(result).toEqual([
+      {
+        rank: 1,
+        title: '某鱼买FSD被坑了4万',
+        author: '随风',
+        likes: '261',
+        url: detailUrl,
+        author_url: authorUrl,
+      },
+    ]);
+  });
+
+  it('filters out results with no title and respects the limit', async () => {
+    const cmd = getRegistry().get('xiaohongshu/search');
+    expect(cmd?.func).toBeTypeOf('function');
+
+    // The untitled entry comes first so the test fails if the title filter is
+    // removed or applied after the limit.
+    const page = createPageMock([
+      {
+        loginWall: false,
+        results: [
+          {
+            title: '',
+            author: 'UserB',
+            likes: '5',
+            url: 'https://www.xiaohongshu.com/search_result/bbb',
+            author_url: '',
+          },
+          {
+            title: 'Result A',
+            author: 'UserA',
+            likes: '10',
+            url: 'https://www.xiaohongshu.com/search_result/aaa',
+            author_url: '',
+          },
+          {
+            title: 'Result C',
+            author: 'UserC',
+            likes: '3',
+            url: 'https://www.xiaohongshu.com/search_result/ccc',
+            author_url: '',
+          },
+        ],
+      },
+    ]);
+
+    const result = (await cmd!.func!(page, { query: '测试', limit: 1 })) as any[];
+
+    // limit=1 should return only the first valid-titled result
+    expect(result).toHaveLength(1);
+    expect(result[0]).toMatchObject({ rank: 1, title: 'Result A' });
+  });
+});
diff --git a/src/clis/xiaohongshu/search.ts b/src/clis/xiaohongshu/search.ts index 4d6232a..cde2336 100644 --- a/src/clis/xiaohongshu/search.ts +++ b/src/clis/xiaohongshu/search.ts @@ -18,7 +18,7 @@ cli({ { name: 'query', required: true, positional: true, help: 'Search keyword' }, { name: 'limit', type: 'int', default: 20, help: 'Number of results' }, ], - columns: ['rank', 'title', 'author', 'likes'], + columns: ['rank', 'title', 'author', 'likes', 'url'], func: async (page, kwargs) => { const keyword = encodeURIComponent(kwargs.query); await page.goto( @@ -29,34 +29,70 @@ cli({ // Scroll a couple of times to load more results await page.autoScroll({ times: 2 }); - const data = await page.evaluate(` + const payload = await page.evaluate(` (() => { - const notes = document.querySelectorAll('section.note-item'); + const loginWall = /登录后查看搜索结果/.test(document.body.innerText || ''); + + const normalizeUrl = (href) => { + if (!href) return ''; + if (href.startsWith('http://') || href.startsWith('https://')) return href; + if (href.startsWith('/')) return 'https://www.xiaohongshu.com' + href; + return ''; + }; + + const cleanText = (value) => (value || '').replace(/\\s+/g, ' ').trim(); + const results = []; - notes.forEach(el => { + const seen = new Set(); + + document.querySelectorAll('section.note-item').forEach(el => { // Skip "related searches" sections if (el.classList.contains('query-note-item')) return; - const titleEl = el.querySelector('.title, .note-title, a.title'); - const nameEl = el.querySelector('.name, .author-name, .nick-name'); + const titleEl = el.querySelector('.title, .note-title, a.title, .footer .title span'); + const nameEl = el.querySelector('a.author .name, .name, .author-name, .nick-name, a.author'); const likesEl = el.querySelector('.count, .like-count, .like-wrapper .count'); - const linkEl = el.querySelector('a[href*="/explore/"], a[href*="/search_result/"], a[href*="/note/"]'); + // Prefer search_result link 
(preserves xsec_token) over generic /explore/ link + const detailLinkEl = + el.querySelector('a.cover.mask') || + el.querySelector('a[href*="/search_result/"]') || + el.querySelector('a[href*="/explore/"]') || + el.querySelector('a[href*="/note/"]'); + const authorLinkEl = el.querySelector('a.author, a[href*="/user/profile/"]'); - const href = linkEl?.getAttribute('href') || ''; - const noteId = href.match(/\\/(?:explore|note)\\/([a-zA-Z0-9]+)/)?.[1] || ''; + const url = normalizeUrl(detailLinkEl?.getAttribute('href') || ''); + if (!url) return; + + const key = url; + if (seen.has(key)) return; + seen.add(key); results.push({ - title: (titleEl?.textContent || '').trim(), - author: (nameEl?.textContent || '').trim(), - likes: (likesEl?.textContent || '0').trim(), - url: noteId ? 'https://www.xiaohongshu.com/explore/' + noteId : '', + title: cleanText(titleEl?.textContent || ''), + author: cleanText(nameEl?.textContent || ''), + likes: cleanText(likesEl?.textContent || '0'), + url, + author_url: normalizeUrl(authorLinkEl?.getAttribute('href') || ''), }); }); - return results; + + return { + loginWall, + results, + }; })() `); - if (!Array.isArray(data)) return []; + if (!payload || typeof payload !== 'object') return []; + + if ((payload as any).loginWall) { + throw new Error( + 'Xiaohongshu search results are blocked behind a login wall for the current browser session. ' + + 'Open https://www.xiaohongshu.com/search_result in Chrome and sign in, then retry.' + ); + } + + const data: any[] = Array.isArray((payload as any).results) ? (payload as any).results : []; return data .filter((item: any) => item.title) .slice(0, kwargs.limit)