From 89d1ced4619288829fb57ff0fa63eb1928c1b3ce Mon Sep 17 00:00:00 2001 From: nj-io <26359601+nj-io@users.noreply.github.com> Date: Sun, 5 Apr 2026 11:00:34 +0000 Subject: [PATCH 1/2] feat: enhanced scrapeLikedTweets with rich data and timestamp filtering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the broken xeepy-based x_get_likes handler with a proper scraper in src/scrapers/twitter/index.js, following the same pattern as scrapeBookmarks. Rich data per tweet: - text (with "Show more" expansion), author, handle, timestamp, link - images (attributed to correct author by handle matching) - quoted tweets (detected via multiple UserAvatar-Container elements) - X Articles (title, description, cover image via article-cover-image) - link cards (via card.wrapper) - engagement stats (replies, retweets, likes, views from role="group") Timestamp filtering: - from: only include likes from this date onward, stops scrolling early when older tweets are reached (reverse chronological optimization) - to: only include likes up to this date, skips newer but keeps scrolling - Works in conjunction with limit Bug fixes: - "Show more" clicks one at a time (X re-renders DOM after each click) - Auth check after navigation — fails fast on expired cookies - Scroll-based pagination with deduplication - Removes x_get_likes from xeepyTools, routes through local-tools.js Co-Authored-By: Claude Opus 4.6 (1M context) --- src/mcp/local-tools.js | 8 ++ src/mcp/server.js | 24 +---- src/scrapers/index.js | 2 + src/scrapers/twitter/index.js | 183 +++++++++++++++++++++++++++++++++- 4 files changed, 197 insertions(+), 20 deletions(-) diff --git a/src/mcp/local-tools.js b/src/mcp/local-tools.js index 7ad47e4a..0e0854f9 100644 --- a/src/mcp/local-tools.js +++ b/src/mcp/local-tools.js @@ -20,6 +20,7 @@ import { scrapeTweets, searchTweets, scrapeThread, + scrapeLikedTweets, scrapeLikes, scrapeMedia, scrapeListMembers, @@ -630,6 +631,12 @@ export async function x_get_bookmarks({ limit = 100 }) { return scrapeBookmarks(pg, { limit }); } +export async function x_get_likes({ username, limit = 50, from, to }) { + const { page: pg } = await ensureBrowser(); + const likedTweets = await scrapeLikedTweets(pg, username, { limit, from, to }); + return { likedTweets, count: likedTweets.length, username }; +} + export async function x_clear_bookmarks() { const { page: pg } = await ensureBrowser(); await pg.goto('https://x.com/i/bookmarks', { waitUntil: 'networkidle2' }); @@ -1369,6 +1376,7 @@ export const toolMap = { x_reply, x_bookmark, x_get_bookmarks, + x_get_likes, x_clear_bookmarks, x_auto_like, // Discovery diff --git a/src/mcp/server.js b/src/mcp/server.js index 1f467775..5c25cedd 100755 --- a/src/mcp/server.js +++ b/src/mcp/server.js @@ -1903,12 +1903,14 @@ const TOOLS = [ }, { name: 'x_get_likes', - description: 'Scrape tweets that a user has liked. Shows what content a user engages with.', + description: 'Scrape tweets that a user has liked with rich data. Supports timestamp filtering — likes are reverse chronological, so scrolling stops early when it passes the "from" date.', inputSchema: { type: 'object', properties: { username: { type: 'string', description: 'Username (without @)' }, - limit: { type: 'number', description: 'Maximum liked tweets (default: 50)' }, + limit: { type: 'number', description: 'Maximum liked tweets to return (default: 50)' }, + from: { type: 'string', description: 'Only include likes from this date onward (e.g. "2026-03-01"). Stops scrolling when older tweets are reached.' }, + to: { type: 'string', description: 'Only include likes up to this date (e.g. "2026-03-31"). Skips newer tweets but keeps scrolling.' }, }, required: ['username'], }, @@ -2275,7 +2277,7 @@ async function executeTool(name, args) { const xeepyTools = [ 'x_get_replies', 'x_get_hashtag', 'x_get_likers', 'x_get_retweeters', 'x_get_media', 'x_get_recommendations', 'x_get_mentions', 'x_get_quote_tweets', - 'x_get_likes', 'x_auto_follow', 'x_follow_engagers', 'x_unfollow_all', + 'x_auto_follow', 'x_follow_engagers', 'x_unfollow_all', 'x_smart_unfollow', 'x_quote_tweet', 'x_auto_comment', 'x_auto_retweet', 'x_detect_bots', 'x_find_influencers', 'x_smart_target', 'x_crypto_analyze', 'x_grok_analyze_image', 'x_audience_insights', 'x_engagement_report', @@ -2475,22 +2477,6 @@ async function executeXeepyTool(name, args) { return { quotes, count: quotes.length }; } - case 'x_get_likes': { - const page = await localTools.getPage(); - await page.goto(`https://x.com/${args.username}/likes`, { waitUntil: 'networkidle2', timeout: 30000 }); - await new Promise(r => setTimeout(r, 3000)); - const likedTweets = await page.evaluate((limit) => { - const articles = document.querySelectorAll('article[data-testid="tweet"]'); - return Array.from(articles).slice(0, limit).map(el => { - const textEl = el.querySelector('[data-testid="tweetText"]'); - const userEl = el.querySelector('[data-testid="User-Name"]'); - const timeEl = el.querySelector('time'); - return { text: textEl?.textContent || '', author: userEl?.textContent || '', timestamp: timeEl?.getAttribute('datetime') || '' }; - }); - }, args.limit || 50); - return { likedTweets, count: likedTweets.length, username: args.username }; - } - // ── Follow Automation ── case 'x_auto_follow': { // Find users via search, then follow them with delays diff --git a/src/scrapers/index.js b/src/scrapers/index.js index ff6646f3..786efe38 100644 --- a/src/scrapers/index.js +++ b/src/scrapers/index.js @@ -79,6 +79,7 @@ export const { scrapeTweets, searchTweets, scrapeThread, + scrapeLikedTweets, scrapeLikes, scrapeHashtag, scrapeMedia, @@ -308,6 +309,7 @@ export default { scrapeTweets, searchTweets, scrapeThread, + scrapeLikedTweets, scrapeLikes, scrapeHashtag, scrapeMedia, diff --git a/src/scrapers/twitter/index.js b/src/scrapers/twitter/index.js index ded588d5..e2d7aa05 100644 --- a/src/scrapers/twitter/index.js +++ b/src/scrapers/twitter/index.js @@ -486,7 +486,187 @@ export async function scrapeThread(page, tweetUrl) { } // ============================================================================ -// Likes Scraper +// Liked Tweets Scraper (a user's liked tweets page) +// ============================================================================ + +/** + * Scrape a user's liked tweets (x.com/username/likes). + * Different from scrapeLikes which scrapes who liked a specific tweet. + * + * Returns rich data per tweet: text, author, handle, timestamp, link, images, + * quotedTweet, article, card, and engagement stats. + * + * @param {import('puppeteer').Page} page + * @param {string} username + * @param {object} options + * @param {number} [options.limit=50] - Max tweets to return + * @param {string} [options.from] - Only include likes from this date onward (stops scrolling when older) + * @param {string} [options.to] - Only include likes up to this date (skips newer, keeps scrolling) + */ +export async function scrapeLikedTweets(page, username, options = {}) { + const { limit = 50, from, to } = options; + + if (!username) throw new Error('Username is required for scrapeLikedTweets'); + + const fromDate = from ? new Date(from) : null; + const toDate = to ? new Date(to) : null; + if (fromDate && isNaN(fromDate.getTime())) throw new Error(`Invalid "from" date: ${from}`); + if (toDate && isNaN(toDate.getTime())) throw new Error(`Invalid "to" date: ${to}`); + + await page.goto(`https://x.com/${username}/likes`, { waitUntil: 'networkidle2', timeout: 30000 }); + // Auth check — fail fast on expired cookie + if (page.url().includes('/login') || page.url().includes('/i/flow/login')) { + throw new Error('Authentication failed — cookie may be expired.\n\nRun: xactions login'); + } + await randomDelay(2000, 3000); + + const likedTweets = []; + const seenLinks = new Set(); + let emptyScrolls = 0; + let passedFromDate = false; + + while (likedTweets.length < limit && emptyScrolls < 5 && !passedFromDate) { + // Click "Show more" buttons one at a time — X re-renders the DOM after + // each click, detaching all other button references. + for (let sm = 0; sm < 20; sm++) { + const btn = await page.$('button[data-testid="tweet-text-show-more-link"]'); + if (!btn) break; + try { + await btn.evaluate(b => b.scrollIntoView({ block: 'center' })); + await new Promise(r => setTimeout(r, 300)); + await btn.click(); + await new Promise(r => setTimeout(r, 1200)); + } catch { break; } + } + + const newTweets = await page.evaluate(() => { + return Array.from(document.querySelectorAll('article[data-testid="tweet"]')).map(el => { + const avatarContainers = el.querySelectorAll('[data-testid^="UserAvatar-Container-"]'); + const mainHandle = avatarContainers[0]?.getAttribute('data-testid')?.replace('UserAvatar-Container-', '') || ''; + + // Detect quoted tweet via second UserAvatar-Container + let quotedTweet = null; + if (avatarContainers.length >= 2) { + const qtHandle = avatarContainers[1]?.getAttribute('data-testid')?.replace('UserAvatar-Container-', '') || ''; + const allTexts = el.querySelectorAll('[data-testid="tweetText"]'); + const qtText = allTexts.length >= 2 ? allTexts[1].textContent || '' : ''; + const allTimes = el.querySelectorAll('time'); + const qtTime = allTimes.length >= 2 ? allTimes[1].getAttribute('datetime') || '' : ''; + const allUserNames = el.querySelectorAll('[data-testid="User-Name"]'); + const qtDisplayName = allUserNames.length >= 2 ? allUserNames[1].querySelector('span')?.textContent || '' : ''; + const qtImages = []; + if (qtHandle) { + el.querySelectorAll('a[href*="/photo/"]').forEach(a => { + if (a.href.includes('/' + qtHandle + '/')) { + const img = a.querySelector('img'); + if (img?.src) qtImages.push(img.src); + } + }); + } + const qtLinkEl = el.querySelector('a[href*="/' + qtHandle + '/status/"]'); + quotedTweet = { + text: qtText, author: qtDisplayName, handle: qtHandle, + timestamp: qtTime, link: qtLinkEl?.href || '', images: qtImages, + }; + } + + // Main tweet text + const allTexts = el.querySelectorAll('[data-testid="tweetText"]'); + const text = allTexts[0]?.textContent || ''; + const userEl = el.querySelector('[data-testid="User-Name"]'); + const timeEl = el.querySelector('time'); + const linkEl = el.querySelector('a[href*="/status/"]'); + + // Main tweet images + const images = []; + el.querySelectorAll('a[href*="/photo/"]').forEach(a => { + if (a.href.includes('/' + mainHandle + '/')) { + const img = a.querySelector('img'); + if (img?.src) images.push(img.src); + } + }); + + // X Article + let article = null; + const articleCover = el.querySelector('[data-testid="article-cover-image"]'); + if (articleCover) { + const contentDiv = articleCover.nextElementSibling; + const childDivs = contentDiv ? contentDiv.querySelectorAll(':scope > div') : []; + article = { + title: childDivs[0]?.textContent?.trim() || '', + description: childDivs[1]?.textContent?.trim() || '', + coverImage: articleCover.querySelector('img')?.src || '', + }; + } + + // Card/link preview + let card = null; + const cardWrapper = el.querySelector('[data-testid="card.wrapper"]'); + if (cardWrapper) { + const cardA = cardWrapper.querySelector('a[href]'); + const headingEl = cardWrapper.querySelector('[role="heading"]'); + card = { + title: headingEl?.textContent || cardA?.getAttribute('aria-label') || '', + link: cardA?.href || '', + }; + } + + // Engagement stats + const groupEl = el.querySelector('[role="group"]'); + const groupLabel = groupEl?.getAttribute('aria-label') || ''; + const parseNum = (p) => { const m = groupLabel.match(p); return m ? m[1] : '0'; }; + + const handle = userEl?.querySelector('a[href^="/"]')?.getAttribute('href')?.replace('/', '') || ''; + const displayName = userEl?.querySelector('span')?.textContent || ''; + const tweetLink = linkEl?.href || ''; + + if (article) { + article.tweetUrl = tweetLink; + if (!quotedTweet && tweetLink) { + const statusMatch = tweetLink.match(/\/status\/(\d+)/); + if (statusMatch) article.url = `https://x.com/${handle}/article/${statusMatch[1]}`; + } + } + + return { + text, author: displayName, handle, + timestamp: timeEl?.getAttribute('datetime') || '', + link: tweetLink, images, quotedTweet, article, card, + replies: parseNum(/([\d,]+)\s*repl/), + retweets: parseNum(/([\d,]+)\s*repost/), + likes: parseNum(/([\d,]+)\s*like/), + views: parseNum(/([\d,]+)\s*view/), + }; + }); + }); + + let added = 0; + for (const t of newTweets) { + if (!t.link || seenLinks.has(t.link)) continue; + seenLinks.add(t.link); + + const tweetDate = t.timestamp ? new Date(t.timestamp) : null; + if (tweetDate) { + if (fromDate && tweetDate < fromDate) { passedFromDate = true; break; } + if (toDate && tweetDate > toDate) continue; + } + + if (likedTweets.length < limit) { + likedTweets.push(t); + added++; + } + } + + emptyScrolls = added === 0 ? emptyScrolls + 1 : 0; + await page.evaluate(() => window.scrollBy(0, 1200)); + await randomDelay(); + } + + return likedTweets; +} + +// ============================================================================ +// Likes Scraper (who liked a specific tweet) // ============================================================================ /** @@ -938,6 +1118,7 @@ export default { scrapeTweets, searchTweets, scrapeThread, + scrapeLikedTweets, scrapeLikes, scrapeHashtag, scrapeMedia, From 0c02c43b9e6f2fc4ed009e4a571eae323b8a2cc6 Mon Sep 17 00:00:00 2001 From: nj-io <26359601+nj-io@users.noreply.github.com> Date: Sun, 5 Apr 2026 11:52:44 +0000 Subject: [PATCH 2/2] fix: viewport resize and scroll improvements for likes pagination - Expand viewport to 2400px height before scrolling (default 800px only fits ~1 tweet, causing X's virtualization to never render more) - Restore viewport to 800px after scraping - Wait for initial tweet selector before entering scroll loop - Scroll by window.innerHeight instead of fixed 1200px - MutationObserver-based wait for DOM changes after each scroll - Progressive backoff on empty scrolls (2-4s base + 1-1.5s per miss) - Increase empty scroll tolerance from 5 to 8 Co-Authored-By: Claude Opus 4.6 (1M context) --- src/scrapers/twitter/index.js | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/src/scrapers/twitter/index.js b/src/scrapers/twitter/index.js index e2d7aa05..a300a605 100644 --- a/src/scrapers/twitter/index.js +++ b/src/scrapers/twitter/index.js @@ -518,6 +518,9 @@ export async function scrapeLikedTweets(page, username, options = {}) { if (page.url().includes('/login') || page.url().includes('/i/flow/login')) { throw new Error('Authentication failed — cookie may be expired.\n\nRun: xactions login'); } + // Expand viewport so X renders multiple tweets (default 800px only fits ~1) + await page.setViewport({ width: 1280, height: 2400 }); + await page.waitForSelector('article[data-testid="tweet"]', { timeout: 10000 }).catch(() => {}); await randomDelay(2000, 3000); const likedTweets = []; @@ -525,7 +528,7 @@ export async function scrapeLikedTweets(page, username, options = {}) { let emptyScrolls = 0; let passedFromDate = false; - while (likedTweets.length < limit && emptyScrolls < 5 && !passedFromDate) { + while (likedTweets.length < limit && emptyScrolls < 8 && !passedFromDate) { // Click "Show more" buttons one at a time — X re-renders the DOM after // each click, detaching all other button references. for (let sm = 0; sm < 20; sm++) { @@ -657,11 +660,26 @@ export async function scrapeLikedTweets(page, username, options = {}) { } } - emptyScrolls = added === 0 ? emptyScrolls + 1 : 0; - await page.evaluate(() => window.scrollBy(0, 1200)); - await randomDelay(); + if (added === 0) { + emptyScrolls++; + await randomDelay(2000 + emptyScrolls * 1000, 4000 + emptyScrolls * 1500); + } else { + emptyScrolls = 0; + } + + // Scroll to load more content, then wait for DOM to update + await page.evaluate(() => window.scrollBy(0, window.innerHeight)); + await page.evaluate(() => new Promise(resolve => { + const target = document.querySelector('[data-testid="primaryColumn"]') || document.body; + const observer = new MutationObserver(() => { observer.disconnect(); resolve(); }); + observer.observe(target, { childList: true, subtree: true }); + setTimeout(() => { observer.disconnect(); resolve(); }, 3000); + })); + await randomDelay(1000, 2000); } + // Restore viewport + await page.setViewport({ width: 1280, height: 800 }); return likedTweets; }