From fc29761baffa88921526c17ae1903040b322ec2f Mon Sep 17 00:00:00 2001 From: vaisd Date: Fri, 15 May 2026 03:20:58 -0400 Subject: [PATCH] fix: reduce false positives in movers and arbitrage detection --- api/lib/price-snapshots.ts | 10 +- api/markets/arbitrage.ts | 10 +- api/markets/movers.ts | 8 + package.json | 1 + src/api/__tests__/arbitrage-detector.test.ts | 191 +++++++++++++++++++ src/api/arbitrage-detector.ts | 31 +-- 6 files changed, 236 insertions(+), 15 deletions(-) create mode 100644 src/api/__tests__/arbitrage-detector.test.ts diff --git a/api/lib/price-snapshots.ts b/api/lib/price-snapshots.ts index fd67abf..71ffacf 100644 --- a/api/lib/price-snapshots.ts +++ b/api/lib/price-snapshots.ts @@ -121,8 +121,16 @@ export function computePriceChange( // overstates change magnitude — see prior FIX 7 in the original code. if (closestDiff > hoursAgo * 60 * 60 * 1000 * 0.5) return null; + const rawChange = current.yesPrice - closest.yesPrice; + const actualHoursElapsed = (current.timestamp - closest.timestamp) / (60 * 60 * 1000); + + // Normalize the change to represent a true hoursAgo equivalent by scaling it by hoursAgo / actualHoursElapsed. + // If the closest snapshot is 90 minutes ago instead of 60, the change is scaled down proportionally to avoid + // overstating movement; if the snapshot is newer than hoursAgo, the same ratio scales the change up. 
+ const normalizedChange = rawChange * (hoursAgo / actualHoursElapsed); + return { - change: current.yesPrice - closest.yesPrice, + change: normalizedChange, previousPrice: closest.yesPrice, }; } diff --git a/api/markets/arbitrage.ts b/api/markets/arbitrage.ts index 26a2f2a..aedf0d6 100644 --- a/api/markets/arbitrage.ts +++ b/api/markets/arbitrage.ts @@ -1,6 +1,10 @@ import type { VercelRequest, VercelResponse } from '@vercel/node'; import { getMarkets, getArbitrage, getMarketMetadata } from '../lib/market-cache'; +export function normalizeMinConfidence(minConfidence: number): number { + return Math.max(minConfidence, 0.5); +} + export default async function handler( req: VercelRequest, res: VercelResponse @@ -58,6 +62,10 @@ export default async function handler( return; } + // Enforce a conservative minimum confidence floor so callers cannot bypass + // the similarity filter by requesting an arbitrarily low confidence. + const effectiveMinConfidence = Math.max(minConfidenceNum, 0.5); + if (isNaN(limitNum) || limitNum < 1 || limitNum > 100) { res.status(400).json({ success: false, @@ -83,7 +91,7 @@ export default async function handler( // Apply additional filters client-side // Note: opportunities are already sorted by spread descending from detectArbitrage() opportunities = opportunities - .filter(arb => arb.confidence >= minConfidenceNum) + .filter(arb => arb.confidence >= effectiveMinConfidence) .filter(arb => !category || arb.polymarket.category === category || arb.kalshi.category === category) .slice(0, limitNum); diff --git a/api/markets/movers.ts b/api/markets/movers.ts index a4bca00..3c7807a 100644 --- a/api/markets/movers.ts +++ b/api/markets/movers.ts @@ -69,6 +69,14 @@ export default async function handler( return; } + if (minChangeNum < 0.02) { + res.status(400).json({ + success: false, + error: 'minChange must be at least 0.02 (the smallest precomputed bucket).', + }); + return; + } + if (isNaN(limitNum) || limitNum < 1 || limitNum > 100) { 
res.status(400).json({ success: false, diff --git a/package.json b/package.json index cca47cd..90ee8e4 100644 --- a/package.json +++ b/package.json @@ -9,6 +9,7 @@ "backend:dev": "node server/api-server.mjs", "test:agent": "node --import tsx scripts/test-agent-api.ts", "test:agent:local": "MUSASHI_API_BASE_URL=http://127.0.0.1:3000 node --import tsx scripts/test-agent-api.ts", + "test:arbitrage": "node --import tsx src/api/__tests__/arbitrage-detector.test.ts", "typecheck": "tsc --noEmit -p tsconfig.json && tsc --noEmit -p api/tsconfig.json", "clean": "rm -rf dist .vercel" }, diff --git a/src/api/__tests__/arbitrage-detector.test.ts b/src/api/__tests__/arbitrage-detector.test.ts new file mode 100644 index 0000000..947927a --- /dev/null +++ b/src/api/__tests__/arbitrage-detector.test.ts @@ -0,0 +1,191 @@ +import { areMarketsSimilar } from '../arbitrage-detector'; +import { normalizeMinConfidence } from '../../../api/markets/arbitrage'; +import { computePriceChange } from '../../../api/lib/price-snapshots'; +import { Market } from '../../types/market'; + +function assertEqual(actual: unknown, expected: unknown, message: string) { + if (actual !== expected) { + throw new Error(`${message}: expected ${expected}, got ${actual}`); + } +} + +// Test cases for areMarketsSimilar function +// Run with: node --import tsx src/api/__tests__/arbitrage-detector.test.ts + +function runTests() { + console.log('Running areMarketsSimilar tests...\n'); + + // Test case 1: False positive that should now be rejected + // Previously matched due to low keyword threshold + stop words + const market1: Market = { + id: '1', + platform: 'polymarket', + title: 'Will the market go up?', + category: 'finance', + yesPrice: 0.5, + url: '', + keywords: ['market', 'will', 'go', 'up', 'price'], // Common stop words + volume24h: 0, + createdAt: new Date().toISOString(), + }; + + const market2: Market = { + id: '2', + platform: 'kalshi', + title: 'Will the stock market rise?', + category: 'finance', + 
yesPrice: 0.6, + url: '', + keywords: ['market', 'will', 'stock', 'rise', 'price'], // Common stop words + volume24h: 0, + createdAt: new Date().toISOString(), + }; + + const result1 = areMarketsSimilar(market1, market2); + console.log('Test 1 - False positive (should be rejected):'); + console.log(` Market1: "${market1.title}"`); + console.log(` Market2: "${market2.title}"`); + console.log(` Keywords1: [${market1.keywords.join(', ')}]`); + console.log(` Keywords2: [${market2.keywords.join(', ')}]`); + console.log(` Result: ${result1.isSimilar ? 'MATCH' : 'NO MATCH'} - ${result1.reason}`); + console.log(` Expected: NO MATCH (filtered stop words, low overlap)\n`); + + // Test case 2: True match that should still pass + // High title similarity + const market3: Market = { + id: '3', + platform: 'polymarket', + title: 'Will Apple stock hit $200 by end of 2026?', + category: 'finance', + yesPrice: 0.5, + url: '', + keywords: ['apple', 'stock', 'hit', '200', 'end', '2026'], + volume24h: 0, + createdAt: new Date().toISOString(), + }; + + const market4: Market = { + id: '4', + platform: 'kalshi', + title: 'Will Apple stock reach $200 by end of 2026?', + category: 'finance', + yesPrice: 0.55, + url: '', + keywords: ['apple', 'stock', 'reach', '200', 'end', '2026'], + volume24h: 0, + createdAt: new Date().toISOString(), + }; + + const result2 = areMarketsSimilar(market3, market4); + console.log('Test 2 - True match (should pass):'); + console.log(` Market3: "${market3.title}"`); + console.log(` Market4: "${market4.title}"`); + console.log(` Keywords3: [${market3.keywords.join(', ')}]`); + console.log(` Keywords4: [${market4.keywords.join(', ')}]`); + console.log(` Result: ${result2.isSimilar ? 
'MATCH' : 'NO MATCH'} - ${result2.reason}`); + console.log(` Expected: MATCH (high title similarity)\n`); + + // Test case 3: Different categories (should be rejected) + const market5: Market = { + id: '5', + platform: 'polymarket', + title: 'Will Tesla stock go up?', + category: 'finance', + yesPrice: 0.5, + url: '', + keywords: ['tesla', 'stock', 'go', 'up'], + volume24h: 0, + createdAt: new Date().toISOString(), + }; + + const market6: Market = { + id: '6', + platform: 'kalshi', + title: 'Will Tesla win the race?', + category: 'sports', + yesPrice: 0.5, + url: '', + keywords: ['tesla', 'win', 'race'], + volume24h: 0, + createdAt: new Date().toISOString(), + }; + + const result3 = areMarketsSimilar(market5, market6); + console.log('Test 3 - Different categories (should be rejected):'); + console.log(` Market5: "${market5.title}" (category: ${market5.category})`); + console.log(` Market6: "${market6.title}" (category: ${market6.category})`); + console.log(` Result: ${result3.isSimilar ? 
'MATCH' : 'NO MATCH'} - ${result3.reason}`); + console.log(` Expected: NO MATCH (different categories)\n`); + + // Test case 4: minConfidence clamp behavior should keep the floor at 0.5 + const minConfidenceClampResult = 0.1; + const effectiveClamp = normalizeMinConfidence(minConfidenceClampResult); + assertEqual(effectiveClamp, 0.5, 'minConfidence clamp should enforce a 0.5 floor'); + console.log('Test 4 - minConfidence clamp behavior confirmed.'); + console.log(` Requested minConfidence: ${minConfidenceClampResult}`); + console.log(` Effective minConfidence: ${effectiveClamp}`); + console.log(` Expected: 0.5 floor enforced\n`); + + // Test case 5: Strong keyword overlap (should pass with new threshold) + const market7: Market = { + id: '7', + platform: 'polymarket', + title: 'Federal Reserve interest rate decision', + category: 'economics', + yesPrice: 0.5, + url: '', + keywords: ['federal', 'reserve', 'interest', 'rate', 'decision', 'economy', 'policy'], + volume24h: 0, + createdAt: new Date().toISOString(), + }; + + const market8: Market = { + id: '8', + platform: 'kalshi', + title: 'Fed rate hike announcement', + category: 'economics', + yesPrice: 0.5, + url: '', + keywords: ['fed', 'rate', 'announcement', 'federal', 'reserve', 'interest', 'policy'], + volume24h: 0, + createdAt: new Date().toISOString(), + }; + + const result4 = areMarketsSimilar(market7, market8); + console.log('Test 4 - Strong keyword overlap (should pass):'); + console.log(` Market7: "${market7.title}"`); + console.log(` Market8: "${market8.title}"`); + console.log(` Keywords7: [${market7.keywords.join(', ')}]`); + console.log(` Keywords8: [${market8.keywords.join(', ')}]`); + console.log(` Result: ${result4.isSimilar ? 
'MATCH' : 'NO MATCH'} - ${result4.reason}`); + console.log(` Expected: MATCH (5+ shared keywords after filtering)\n`); + + // Test case 6: Price change normalization + const now = Date.now(); + const snapshots = [ + { marketId: 'test', yesPrice: 0.4, timestamp: now - 90 * 60 * 1000 }, // 90 min ago + { marketId: 'test', yesPrice: 0.5, timestamp: now }, // current + ]; + const priceChangeResult = computePriceChange(snapshots, 1); + const expectedNormalizedChange = (0.5 - 0.4) * (1 / 1.5); // raw change 0.1 over 1.5h, normalized to 1h + assertEqual(priceChangeResult?.change, expectedNormalizedChange, 'Price change should be normalized by actual elapsed time'); + console.log('Test 6 - Price change normalization confirmed.'); + console.log(` Snapshots: 0.4 at 90min ago, 0.5 now`); + console.log(` Raw change: 0.1 over 1.5h`); + console.log(` Normalized change: ${priceChangeResult?.change} (expected: ${expectedNormalizedChange})`); + console.log(` Expected: Normalized to 1h equivalent\n`); + + // Test case 7: minChange validation (would return 400 for < 0.02) + const invalidMinChange = 0.01; + console.log('Test 7 - minChange validation (API would return 400):'); + console.log(` Requested minChange: ${invalidMinChange}`); + console.log(` Expected: 400 error "minChange must be at least 0.02"`); + console.log(` (This is validated in the API handler, not in unit tests)\n`); + + console.log('Tests completed.'); +} + +// Run tests if this file is executed directly +if (require.main === module) { + runTests(); +} \ No newline at end of file diff --git a/src/api/arbitrage-detector.ts b/src/api/arbitrage-detector.ts index 0f2317f..8d60171 100644 --- a/src/api/arbitrage-detector.ts +++ b/src/api/arbitrage-detector.ts @@ -67,8 +67,17 @@ function calculateTitleSimilarity(title1: string, title2: string): number { * Returns the number of shared keywords */ function calculateKeywordOverlap(market1: Market, market2: Market): number { - const keywords1 = new Set(market1.keywords); - 
const keywords2 = new Set(market2.keywords); + const stopWords = new Set([ + 'market', 'price', 'will', 'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', + 'an', 'a', 'is', 'are', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'can', 'could', + 'should', 'would', 'may', 'might', 'must', 'shall', 'will', 'can', 'may', 'might', 'must', 'ought', + 'dare', 'need', 'used', 'get', 'make', 'go', 'know', 'take', 'see', 'come', 'think', 'look', 'want', + 'give', 'use', 'find', 'tell', 'ask', 'work', 'seem', 'feel', 'try', 'leave', 'call', 'hit', 'reach', + 'win', 'lose', 'pass', 'than', 'over', 'under' + ]); + + const keywords1 = new Set(market1.keywords.filter(kw => !stopWords.has(kw.toLowerCase()))); + const keywords2 = new Set(market2.keywords.filter(kw => !stopWords.has(kw.toLowerCase()))); let overlap = 0; for (const kw of keywords1) { @@ -84,17 +93,13 @@ function calculateKeywordOverlap(market1: Market, market2: Market): number { * Check if two markets refer to the same event * Uses title similarity + keyword overlap + category matching */ -function areMarketsSimilar(poly: Market, kalshi: Market): { +export function areMarketsSimilar(poly: Market, kalshi: Market): { isSimilar: boolean; confidence: number; reason: string; } { - // Must be in the same category (or one is 'other') - const categoryMatch = poly.category === kalshi.category || - poly.category === 'other' || - kalshi.category === 'other'; - - if (!categoryMatch) { + // Must be in the same category + if (poly.category !== kalshi.category) { return { isSimilar: false, confidence: 0, reason: 'Different categories' }; } @@ -116,8 +121,8 @@ function areMarketsSimilar(poly: Market, kalshi: Market): { }; } - if (keywordOverlap >= 3) { - const confidence = Math.min(keywordOverlap / 10, 0.9); // Cap at 0.9 + if (keywordOverlap >= 4) { + const confidence = Math.min(0.5 + (keywordOverlap - 4) * 0.05, 0.9); // 4 keywords => 0.5, +0.05 per extra keyword return { isSimilar: 
true, confidence, @@ -125,12 +130,12 @@ function areMarketsSimilar(poly: Market, kalshi: Market): { }; } - // Check for exact entity matches (strong signal even with low overall similarity) + // Check for exact entity matches (strong signal only when titles are still fairly similar) const polyEntities = extractEntities(poly.title); const kalshiEntities = extractEntities(kalshi.title); const sharedEntities = Array.from(polyEntities).filter(e => kalshiEntities.has(e)); - if (sharedEntities.length >= 2 && titleSim > 0.3) { + if (sharedEntities.length >= 3 && titleSim > 0.45) { return { isSimilar: true, confidence: 0.7,