Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@apideck/agent-analytics",
"version": "0.6.0",
"version": "0.7.0",
"description": "Track AI agent and bot traffic to your Next.js / Vercel app — PostHog, webhooks, or any custom analytics backend. Detects Claude, ChatGPT, Perplexity, Google-Extended, and more.",
"keywords": [
"ai",
Expand Down
94 changes: 85 additions & 9 deletions src/bots.ts
Original file line number Diff line number Diff line change
Expand Up @@ -166,23 +166,81 @@ export function firstUserAgentProduct(userAgent: string | null | undefined): str
return first || 'Other'
}

/**
 * Detect likely headless/automated browsers by looking for request headers
 * that real browsers normally send. Playwright, Puppeteer, and similar tools
 * spoof the User-Agent but often omit standard browser headers.
 *
 * Requests whose User-Agent does not look like a browser are never scored:
 * they return `{ score: 0, signals: [], likely: false }`.
 *
 * Signals checked (each one that fires adds 1 to the score):
 * - `missing-accept-language` — no `Accept-Language` header.
 * - `missing-sec-fetch-mode` — no `Sec-Fetch-Mode` header.
 * - `missing-sec-ch-ua` — no `Sec-CH-UA` header although the User-Agent
 *   claims Chrome/Chromium. Only evaluated for Chromium-style UAs, because
 *   Firefox and Safari do not send User-Agent Client Hints; scoring them for
 *   a header they never send produces false positives.
 * - `headless-chrome-hint` — `Sec-CH-UA` is present and contains
 *   "HeadlessChrome" (mutually exclusive with `missing-sec-ch-ua`).
 * - `missing-or-bare-accept` — `Accept` is absent, empty, or exactly `*` + `/*`.
 * - `connection-close` — `Connection: close` was sent.
 *
 * @param req - Incoming request; only its headers are read.
 * @returns The score (0-5), the names of the signals that fired in the order
 *   listed above, and `likely`, which is `true` when the score is 2 or more.
 */
export function detectHeadless(req: Request): HeadlessDetection {
  const ua = (req.headers.get('user-agent') || '').toLowerCase()
  const isBrowserUA =
    ua.includes('mozilla') || ua.includes('chrome') || ua.includes('safari') || ua.includes('firefox')

  // Non-browser UAs (curl, SDKs, declared bots) are out of scope for this heuristic.
  if (!isBrowserUA) return { score: 0, signals: [], likely: false }

  const signals: string[] = []

  if (!req.headers.get('accept-language')) {
    signals.push('missing-accept-language')
  }
  if (!req.headers.get('sec-fetch-mode')) {
    signals.push('missing-sec-fetch-mode')
  }

  // Client Hints are a Chromium feature. "chrome/" also matches "HeadlessChrome/"
  // and Edge/Opera UAs (which embed "Chrome/"), but not "CriOS/" (WebKit on iOS).
  const claimsChromium = ua.includes('chrome/') || ua.includes('chromium/')
  const secChUa = req.headers.get('sec-ch-ua')
  if (secChUa) {
    if (secChUa.toLowerCase().includes('headlesschrome')) {
      signals.push('headless-chrome-hint')
    }
  } else if (claimsChromium) {
    signals.push('missing-sec-ch-ua')
  }

  const accept = req.headers.get('accept') || ''
  if (!accept || accept === '*/*') {
    signals.push('missing-or-bare-accept')
  }
  if ((req.headers.get('connection') || '').toLowerCase() === 'close') {
    signals.push('connection-close')
  }

  const score = signals.length
  return { score, signals, likely: score >= 2 }
}

/** Result of the header-based headless-browser heuristic in {@link detectHeadless}. */
export interface HeadlessDetection {
  /**
   * Number of suspicious signals found (0-5). The two `Sec-CH-UA` signals
   * are mutually exclusive, so at most five can fire for one request.
   */
  score: number
  /** Names of the specific signals that fired. */
  signals: string[]
  /** True when score >= 2 — strong headless indication. */
  likely: boolean
}

/**
 * Categorical tag attached to a classified request / user-agent
 * (the `kind` field of `AgentClassification`).
 */
export type AgentKind =
// AI_BOT_PATTERN matched — a self-declared crawler; high confidence.
| 'declared-crawler'
// HTTP_CLIENT_PATTERN matched — loose signal (coding agent, curl script, or other automation).
| 'coding-agent-hint'
// Browser-like UA whose request is missing standard browser headers;
// set by classifyRequest when the base kind is 'browser' and headless detection reports `likely`.
| 'headless-likely'
// Looks like a real browser.
| 'browser'
// Unrecognised or empty user-agent.
| 'other'

export interface AgentClassification {
/**
* Categorical tag for the UA:
* Categorical tag for the request:
*
* - `'declared-crawler'` — {@link AI_BOT_PATTERN} matched. High confidence.
* - `'coding-agent-hint'` — {@link HTTP_CLIENT_PATTERN} matched. Loose
* signal; could be a coding agent, a curl script, or any automation.
* - `'browser'` — looks like a real browser. Could be a genuine user or
* a Playwright-based agent (Aider, OpenCode) that can't be distinguished
* at the UA layer.
* - `'other'` — unrecognised or empty.
* - `'headless-likely'` — Browser-like UA but missing standard headers.
* Strong signal of Playwright/Puppeteer automation (Aider, OpenCode, etc.).
* - `'browser'` — Looks like a real browser with expected headers present.
* - `'other'` — Unrecognised or empty.
*/
kind: AgentKind
/** Human-readable label, same string {@link parseBotName} returns. */
Expand All @@ -191,13 +249,13 @@ export interface AgentClassification {
isAiBot: boolean
/** Loose: `true` for known HTTP-library / automation UAs. */
codingAgentHint: boolean
/** Headless browser detection result. Only populated when `req` is passed. */
headless?: HeadlessDetection
}

/**
* One-stop classification of a user-agent. Combines {@link isAiBot},
* {@link isHttpClient}, and {@link parseBotName} into a single structured
* result. Used internally by `trackVisit` to populate event properties;
* useful in consumer code when you need all signals at once.
* UA-only classification. Use {@link classifyRequest} for full detection
* including headless browser heuristics.
*/
export function classifyAgent(userAgent: string | null | undefined): AgentClassification {
const label = parseBotName(userAgent)
Expand All @@ -212,3 +270,21 @@ export function classifyAgent(userAgent: string | null | undefined): AgentClassi

return { kind, label, isAiBot: aiBot, codingAgentHint: httpClient }
}

/**
 * Classify a whole request rather than just its User-Agent string.
 *
 * Runs the UA-only classification via {@link classifyAgent}, then the
 * header heuristics via {@link detectHeadless}. If the UA alone would be
 * tagged `'browser'` but the heuristics report `likely`, the resulting
 * `kind` becomes `'headless-likely'`; every other kind is left untouched.
 *
 * @param req - Incoming request; only its headers are read.
 * @returns The UA classification with `kind` possibly promoted and the
 *   headless detection result attached as `headless`.
 */
export function classifyRequest(req: Request): AgentClassification {
  const uaClassification = classifyAgent(req.headers.get('user-agent') || '')
  const headless = detectHeadless(req)

  // Only a plain 'browser' verdict can be upgraded; declared crawlers and
  // HTTP clients keep their more specific tag.
  const promote = uaClassification.kind === 'browser' && headless.likely

  return {
    ...uaClassification,
    kind: promote ? 'headless-likely' : uaClassification.kind,
    headless
  }
}
4 changes: 3 additions & 1 deletion src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@ export {
AI_BOT_PATTERN,
HTTP_CLIENT_PATTERN,
classifyAgent,
classifyRequest,
detectHeadless,
firstUserAgentProduct,
isAiBot,
isHttpClient,
parseBotName
} from './bots.js'
export type { AgentClassification, AgentKind } from './bots.js'
export type { AgentClassification, AgentKind, HeadlessDetection } from './bots.js'
export { hashId } from './hash.js'
export { posthogAnalytics } from './adapters/posthog.js'
export { webhookAnalytics } from './adapters/webhook.js'
Expand Down
13 changes: 10 additions & 3 deletions src/track.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { classifyAgent, isAiBot, isHttpClient } from './bots.js'
import { classifyRequest, detectHeadless, isAiBot, isHttpClient } from './bots.js'
import { hashId } from './hash.js'
import type { TrackVisitOptions } from './types.js'

Expand All @@ -20,7 +20,12 @@ export async function trackVisit(
const onlyBots = opts.onlyBots ?? false
const skipBrowsers = opts.skipBrowsers ?? false
if (onlyBots && !isAiBot(userAgent)) return
if (skipBrowsers && !isAiBot(userAgent) && !isHttpClient(userAgent)) return
if (skipBrowsers && !isAiBot(userAgent) && !isHttpClient(userAgent)) {
// Not a declared bot or HTTP client — check headless heuristics.
// Playwright-based agents (Aider, OpenCode) will pass if they're missing
// standard browser headers. Real browsers get skipped.
if (!detectHeadless(req).likely) return
}

let pathname = '/'
let originFromUrl = ''
Expand All @@ -37,7 +42,7 @@ export async function trackVisit(
const forwardedFor = req.headers.get('x-forwarded-for') || ''
const ip = forwardedFor.split(',')[0]?.trim() ?? ''
const referer = req.headers.get('referer')
const classification = classifyAgent(userAgent)
const classification = classifyRequest(req)

const event = {
event: opts.eventName ?? 'agent_visit',
Expand All @@ -52,6 +57,8 @@ export async function trackVisit(
bot_name: classification.label,
ua_category: classification.kind,
coding_agent_hint: classification.codingAgentHint,
headless_score: classification.headless?.score ?? 0,
headless_likely: classification.headless?.likely ?? false,
referer,
source: opts.source ?? null,
...opts.properties
Expand Down
33 changes: 29 additions & 4 deletions test/track.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,15 @@ describe('trackVisit', () => {
})
})

it('sets bot_name to Browser for human traffic when onlyBots is false', async () => {
it('sets bot_name to Browser for real browser traffic', async () => {
const spy = vi.fn()
await trackVisit(
makeRequest('https://example.com/page', {
'user-agent': 'Mozilla/5.0 (Macintosh) Chrome/120'
'user-agent': 'Mozilla/5.0 (Macintosh) Chrome/120',
'accept-language': 'en-US,en;q=0.9',
'sec-fetch-mode': 'navigate',
'sec-ch-ua': '"Chromium";v="120"',
accept: 'text/html,application/xhtml+xml'
}),
{ analytics: customAnalytics(spy), onlyBots: false }
)
Expand All @@ -57,6 +61,7 @@ describe('trackVisit', () => {
expect(event.properties.bot_name).toBe('Browser')
expect(event.properties.ua_category).toBe('browser')
expect(event.properties.coding_agent_hint).toBe(false)
expect(event.properties.headless_likely).toBe(false)
})

it('sets coding_agent_hint and ua_category for HTTP-library UAs (onlyBots: false)', async () => {
Expand Down Expand Up @@ -129,17 +134,37 @@ describe('trackVisit', () => {
expect(event.properties.coding_agent_hint).toBe(true)
})

it('skipBrowsers skips regular browsers', async () => {
it('skipBrowsers skips real browsers (with standard headers)', async () => {
const spy = vi.fn()
await trackVisit(
makeRequest('https://example.com/page', {
'user-agent': 'Mozilla/5.0 (Macintosh) Chrome/120'
'user-agent': 'Mozilla/5.0 (Macintosh) Chrome/120',
'accept-language': 'en-US,en;q=0.9',
'sec-fetch-mode': 'navigate',
'sec-ch-ua': '"Chromium";v="120", "Google Chrome";v="120"',
accept: 'text/html,application/xhtml+xml'
}),
{ analytics: customAnalytics(spy), skipBrowsers: true }
)
expect(spy).not.toHaveBeenCalled()
})

it('skipBrowsers captures headless browsers (missing standard headers)', async () => {
const spy = vi.fn()
await trackVisit(
makeRequest('https://example.com/page', {
'user-agent': 'Mozilla/5.0 (Macintosh) Chrome/120'
// Missing: accept-language, sec-fetch-mode, sec-ch-ua, proper accept
}),
{ analytics: customAnalytics(spy), skipBrowsers: true }
)
expect(spy).toHaveBeenCalledOnce()
const event = spy.mock.calls[0]![0] as CaptureEvent
expect(event.properties.ua_category).toBe('headless-likely')
expect(event.properties.headless_likely).toBe(true)
expect(event.properties.headless_score).toBeGreaterThanOrEqual(2)
})

it('honours a custom event name', async () => {
const spy = vi.fn()
await trackVisit(
Expand Down
Loading