Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions src/core/analyzer-workflows.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,21 @@ describe('WorkflowAnalyzer', () => {
expect(result.clusters).toEqual([]);
});

it('filters harness-injected AGENTS.md context as noise', () => {
  // The same injected payload is the only request in each of five sessions,
  // with timestamps spaced 1000 ms apart.
  const baseTime = Date.now();
  const injected = '# AGENTS.md instructions for /home/me/project\n\n<INSTRUCTIONS>\nfollow repo conventions\n</INSTRUCTIONS>';
  const sessions = [0, 1, 2, 3, 4].map((index) => {
    const stamp = baseTime + index * 1000;
    return makeSession({
      sessionId: `inj-${index}`,
      requests: [makeRequest({ messageText: injected, timestamp: stamp })],
      lastMessageDate: stamp,
    });
  });

  const { clusters } = createAnalyzer(sessions).getWorkflowOptimization();

  // Injected context is expected to be dropped as noise, leaving nothing to cluster.
  expect(clusters).toEqual([]);
});

it('clusters repeated similar prompts', () => {
const now = Date.now();
// Need at least 3 occurrences with same fingerprint
Expand Down
4 changes: 3 additions & 1 deletion src/core/analyzer-workflows.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import { DateFilter, WorkflowCluster, WorkflowOptimizationData } from './types';
import { AnalyzerBase } from './analyzer-base';
import { toDateStr } from './helpers';
import { isHarnessInjectedContext, toDateStr } from './helpers';

/**
 * Minimum prompt length to consider for clustering (skip trivial messages).
 * NOTE(review): presumably counted in characters of the prompt text — confirm
 * at the usage site, which is outside this excerpt.
 */
const MIN_PROMPT_LEN = 15;
Expand Down Expand Up @@ -53,6 +53,8 @@ function normalizePrompt(raw: string): string {
/** Check if text is likely a system/bot message or noise rather than a user prompt */
function isNoise(text: string): boolean {
const t = text.trim();
// Harness-injected session-start context (e.g. Codex AGENTS.md / environment context)
if (isHarnessInjectedContext(t)) return true;
// Decorative separators/borders
if (/^[═─━=\-_*]{10,}/.test(t)) return true;
// System auth/notification messages
Expand Down
22 changes: 22 additions & 0 deletions src/core/helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -294,3 +294,25 @@ export function classifyWorkType(msg: string): WorkType {
}
return 'other';
}

/**
 * Leading markers of session-start payloads that a harness records under the
 * `user` role even though they are not real user prompts (for example, Codex
 * injecting the repo `AGENTS.md` plus environment/instruction context). Text
 * that opens with one of these should be treated as noise so it does not get
 * clustered into workflow/skill recommendations.
 *
 * The list is intentionally narrow: a catch-all rule for any `#` markdown
 * heading or any `<tag>` would also suppress legitimate user prompts.
 */
const HARNESS_INJECTED_MARKERS: RegExp[] = [
  /^# AGENTS\.md instructions\b/,
  /^<environment_context\b/,
  /^<INSTRUCTIONS\b/,
  /^<user_instructions\b/,
];

/**
 * Reports whether `text` opens with one of the known harness-injected markers.
 *
 * Leading whitespace is ignored before matching; the markers are anchored to
 * the start of the remaining text and compared case-sensitively.
 *
 * @param text - Raw message text to inspect.
 * @returns `true` when the text begins with a known marker, otherwise `false`.
 */
export function isHarnessInjectedContext(text: string): boolean {
  const candidate = text.trimStart();
  for (const marker of HARNESS_INJECTED_MARKERS) {
    if (marker.test(candidate)) {
      return true;
    }
  }
  return false;
}
23 changes: 23 additions & 0 deletions src/core/parser-codex.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,29 @@ describe('parseCodexSessions skillsUsed extraction', () => {
expect(sessions[0].requests[0].skillsUsed).toEqual(['pdf']);
});
});

it('ignores harness-injected AGENTS.md context and captures the real prompt', () => {
  withCodexFile(
    [
      { type: 'session_meta', payload: { id: 'sess-inject-1', cwd: '/Users/me/proj' } },
      { type: 'turn_context', payload: { model: 'gpt-5.3-codex' } },
      // Context injected at session start, logged as a user-role response_item.
      {
        type: 'response_item',
        timestamp: '2025-06-15T10:00:00Z',
        payload: {
          type: 'message',
          role: 'user',
          content: [
            { type: 'input_text', text: '# AGENTS.md instructions for /Users/me/proj\n\nfollow repo conventions' },
            { type: 'input_text', text: '<environment_context>\n <cwd>/Users/me/proj</cwd>\n</environment_context>' },
          ],
        },
      },
      // The prompt the user actually sent, followed by the assistant's reply.
      {
        type: 'event_msg',
        timestamp: '2025-06-15T10:00:05Z',
        payload: { type: 'user_message', message: 'what is this repo about?' },
      },
      {
        type: 'event_msg',
        timestamp: '2025-06-15T10:00:06Z',
        payload: { type: 'assistant_message', content: 'a coach' },
      },
    ],
    (dir) => {
      const parsed = parseCodexSessions(dir);
      expect(parsed).toHaveLength(1);

      const promptTexts = parsed[0].requests.map((request) => request.messageText);

      // No captured prompt may start with the injected AGENTS.md header.
      const leakedInjection = promptTexts.some((text) => text.startsWith('# AGENTS.md instructions'));
      expect(leakedInjection).toBe(false);

      // The genuine user prompt must still be recorded.
      expect(promptTexts).toContain('what is this repo about?');
    },
  );
});
});

describe('findCodexDirs', () => {
Expand Down
7 changes: 5 additions & 2 deletions src/core/parser-codex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import * as path from 'path';
import { StringDecoder } from 'string_decoder';
import { ModelUsage, Session, SessionRequest } from './types';
import { assertTrustedPath, createRequest, createSession, detectDevcontainerFromRequests, extractSkillNameFromPath, extractSkillPathsFromText } from './parser-shared';
import { canonicalizeReasoningEffort, extractReasoningEffortFromModelId } from './helpers';
import { canonicalizeReasoningEffort, extractReasoningEffortFromModelId, isHarnessInjectedContext } from './helpers';

interface CodexLine {
type: string;
Expand Down Expand Up @@ -268,6 +268,9 @@ function extractFilePath(args: Record<string, unknown> | null | undefined): stri

function handleUserMessageEvent(payload: Record<string, unknown>, state: CodexParseState, ts: number | null, defaultModel: string): void {
const newMessage = stringValue(payload.message) || stringValue(payload.text);
// Harness-injected session-start context is recorded as a user message but is
// not a real prompt; ignore it before flushing or mutating turn state.
if (isHarnessInjectedContext(newMessage)) return;
if (state.currentUserMessage && state.currentUserMessage === newMessage && isTurnEmpty(state)) {
if (state.turnStartTs == null) state.turnStartTs = ts;
return;
Expand Down Expand Up @@ -342,7 +345,7 @@ function handleTurnContext(payload: Record<string, unknown>, state: CodexParseSt

function handleUserResponseItem(payload: Record<string, unknown>, state: CodexParseState, ts: number | null, defaultModel: string): void {
for (const item of extractContentItems(payload.content)) {
if (item.type !== 'input_text' || !item.text || item.text.startsWith('<')) continue;
if (item.type !== 'input_text' || !item.text || item.text.startsWith('<') || isHarnessInjectedContext(item.text)) continue;
if (!state.currentUserMessage) {
flushCodexTurn(state, defaultModel);
state.currentUserMessage = item.text;
Expand Down
Loading