From 800ec6c9ba4aecae019ab778139e2b9af73ffa64 Mon Sep 17 00:00:00 2001 From: Rob Bos Date: Fri, 27 Mar 2026 19:32:53 +0100 Subject: [PATCH 01/14] Update dashboard to reflect configurable lookback window and improve date range display --- vscode-extension/src/extension.ts | 5 ++-- .../src/webview/dashboard/main.ts | 27 +++++++++---------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/vscode-extension/src/extension.ts b/vscode-extension/src/extension.ts index 53995f28..74feadc4 100644 --- a/vscode-extension/src/extension.ts +++ b/vscode-extension/src/extension.ts @@ -5293,10 +5293,10 @@ ${hashtag}`; ); } - // Query backend for last 30 days + // Query backend for the configured lookback window const now = new Date(); const todayKey = BackendUtility.toUtcDayKey(now); - const startKey = BackendUtility.addDaysUtc(todayKey, -29); + const startKey = BackendUtility.addDaysUtc(todayKey, -(settings.lookbackDays - 1)); // Fetch ALL entities across all datasets using the facade's public API const allEntities = await this.backend.getAllAggEntitiesForRange( @@ -5707,6 +5707,7 @@ ${hashtag}`; firstDate, lastDate, }, + lookbackDays: settings.lookbackDays, lastUpdated: new Date().toISOString(), }; } diff --git a/vscode-extension/src/webview/dashboard/main.ts b/vscode-extension/src/webview/dashboard/main.ts index d849114c..d5f342fe 100644 --- a/vscode-extension/src/webview/dashboard/main.ts +++ b/vscode-extension/src/webview/dashboard/main.ts @@ -48,6 +48,7 @@ interface DashboardStats { firstDate?: string | null; lastDate?: string | null; }; + lookbackDays?: number; lastUpdated: string | Date; compactNumbers?: boolean; } @@ -153,7 +154,7 @@ function renderShell(root: HTMLElement, stats: DashboardStats): void { const header = el("div", "header"); const titleGroup = el("div", "title-group"); const title = el("div", "title", "📊 Team Dashboard"); - const period = el("div", "period", "Last 30 days"); + const period = el("div", "period", `Last ${stats.lookbackDays ?? 
30} days`); titleGroup.append(title, period); const buttonRow = el("div", "button-row"); @@ -225,29 +226,27 @@ function buildTeamSection(stats: DashboardStats): HTMLElement { ); // Add date range info if available - console.log( - "Team firstDate:", - stats.team.firstDate, - "lastDate:", - stats.team.lastDate, - ); let dateInfo: HTMLElement | null = null; if (stats.team.firstDate || stats.team.lastDate) { dateInfo = el("div", "info-box"); dateInfo.style.cssText = - "margin-top: 16px; padding: 12px; background: rgba(255,255,255,0.05); border-radius: 6px; font-size: 13px; color: #aaa;"; + "margin-top: 16px; padding: 12px 14px; background: var(--vscode-inputValidation-infoBackground, rgba(0,120,212,0.1)); border: 1px solid var(--vscode-inputValidation-infoBorder, rgba(0,120,212,0.4)); border-radius: 6px; font-size: 13px; color: var(--vscode-foreground, #ccc);"; const firstDate = stats.team.firstDate; const lastDate = stats.team.lastDate; + const rangeLabel = el("div", ""); + rangeLabel.style.cssText = "font-weight: 600; margin-bottom: 4px;"; if (firstDate && lastDate) { - dateInfo.textContent = `📅 Data Range: ${firstDate} to ${lastDate}`; + rangeLabel.textContent = `📅 Synced data range: ${firstDate} → ${lastDate}`; } else if (firstDate) { - dateInfo.textContent = `📅 First Data: ${firstDate}`; + rangeLabel.textContent = `📅 First synced data: ${firstDate}`; } else if (lastDate) { - dateInfo.textContent = `📅 Last Data: ${lastDate}`; + rangeLabel.textContent = `📅 Last synced data: ${lastDate}`; } - console.log("Date info element created"); - } else { - console.log("No date range data available"); + const rangeNote = el("div", ""); + rangeNote.style.cssText = "font-size: 11px; opacity: 0.7; margin-top: 3px;"; + const lookback = stats.lookbackDays ?? 30; + rangeNote.textContent = `Dashboard is filtered to the last ${lookback} days. This reflects what team members have synced to cloud storage. 
Older data may exist locally but was outside their configured upload window.`; + dateInfo.append(rangeLabel, rangeNote); } const leaderboard = buildLeaderboard(stats); From c736eec4321b91f7f0bb70c0150e48cf20715bf3 Mon Sep 17 00:00:00 2001 From: Rob Bos Date: Fri, 27 Mar 2026 19:46:20 +0100 Subject: [PATCH 02/14] Add backfill historical data functionality and update dashboard for sync coverage --- vscode-extension/src/backend/facade.ts | 12 +++ .../src/backend/services/syncService.ts | 94 ++++++++++++++++++- vscode-extension/src/extension.ts | 45 +++++++++ .../src/webview/dashboard/main.ts | 36 ++++++- 4 files changed, 180 insertions(+), 7 deletions(-) diff --git a/vscode-extension/src/backend/facade.ts b/vscode-extension/src/backend/facade.ts index 42069a22..f0d4eaeb 100644 --- a/vscode-extension/src/backend/facade.ts +++ b/vscode-extension/src/backend/facade.ts @@ -441,6 +441,18 @@ export class BackendFacade { return result; } + /** + * Backfill historical data to Azure Table Storage. + * Scans ALL local session files (ignoring the mtime-based age filter) and upserts daily + * rollups for every day within the given lookback window (default 365 days). + * Use this when the normal sync has missed data due to the mtime filter. + */ + public async backfillHistoricalData(maxLookbackDays = 365): Promise { + const settings = this.getSettings(); + await this.syncService.backfillSync(settings, this.isConfigured(settings), maxLookbackDays); + this.clearQueryCache(); + } + public async tryGetBackendDetailedStatsForStatusBar( settings: BackendSettings, ): Promise { diff --git a/vscode-extension/src/backend/services/syncService.ts b/vscode-extension/src/backend/services/syncService.ts index 8d29ebbf..b5ff27bb 100644 --- a/vscode-extension/src/backend/services/syncService.ts +++ b/vscode-extension/src/backend/services/syncService.ts @@ -607,12 +607,13 @@ export class SyncService { * Compute daily rollups from local session files. 
* Uses cached session data when available to avoid re-parsing files. */ - private async computeDailyRollupsFromLocalSessions(args: { lookbackDays: number; userId?: string; sessionFiles?: string[] }): Promise<{ + private async computeDailyRollupsFromLocalSessions(args: { lookbackDays: number; userId?: string; sessionFiles?: string[]; skipMtimeFilter?: boolean }): Promise<{ rollups: Map; workspaceNamesById: Record; machineNamesById: Record; }> { const lookbackDays = args.lookbackDays; + const skipMtimeFilter = args.skipMtimeFilter === true; const userId = (args.userId ?? '').trim() || undefined; const now = new Date(); // Include all events from the start of the first day in the range (UTC). @@ -652,9 +653,8 @@ export class SyncService { const fileStat = await this.deps.statSessionFile(sessionFile); fileMtimeMs = fileStat.mtimeMs; - - // Skip files older than lookback period - if (fileMtimeMs < startMs) { + // Skip files older than lookback period (unless backfill mode bypasses this filter) + if (!skipMtimeFilter && fileMtimeMs < startMs) { filesSkipped++; continue; } @@ -1047,4 +1047,90 @@ export class SyncService { }); return this.syncQueue; } + + /** + * Backfill historical data to Azure Table Storage. + * Scans ALL local session files (ignoring file mtime) and upserts daily rollups for every + * day that has local data within the given lookback window. This is safe to run at any time + * because the underlying upsert operation is idempotent. + * + * Use this to recover from situations where the normal sync missed data due to the + * mtime-based file-age filter (e.g. the backend was configured after a large volume of + * activity had already accumulated locally). 
+ */ + async backfillSync(settings: BackendSettings, isConfigured: boolean, maxLookbackDays = 365): Promise { + const sharingPolicy = computeBackendSharingPolicy({ + enabled: settings.enabled, + profile: settings.sharingProfile, + shareWorkspaceMachineNames: settings.shareWorkspaceMachineNames + }); + if (!sharingPolicy.allowCloudSync || !isConfigured) { + this.deps.warn('Backfill: skipping (cloud sync disabled or backend not configured)'); + return; + } + + this.deps.log(`Backfill: starting deep scan (up to ${maxLookbackDays} days, mtime filter disabled)`); + + const creds = await this.credentialService.getBackendDataPlaneCredentials(settings); + if (!creds) { + this.deps.warn('Backfill: skipping (credentials not available)'); + return; + } + + await this.dataPlaneService.ensureTableExists(settings, creds.tableCredential); + await this.dataPlaneService.validateAccess(settings, creds.tableCredential); + + const resolvedIdentity = await this.resolveEffectiveUserIdentityForSync(settings, sharingPolicy.includeUserDimension); + const { rollups, workspaceNamesById, machineNamesById } = await this.computeDailyRollupsFromLocalSessions({ + lookbackDays: maxLookbackDays, + userId: resolvedIdentity.userId, + skipMtimeFilter: true // backfill: open every file regardless of age + }); + + const dayKeys = new Set(); + for (const { key } of rollups.values()) { dayKeys.add(key.day); } + const sortedDays = Array.from(dayKeys).sort(); + this.deps.log(`Backfill: found data for ${sortedDays.length} days: ${sortedDays.slice(0, 10).join(', ')}${sortedDays.length > 10 ? '…' : ''}`); + + const tableClient = this.dataPlaneService.createTableClient(settings, creds.tableCredential); + const entities = []; + for (const { key, value } of rollups.values()) { + const effectiveUserId = (key.userId ?? 
'').trim() || undefined; + const includeConsent = sharingPolicy.includeUserDimension && !!effectiveUserId; + const includeNames = sharingPolicy.includeNames; + const workspaceIdToStore = sharingPolicy.workspaceIdStrategy === 'hashed' + ? hashWorkspaceIdForTeam({ datasetId: settings.datasetId, workspaceId: key.workspaceId }) + : key.workspaceId; + const machineIdToStore = sharingPolicy.machineIdStrategy === 'hashed' + ? hashMachineIdForTeam({ datasetId: settings.datasetId, machineId: key.machineId }) + : key.machineId; + const workspaceName = includeNames ? workspaceNamesById[key.workspaceId] : undefined; + const machineName = includeNames ? machineNamesById[key.machineId] : undefined; + const entity = createDailyAggEntity({ + datasetId: settings.datasetId, + day: key.day, + model: key.model, + workspaceId: workspaceIdToStore, + workspaceName, + machineId: machineIdToStore, + machineName, + userId: effectiveUserId, + userKeyType: resolvedIdentity.userKeyType, + shareWithTeam: includeConsent ? 
true : undefined, + consentAt: validateConsentTimestamp(settings.shareConsentAt, this.deps.log), + inputTokens: value.inputTokens, + outputTokens: value.outputTokens, + interactions: value.interactions, + fluencyMetrics: value.fluencyMetrics + }); + entities.push(entity); + } + + const { successCount, errors } = await this.dataPlaneService.upsertEntitiesBatch(tableClient, entities); + if (errors.length > 0) { + this.deps.warn(`Backfill: ${successCount}/${entities.length} entities synced, ${errors.length} failed`); + } else { + this.deps.log(`Backfill: ${successCount} entities synced successfully across ${sortedDays.length} days`); + } + } } diff --git a/vscode-extension/src/extension.ts b/vscode-extension/src/extension.ts index 74feadc4..5469e50d 100644 --- a/vscode-extension/src/extension.ts +++ b/vscode-extension/src/extension.ts @@ -5141,6 +5141,9 @@ ${hashtag}`; case "deleteUserDataset": await this.dispatch('deleteUserDataset', () => this.handleDeleteUserDataset(message.userId, message.datasetId)); break; + case "backfillHistoricalData": + await this.dispatch('backfillHistoricalData', () => this.handleBackfillHistoricalData()); + break; } }); @@ -5264,6 +5267,34 @@ ${hashtag}`; * Applies the same 6-category scoring thresholds as calculateMaturityScores(). */ + /** + * Backfill historical token data to Azure Table Storage by scanning all local session files + * without the normal mtime-based age filter. + */ + private async handleBackfillHistoricalData(): Promise { + if (!this.backend) { + return; + } + + this.log('🔄 Starting historical data backfill...'); + this.dashboardPanel?.webview.postMessage({ command: 'dashboardLoading' }); + + try { + await this.backend.backfillHistoricalData(); + this.log('✅ Historical data backfill complete'); + vscode.window.showInformationMessage('Historical data backfill complete. 
Refreshing dashboard...'); + // Invalidate the cached dashboard data so the refresh reflects the new backfill + this.lastDashboardData = undefined; + await this.refreshDashboardPanel(); + } catch (error) { + this.error('Backfill failed:', error); + this.dashboardPanel?.webview.postMessage({ + command: 'dashboardError', + message: 'Backfill failed. Please check backend configuration and try again.', + }); + } + } + /** * Fetches and aggregates data for the Team Dashboard. */ @@ -5689,6 +5720,18 @@ ${hashtag}`; } } + // Fetch local stats to surface the sync coverage gap in the dashboard + let localTokens: number | undefined; + let localInteractions: number | undefined; + try { + const localStats = await this.calculateDetailedStats(undefined); + localTokens = localStats.last30Days.tokens; + const p = localStats.last30Days; + localInteractions = p.sessions * p.avgInteractionsPerSession; + } catch { + // Non-critical: leave undefined + } + return { personal: { userId: currentUserId || "", @@ -5698,6 +5741,8 @@ ${hashtag}`; devices: Array.from(personalDevices), workspaces: Array.from(personalWorkspaces), modelUsage: personalModelUsage, + localTokens, + localInteractions, }, team: { members: teamMembers, diff --git a/vscode-extension/src/webview/dashboard/main.ts b/vscode-extension/src/webview/dashboard/main.ts index d5f342fe..b79bb4e2 100644 --- a/vscode-extension/src/webview/dashboard/main.ts +++ b/vscode-extension/src/webview/dashboard/main.ts @@ -16,6 +16,8 @@ interface UserSummary { devices: string[]; workspaces: string[]; modelUsage: ModelUsage; + localTokens?: number; + localInteractions?: number; } interface TeamMemberStats { @@ -195,8 +197,8 @@ function buildPersonalSection(personal: UserSummary): HTMLElement { const grid = el("div", "stats-grid"); grid.append( - buildStatCard("Total Tokens", formatCompact(personal.totalTokens)), - buildStatCard("Interactions", formatNumber(personal.totalInteractions)), + buildStatCard("Synced Tokens", 
formatCompact(personal.totalTokens)), + buildStatCard("Synced Interactions", formatNumber(personal.totalInteractions)), buildStatCard("Estimated Cost", formatCost(personal.totalCost)), buildStatCard("Devices", personal.devices.length.toString()), buildStatCard("Workspaces", personal.workspaces.length.toString()), @@ -204,7 +206,35 @@ function buildPersonalSection(personal: UserSummary): HTMLElement { const modelSection = buildModelBreakdown(personal.modelUsage); - section.append(sectionTitle, grid, modelSection); + // Show sync coverage warning when local activity significantly exceeds synced data + const localTokens = personal.localTokens ?? 0; + const syncedTokens = personal.totalTokens; + const showSyncWarning = localTokens > 0 && syncedTokens < localTokens * 0.9; + + if (showSyncWarning) { + const syncCoverage = localTokens > 0 ? Math.round((syncedTokens / localTokens) * 100) : 100; + const warning = el("div", "sync-warning"); + warning.style.cssText = + "margin-top: 12px; padding: 10px 14px; background: var(--vscode-inputValidation-warningBackground, rgba(200,120,0,0.1)); border: 1px solid var(--vscode-inputValidation-warningBorder, rgba(200,120,0,0.5)); border-radius: 6px; font-size: 12px; color: var(--vscode-foreground, #ccc);"; + const warningTitle = el("div", ""); + warningTitle.style.cssText = "font-weight: 600; margin-bottom: 4px;"; + warningTitle.textContent = `⚠️ Only ${syncCoverage}% of your local activity is synced to cloud (${formatCompact(syncedTokens)} of ${formatCompact(localTokens)} local tokens in last 30 days)`; + const warningNote = el("div", ""); + warningNote.style.cssText = "font-size: 11px; opacity: 0.7; margin-top: 3px;"; + warningNote.textContent = "To close the gap: increase the lookback window, run a manual sync, or check that blob upload is enabled and configured."; + const backfillBtn = document.createElement("button"); + backfillBtn.textContent = "⏫ Backfill Historical Data"; + backfillBtn.style.cssText = + "margin-top: 10px; 
padding: 5px 12px; font-size: 12px; cursor: pointer; border: 1px solid var(--vscode-button-border, transparent); background: var(--vscode-button-secondaryBackground, #3a3d41); color: var(--vscode-button-secondaryForeground, #ccc); border-radius: 4px;"; + backfillBtn.title = "Scan all local session files and upload missing daily data to Azure Storage"; + backfillBtn.addEventListener("click", () => { + vscode.postMessage({ command: "backfillHistoricalData" }); + }); + warning.append(warningTitle, warningNote, backfillBtn); + section.append(sectionTitle, grid, warning, modelSection); + } else { + section.append(sectionTitle, grid, modelSection); + } return section; } From 819b3c83fc9b053e551120b74a88b3796d3dc092 Mon Sep 17 00:00:00 2001 From: Rob Bos Date: Fri, 27 Mar 2026 22:16:47 +0100 Subject: [PATCH 03/14] Enhance backfill functionality with progress reporting in dashboard --- vscode-extension/src/backend/facade.ts | 4 ++-- .../src/backend/services/syncService.ts | 16 ++++++++++++---- vscode-extension/src/extension.ts | 18 ++++++++++++++++-- vscode-extension/src/webview/dashboard/main.ts | 17 +++++++++++++++++ 4 files changed, 47 insertions(+), 8 deletions(-) diff --git a/vscode-extension/src/backend/facade.ts b/vscode-extension/src/backend/facade.ts index f0d4eaeb..42c19db4 100644 --- a/vscode-extension/src/backend/facade.ts +++ b/vscode-extension/src/backend/facade.ts @@ -447,9 +447,9 @@ export class BackendFacade { * rollups for every day within the given lookback window (default 365 days). * Use this when the normal sync has missed data due to the mtime filter. 
*/ - public async backfillHistoricalData(maxLookbackDays = 365): Promise { + public async backfillHistoricalData(maxLookbackDays = 365, onProgress?: (processed: number, total: number, daysFound: number) => void): Promise { const settings = this.getSettings(); - await this.syncService.backfillSync(settings, this.isConfigured(settings), maxLookbackDays); + await this.syncService.backfillSync(settings, this.isConfigured(settings), maxLookbackDays, onProgress); this.clearQueryCache(); } diff --git a/vscode-extension/src/backend/services/syncService.ts b/vscode-extension/src/backend/services/syncService.ts index b5ff27bb..62c025fd 100644 --- a/vscode-extension/src/backend/services/syncService.ts +++ b/vscode-extension/src/backend/services/syncService.ts @@ -607,13 +607,14 @@ export class SyncService { * Compute daily rollups from local session files. * Uses cached session data when available to avoid re-parsing files. */ - private async computeDailyRollupsFromLocalSessions(args: { lookbackDays: number; userId?: string; sessionFiles?: string[]; skipMtimeFilter?: boolean }): Promise<{ + private async computeDailyRollupsFromLocalSessions(args: { lookbackDays: number; userId?: string; sessionFiles?: string[]; skipMtimeFilter?: boolean; onProgress?: (processed: number, total: number, daysFound: number) => void }): Promise<{ rollups: Map; workspaceNamesById: Record; machineNamesById: Record; }> { const lookbackDays = args.lookbackDays; const skipMtimeFilter = args.skipMtimeFilter === true; + const onProgress = args.onProgress; const userId = (args.userId ?? '').trim() || undefined; const now = new Date(); // Include all events from the start of the first day in the range (UTC). 
@@ -644,7 +645,8 @@ export class SyncService { let filesSkipped = 0; let filesProcessed = 0; - this.deps.log(`Backend sync: analyzing ${sessionFiles.length} session files`); + const totalFiles = sessionFiles.length; + this.deps.log(`Backend sync: analyzing ${totalFiles} session files`); for (const sessionFile of sessionFiles) { let fileMtimeMs: number | undefined; @@ -659,6 +661,11 @@ export class SyncService { continue; } filesProcessed++; + // Report progress every 10 files (avoids flooding the callback) + if (onProgress && filesProcessed % 10 === 0) { + const daysFound = new Set(Array.from(rollups.values()).map(r => r.key.day)).size; + onProgress(filesProcessed, totalFiles, daysFound); + } } catch (e) { this.deps.warn(`Backend sync: failed to stat session file ${sessionFile}: ${e}`); continue; @@ -1058,7 +1065,7 @@ export class SyncService { * mtime-based file-age filter (e.g. the backend was configured after a large volume of * activity had already accumulated locally). */ - async backfillSync(settings: BackendSettings, isConfigured: boolean, maxLookbackDays = 365): Promise { + async backfillSync(settings: BackendSettings, isConfigured: boolean, maxLookbackDays = 365, onProgress?: (processed: number, total: number, daysFound: number) => void): Promise { const sharingPolicy = computeBackendSharingPolicy({ enabled: settings.enabled, profile: settings.sharingProfile, @@ -1084,7 +1091,8 @@ export class SyncService { const { rollups, workspaceNamesById, machineNamesById } = await this.computeDailyRollupsFromLocalSessions({ lookbackDays: maxLookbackDays, userId: resolvedIdentity.userId, - skipMtimeFilter: true // backfill: open every file regardless of age + skipMtimeFilter: true, // backfill: open every file regardless of age + onProgress }); const dayKeys = new Set(); diff --git a/vscode-extension/src/extension.ts b/vscode-extension/src/extension.ts index 5469e50d..056631ae 100644 --- a/vscode-extension/src/extension.ts +++ b/vscode-extension/src/extension.ts @@ 
-5277,10 +5277,24 @@ ${hashtag}`; } this.log('🔄 Starting historical data backfill...'); - this.dashboardPanel?.webview.postMessage({ command: 'dashboardLoading' }); + this.dashboardPanel?.webview.postMessage({ + command: 'backfillProgress', + text: 'Backfill starting — scanning local session files...', + processed: 0, + total: 0, + daysFound: 0, + }); try { - await this.backend.backfillHistoricalData(); + await this.backend.backfillHistoricalData(365, (processed, total, daysFound) => { + this.dashboardPanel?.webview.postMessage({ + command: 'backfillProgress', + text: `Backfill in progress: ${processed}${total > 0 ? `/${total}` : ''} files scanned, ${daysFound} days found...`, + processed, + total, + daysFound, + }); + }); this.log('✅ Historical data backfill complete'); vscode.window.showInformationMessage('Historical data backfill complete. Refreshing dashboard...'); // Invalidate the cached dashboard data so the refresh reflects the new backfill diff --git a/vscode-extension/src/webview/dashboard/main.ts b/vscode-extension/src/webview/dashboard/main.ts index b79bb4e2..a70bac07 100644 --- a/vscode-extension/src/webview/dashboard/main.ts +++ b/vscode-extension/src/webview/dashboard/main.ts @@ -74,6 +74,9 @@ const initialData = window.__INITIAL_DASHBOARD__; console.log("[CopilotTokenTracker] dashboard webview loaded"); console.log("[CopilotTokenTracker] initialData:", initialData); +/** Reference to the loading text element so backfillProgress messages can update it in place. 
*/ +let loadingTextEl: HTMLElement | null = null; + function showLoading(): void { const root = document.getElementById("root"); if (!root) { @@ -96,6 +99,7 @@ function showLoading(): void { const loading = el("div", "loading-indicator"); const spinner = el("div", "spinner"); const loadingText = el("div", "loading-text", "Loading dashboard data..."); + loadingTextEl = loadingText; loading.append(spinner, loadingText); container.append(header, loading); @@ -103,6 +107,7 @@ function showLoading(): void { } function showError(message: string): void { + loadingTextEl = null; const root = document.getElementById("root"); if (!root) { return; @@ -131,6 +136,7 @@ function showError(message: string): void { } function render(stats: DashboardStats): void { + loadingTextEl = null; setCompactNumbers(stats.compactNumbers !== false); const root = document.getElementById("root"); if (!root) { @@ -625,6 +631,17 @@ window.addEventListener("message", (event) => { case "dashboardError": showError(message.message); break; + case "backfillProgress": { + const progressText = message.text ?? 
"Backfill in progress..."; + if (!loadingTextEl) { + showLoading(); // ensures loadingTextEl is set + } + const textEl = loadingTextEl; + if (textEl) { + textEl.textContent = progressText; + } + break; + } } }); From 655087f6530554607442bc2d801fca07596f86e8 Mon Sep 17 00:00:00 2001 From: Rob Bos Date: Fri, 27 Mar 2026 22:22:09 +0100 Subject: [PATCH 04/14] Add Visual Studio session detection to backend services and update dependencies --- vscode-extension/src/backend/facade.ts | 3 +++ vscode-extension/src/backend/services/syncService.ts | 8 ++++++++ vscode-extension/src/extension.ts | 2 ++ 3 files changed, 13 insertions(+) diff --git a/vscode-extension/src/backend/facade.ts b/vscode-extension/src/backend/facade.ts index 42c19db4..1aadc2ca 100644 --- a/vscode-extension/src/backend/facade.ts +++ b/vscode-extension/src/backend/facade.ts @@ -73,6 +73,8 @@ export interface BackendFacadeDeps { modelUsage: ModelUsage; timestamp: number; }>; + // Visual Studio session detection (binary MessagePack — cannot be parsed as JSON) + isVSSessionFile?: (sessionFile: string) => boolean; } export class BackendFacade { @@ -126,6 +128,7 @@ export class BackendFacade { statSessionFile: deps.statSessionFile, isOpenCodeSession: deps.isOpenCodeSession, getOpenCodeSessionData: deps.getOpenCodeSessionData, + isVSSessionFile: deps.isVSSessionFile, }, this.credentialService, this.dataPlaneService, diff --git a/vscode-extension/src/backend/services/syncService.ts b/vscode-extension/src/backend/services/syncService.ts index 62c025fd..eb39cd0c 100644 --- a/vscode-extension/src/backend/services/syncService.ts +++ b/vscode-extension/src/backend/services/syncService.ts @@ -84,6 +84,8 @@ export interface SyncServiceDeps { // OpenCode session handling isOpenCodeSession?: (sessionFile: string) => boolean; getOpenCodeSessionData?: (sessionFile: string) => Promise<{ tokens: number; interactions: number; modelUsage: any; timestamp: number }>; + // Visual Studio session detection (binary MessagePack — 
cannot be parsed as JSON) + isVSSessionFile?: (sessionFile: string) => boolean; } /** @@ -671,6 +673,12 @@ export class SyncService { continue; } + // Skip Visual Studio session files — they are binary MessagePack, not JSON + if (this.deps.isVSSessionFile && this.deps.isVSSessionFile(sessionFile)) { + filesSkipped++; + continue; + } + // Handle OpenCode sessions separately (different data format) if (this.deps.isOpenCodeSession && this.deps.isOpenCodeSession(sessionFile)) { if (!this.deps.getOpenCodeSessionData) { diff --git a/vscode-extension/src/extension.ts b/vscode-extension/src/extension.ts index 056631ae..e0e0120f 100644 --- a/vscode-extension/src/extension.ts +++ b/vscode-extension/src/extension.ts @@ -7078,6 +7078,8 @@ export function activate(context: vscode.ExtensionContext) { (tokenTracker as any).openCode.isOpenCodeSessionFile(sessionFile), getOpenCodeSessionData: (sessionFile: string) => (tokenTracker as any).getOpenCodeSessionData(sessionFile), + isVSSessionFile: (sessionFile: string) => + (tokenTracker as any).visualStudio.isVSSessionFile(sessionFile), }); const backendHandler = new BackendCommandHandler({ From 4b3c1dbc2e2e316b18266b43a6d45137e9f50527 Mon Sep 17 00:00:00 2001 From: Rob Bos Date: Fri, 27 Mar 2026 22:35:49 +0100 Subject: [PATCH 05/14] Enhance dashboard data handling by incorporating machineId for user identity resolution and updating backfill completion messaging --- vscode-extension/src/extension.ts | 47 +++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/vscode-extension/src/extension.ts b/vscode-extension/src/extension.ts index e0e0120f..4b52a8a1 100644 --- a/vscode-extension/src/extension.ts +++ b/vscode-extension/src/extension.ts @@ -5296,7 +5296,7 @@ ${hashtag}`; }); }); this.log('✅ Historical data backfill complete'); - vscode.window.showInformationMessage('Historical data backfill complete. Refreshing dashboard...'); + vscode.window.setStatusBarMessage('$(check) Backfill complete. 
Refreshing dashboard...', 5000); // Invalidate the cached dashboard data so the refresh reflects the new backfill this.lastDashboardData = undefined; await this.refreshDashboardPanel(); @@ -5319,6 +5319,8 @@ ${hashtag}`; const { BackendUtility } = await import("./backend/services/utilityService.js"); + const { computeBackendSharingPolicy, hashMachineIdForTeam } = + await import("./backend/sharingProfile.js"); const settings = this.backend.getSettings(); // Log backend settings for debugging @@ -5326,10 +5328,26 @@ ${hashtag}`; `[Dashboard] Backend settings - userIdentityMode: ${settings.userIdentityMode}, configured userId: "${settings.userId}", datasetId: "${settings.datasetId}"`, ); + // Compute the effective sharing policy so we know how entities were stored + const sharingPolicy = computeBackendSharingPolicy({ + enabled: settings.enabled ?? true, + profile: settings.sharingProfile ?? 'off', + shareWorkspaceMachineNames: settings.shareWorkspaceMachineNames ?? false, + }); + // Resolve the effective userId for the current user based on backend config const currentUserId = await this.backend.resolveEffectiveUserId(settings); - if (!currentUserId) { + // When includeUserDimension is false (soloFull / teamAnonymized), entities are stored + // without a userId. In that case, fall back to matching personal data by machineId. + const rawMachineId = vscode.env.machineId; + const currentMachineId = sharingPolicy.includeUserDimension + ? "" // not needed — we match by userId + : sharingPolicy.machineIdStrategy === "hashed" + ? hashMachineIdForTeam({ datasetId: settings.datasetId ?? "", machineId: rawMachineId }) + : rawMachineId; // 'raw' strategy (soloFull) + + if (!currentUserId && !currentMachineId) { this.warn( "[Dashboard] No user identity available. 
Ensure sharing profile includes user dimension.", ); @@ -5456,8 +5474,12 @@ ${hashtag}`; } } - // Personal data aggregation - match against resolved userId - if (currentUserId && userId === currentUserId) { + // Personal data aggregation - match against resolved userId (or machineId when + // includeUserDimension is false, i.e. soloFull / teamAnonymized profiles). + const isCurrentUser = sharingPolicy.includeUserDimension + ? (currentUserId !== "" && userId === currentUserId) + : (currentMachineId !== "" && machineId === currentMachineId); + if (isCurrentUser) { personalTotalTokens += tokens; personalTotalInteractions += interactions; personalDevices.add(machineId); @@ -5470,9 +5492,12 @@ ${hashtag}`; personalModelUsage[model].outputTokens += outputTokens; } - // Team data aggregation - use userId|datasetId as key to track users across datasets - if (userId && userId.trim()) { - const userKey = `${userId}|${datasetId}`; + // Team data aggregation - use userId|datasetId as key to track users across datasets. + // When includeUserDimension is false, use machineId as the team member key so that + // each machine appears as a distinct entry even though no userId was stored. + const teamMemberKey = (userId && userId.trim()) ? userId : (machineId ? `machine:${machineId}` : ""); + if (teamMemberKey) { + const userKey = `${teamMemberKey}|${datasetId}`; if (!userMap.has(userKey)) { userMap.set(userKey, { tokens: 0, @@ -5713,11 +5738,15 @@ ${hashtag}`; // For the current user, override the fluency score with the locally-computed one. // Azure Table Storage only contains recently-synced schema-v4 entities (a small window), // while calculateMaturityScores() uses the full local session log history. - if (currentUserId) { + // When includeUserDimension is false, the team member key is "machine:". + const currentTeamMemberKey = currentUserId + ? currentUserId + : currentMachineId ? 
`machine:${currentMachineId}` : ""; + if (currentTeamMemberKey) { try { const localMaturity = await this.calculateMaturityScores(true); for (const member of teamMembers) { - if (member.userId === currentUserId) { + if (member.userId === currentTeamMemberKey) { member.fluencyStage = localMaturity.overallStage; member.fluencyLabel = localMaturity.overallLabel; member.fluencyCategories = localMaturity.categories.map(c => ({ From 929d1e9f029cde2c3517abe631c46ca300978685 Mon Sep 17 00:00:00 2001 From: Rob Bos Date: Fri, 27 Mar 2026 22:36:04 +0100 Subject: [PATCH 06/14] Refactor trigger type determination to use environment variables for improved readability --- .github/workflows/cli-publish.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cli-publish.yml b/.github/workflows/cli-publish.yml index 6bca8c30..7297fb38 100644 --- a/.github/workflows/cli-publish.yml +++ b/.github/workflows/cli-publish.yml @@ -71,10 +71,13 @@ jobs: - name: Determine trigger type id: trigger_type + env: + EVENT_NAME: ${{ github.event_name }} + GIT_REF: ${{ github.ref }} run: | - if [[ "${{ github.event_name }}" == "push" && "${{ github.ref }}" == refs/tags/cli/v* ]]; then + if [[ "$EVENT_NAME" == "push" && "$GIT_REF" == refs/tags/cli/v* ]]; then echo "is_tag=true" >> "$GITHUB_OUTPUT" - TAG_VERSION=${GITHUB_REF#refs/tags/cli/v} + TAG_VERSION=${GIT_REF#refs/tags/cli/v} echo "tag_version=$TAG_VERSION" >> "$GITHUB_OUTPUT" echo "Triggered by tag push: cli/v$TAG_VERSION" else From 8b4d2018c1f9c7c2db814ec1e389fe686174a510 Mon Sep 17 00:00:00 2001 From: Rob Bos Date: Fri, 27 Mar 2026 22:57:21 +0100 Subject: [PATCH 07/14] Getting ready for a new release --- vscode-extension/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vscode-extension/package.json b/vscode-extension/package.json index 31a98c21..586bd27a 100644 --- a/vscode-extension/package.json +++ b/vscode-extension/package.json @@ -2,7 +2,7 @@ "name": 
"copilot-token-tracker", "displayName": "AI Engineering Fluency", "description": "Track your AI Engineering Fluency — daily and monthly token usage, cost estimates, and productivity insights in VS Code.", - "version": "0.0.23", + "version": "0.0.24", "publisher": "RobBos", "engines": { "vscode": "^1.110.0" From bfffb037372e8135aea9f3339976853da71a3f7a Mon Sep 17 00:00:00 2001 From: Rob Bos Date: Fri, 27 Mar 2026 22:57:58 +0100 Subject: [PATCH 08/14] Improve initial token usage analysis delay and refactor file system checks to use async methods for better performance --- vscode-extension/src/extension.ts | 12 +++- vscode-extension/src/sessionDiscovery.ts | 80 +++++++++++++----------- 2 files changed, 55 insertions(+), 37 deletions(-) diff --git a/vscode-extension/src/extension.ts b/vscode-extension/src/extension.ts index 4b52a8a1..23422cda 100644 --- a/vscode-extension/src/extension.ts +++ b/vscode-extension/src/extension.ts @@ -496,6 +496,14 @@ class CopilotTokenTracker implements vscode.Disposable { private scheduleInitialUpdate(): void { this.log('🚀 Starting token usage analysis...'); + // Use a longer delay (3 s) so that: + // 1. VS Code and other extensions finish their own startup work first. + // 2. On macOS, the TCC privacy framework has time to resolve any first-time + // folder-access permissions before our synchronous filesystem scan begins. + // Without this delay the sync fs calls block the shared extension-host + // event loop and make VS Code appear frozen. + // Previously a "wait for Copilot ready" gate provided a similar natural delay; + // this explicit wait restores that behaviour for users who do not have Copilot. setTimeout(async () => { try { await this.updateTokenStats(); @@ -505,7 +513,7 @@ class CopilotTokenTracker implements vscode.Disposable { } catch (error) { this.error('Error in initial update:', error); } - }, 100); + }, 3000); } /** @@ -5449,7 +5457,7 @@ ${hashtag}`; for (const entity of allEntities) { const userId = (entity.userId ?? 
"").toString().replace(/^u:/, ""); // Strip u: prefix const datasetId = (entity.datasetId ?? "").toString().replace(/^ds:/, ""); // Strip ds: prefix - const machineId = (entity.machineId ?? "").toString(); + const machineId = (entity.machineId ?? "").toString().replace(/^mc:/, ""); // Strip mc: prefix const workspaceId = (entity.workspaceId ?? "").toString(); const model = (entity.model ?? "").toString().replace(/^m:/, ""); // Strip m: prefix const inputTokens = Number.isFinite(Number(entity.inputTokens)) diff --git a/vscode-extension/src/sessionDiscovery.ts b/vscode-extension/src/sessionDiscovery.ts index 5a94dbd6..46a8e9de 100644 --- a/vscode-extension/src/sessionDiscovery.ts +++ b/vscode-extension/src/sessionDiscovery.ts @@ -189,8 +189,8 @@ export class SessionDiscovery { if (sampleDir && sampleDir.trim().length > 0) { const resolvedSampleDir = sampleDir.trim(); try { - if (fs.existsSync(resolvedSampleDir)) { - const sampleFiles = fs.readdirSync(resolvedSampleDir) + if (await this.pathExists(resolvedSampleDir)) { + const sampleFiles = (await fs.promises.readdir(resolvedSampleDir)) .filter(f => f.endsWith('.json') || f.endsWith('.jsonl')) .map(f => path.join(resolvedSampleDir, f)); this.deps.log(`📸 Sample data mode: using ${sampleFiles.length} file(s) from ${resolvedSampleDir}`); @@ -224,7 +224,7 @@ export class SessionDiscovery { for (let i = 0; i < allVSCodePaths.length; i++) { const codeUserPath = allVSCodePaths[i]; try { - if (fs.existsSync(codeUserPath)) { + if (await this.pathExists(codeUserPath)) { foundPaths.push(codeUserPath); } } catch (checkError) { @@ -250,16 +250,16 @@ export class SessionDiscovery { // Workspace storage sessions const workspaceStoragePath = path.join(codeUserPath, 'workspaceStorage'); try { - if (fs.existsSync(workspaceStoragePath)) { + if (await this.pathExists(workspaceStoragePath)) { try { - const workspaceDirs = fs.readdirSync(workspaceStoragePath); + const workspaceDirs = await fs.promises.readdir(workspaceStoragePath); for 
(const workspaceDir of workspaceDirs) { const chatSessionsPath = path.join(workspaceStoragePath, workspaceDir, 'chatSessions'); try { - if (fs.existsSync(chatSessionsPath)) { + if (await this.pathExists(chatSessionsPath)) { try { - const sessionFiles2 = fs.readdirSync(chatSessionsPath) + const sessionFiles2 = (await fs.promises.readdir(chatSessionsPath)) .filter(file => file.endsWith('.json') || file.endsWith('.jsonl')) .map(file => path.join(chatSessionsPath, file)); if (sessionFiles2.length > 0) { @@ -285,9 +285,9 @@ export class SessionDiscovery { // Global storage sessions (legacy emptyWindowChatSessions) const globalStoragePath = path.join(codeUserPath, 'globalStorage', 'emptyWindowChatSessions'); try { - if (fs.existsSync(globalStoragePath)) { + if (await this.pathExists(globalStoragePath)) { try { - const globalSessionFiles = fs.readdirSync(globalStoragePath) + const globalSessionFiles = (await fs.promises.readdir(globalStoragePath)) .filter(file => file.endsWith('.json') || file.endsWith('.jsonl')) .map(file => path.join(globalStoragePath, file)); if (globalSessionFiles.length > 0) { @@ -305,9 +305,9 @@ export class SessionDiscovery { // GitHub Copilot Chat extension global storage const copilotChatGlobalPath = path.join(codeUserPath, 'globalStorage', 'github.copilot-chat'); try { - if (fs.existsSync(copilotChatGlobalPath)) { + if (await this.pathExists(copilotChatGlobalPath)) { this.deps.log(`📄 Scanning ${pathName}/globalStorage/github.copilot-chat`); - this.scanDirectoryForSessionFiles(copilotChatGlobalPath, sessionFiles); + await this.scanDirectoryForSessionFiles(copilotChatGlobalPath, sessionFiles); } } catch (checkError) { this.deps.warn(`Could not check Copilot Chat global storage path ${copilotChatGlobalPath}: ${checkError}`); @@ -316,11 +316,12 @@ export class SessionDiscovery { // Check for Copilot CLI session-state directory (new location for agent mode sessions) const copilotCliSessionPath = path.join(os.homedir(), '.copilot', 'session-state'); - 
this.deps.log(`📁 Checking Copilot CLI path: ${copilotCliSessionPath} (exists: ${fs.existsSync(copilotCliSessionPath)})`); + const copilotCliExists = await this.pathExists(copilotCliSessionPath); + this.deps.log(`📁 Checking Copilot CLI path: ${copilotCliSessionPath} (exists: ${copilotCliExists})`); try { - if (fs.existsSync(copilotCliSessionPath)) { + if (copilotCliExists) { try { - const entries = fs.readdirSync(copilotCliSessionPath, { withFileTypes: true }); + const entries = await fs.promises.readdir(copilotCliSessionPath, { withFileTypes: true }); // Collect flat .json/.jsonl files at the top level const cliSessionFiles = entries @@ -337,12 +338,10 @@ export class SessionDiscovery { for (const subDir of subDirs) { const eventsFile = path.join(copilotCliSessionPath, subDir.name, 'events.jsonl'); try { - if (fs.existsSync(eventsFile)) { - const stats = fs.statSync(eventsFile); - if (stats.size > 0) { - sessionFiles.push(eventsFile); - subDirSessionCount++; - } + const eventsStats = await fs.promises.stat(eventsFile); + if (eventsStats.size > 0) { + sessionFiles.push(eventsFile); + subDirSessionCount++; } } catch { // Ignore individual file access errors @@ -364,20 +363,22 @@ export class SessionDiscovery { const openCodeDataDir = this.deps.openCode.getOpenCodeDataDir(); const openCodeSessionDir = path.join(openCodeDataDir, 'storage', 'session'); const openCodeDbPath = path.join(openCodeDataDir, 'opencode.db'); - this.deps.log(`📁 Checking OpenCode JSON path: ${openCodeSessionDir} (exists: ${fs.existsSync(openCodeSessionDir)})`); - this.deps.log(`📁 Checking OpenCode DB path: ${openCodeDbPath} (exists: ${fs.existsSync(openCodeDbPath)})`); + const openCodeSessionDirExists = await this.pathExists(openCodeSessionDir); + const openCodeDbExists = await this.pathExists(openCodeDbPath); + this.deps.log(`📁 Checking OpenCode JSON path: ${openCodeSessionDir} (exists: ${openCodeSessionDirExists})`); + this.deps.log(`📁 Checking OpenCode DB path: ${openCodeDbPath} (exists: 
${openCodeDbExists})`); try { - if (fs.existsSync(openCodeSessionDir)) { - const scanOpenCodeDir = (dir: string) => { + if (openCodeSessionDirExists) { + const scanOpenCodeDir = async (dir: string): Promise<void> => { try { - const entries = fs.readdirSync(dir, { withFileTypes: true }); + const entries = await fs.promises.readdir(dir, { withFileTypes: true }); for (const entry of entries) { if (entry.isDirectory()) { - scanOpenCodeDir(path.join(dir, entry.name)); + await scanOpenCodeDir(path.join(dir, entry.name)); } else if (entry.name.startsWith('ses_') && entry.name.endsWith('.json')) { const fullPath = path.join(dir, entry.name); try { - const stats = fs.statSync(fullPath); + const stats = await fs.promises.stat(fullPath); if (stats.size > 0) { sessionFiles.push(fullPath); } @@ -390,7 +391,7 @@ export class SessionDiscovery { // Ignore directory access errors } }; - scanOpenCodeDir(openCodeSessionDir); + await scanOpenCodeDir(openCodeSessionDir); const openCodeCount = sessionFiles.length - (sessionFiles.filter(f => !this.deps.openCode.isOpenCodeSessionFile(f))).length; if (openCodeCount > 0) { this.deps.log(`📄 Found ${openCodeCount} session files in OpenCode storage`); @@ -403,7 +404,7 @@ export class SessionDiscovery { // Check for OpenCode sessions in SQLite database (opencode.db) // Newer OpenCode versions store sessions in SQLite instead of JSON files try { - if (fs.existsSync(openCodeDbPath)) { + if (openCodeDbExists) { const existingSessionIds = new Set( sessionFiles .filter(f => this.deps.openCode.isOpenCodeSessionFile(f)) @@ -435,9 +436,10 @@ export class SessionDiscovery { let crushTotal = 0; for (const project of crushProjects) { const dbPath = path.join(project.data_dir, 'crush.db'); - this.deps.log(`📁 Checking Crush DB path: ${dbPath} (exists: ${fs.existsSync(dbPath)})`); + const crushDbExists = await this.pathExists(dbPath); + this.deps.log(`📁 Checking Crush DB path: ${dbPath} (exists: ${crushDbExists})`); try { - if (fs.existsSync(dbPath)) { + if
(crushDbExists) { const sessionIds = await this.deps.crush.discoverSessionsInDb(dbPath); for (const sessionId of sessionIds) { // Virtual path: <data_dir>/crush.db#<sessionId> @@ -493,18 +495,26 @@ export class SessionDiscovery { return sessionFiles; } + /** + * Async helper: resolves true if the path is accessible, false otherwise. + * Prefer this over fs.existsSync() to avoid blocking the extension-host event loop. + */ + private async pathExists(p: string): Promise<boolean> { + try { await fs.promises.access(p); return true; } catch { return false; } + } + /** * Recursively scan a directory for session files (.json and .jsonl) * * NOTE: Mirrors logic in .github/skills/copilot-log-analysis/session-file-discovery.js */ - scanDirectoryForSessionFiles(dir: string, sessionFiles: string[]): void { + async scanDirectoryForSessionFiles(dir: string, sessionFiles: string[]): Promise<void> { try { - const entries = fs.readdirSync(dir, { withFileTypes: true }); + const entries = await fs.promises.readdir(dir, { withFileTypes: true }); for (const entry of entries) { const fullPath = path.join(dir, entry.name); if (entry.isDirectory()) { - this.scanDirectoryForSessionFiles(fullPath, sessionFiles); + await this.scanDirectoryForSessionFiles(fullPath, sessionFiles); } else if (entry.name.endsWith('.json') || entry.name.endsWith('.jsonl')) { // Skip known non-session files (embeddings, indexes, etc.)
if (this.isNonSessionFile(entry.name)) { @@ -512,7 +522,7 @@ export class SessionDiscovery { } // Only add files that look like session files (have reasonable content) try { - const stats = fs.statSync(fullPath); + const stats = await fs.promises.stat(fullPath); if (stats.size > 0) { sessionFiles.push(fullPath); } From 53b7960eb6bf2b9fd195ae2a6c90a08f71955bc1 Mon Sep 17 00:00:00 2001 From: Rob Bos Date: Fri, 27 Mar 2026 23:03:36 +0100 Subject: [PATCH 09/14] Update description in package.json for clarity on AI fluency insights --- vscode-extension/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vscode-extension/package.json b/vscode-extension/package.json index 586bd27a..e2d68522 100644 --- a/vscode-extension/package.json +++ b/vscode-extension/package.json @@ -1,7 +1,7 @@ { "name": "copilot-token-tracker", "displayName": "AI Engineering Fluency", - "description": "Track your AI Engineering Fluency — daily and monthly token usage, cost estimates, and productivity insights in VS Code.", + "description": "Track your AI Engineering Fluency — daily and monthly token usage, cost estimates, and AI fluency insights in VS Code.", "version": "0.0.24", "publisher": "RobBos", "engines": { From 7b9fd3c46d19d71bbe0f11ba45d100352a8d8a78 Mon Sep 17 00:00:00 2001 From: Rob Bos Date: Fri, 27 Mar 2026 23:52:05 +0100 Subject: [PATCH 10/14] Add crush session handling and enhance dashboard stats calculation --- .claude/settings.local.json | 7 ++++ vscode-extension/src/backend/facade.ts | 10 +++++ .../src/backend/services/syncService.ts | 41 ++++++++++++++++++ vscode-extension/src/crush.ts | 42 +++++++++++++++++++ vscode-extension/src/extension.ts | 26 ++++++++++-- .../src/webview/dashboard/main.ts | 6 +-- 6 files changed, 125 insertions(+), 7 deletions(-) create mode 100644 .claude/settings.local.json diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 00000000..cbd3cfd1 --- /dev/null +++ 
b/.claude/settings.local.json @@ -0,0 +1,7 @@ +{ + "permissions": { + "allow": [ + "Bash(xargs grep:*)" + ] + } +} diff --git a/vscode-extension/src/backend/facade.ts b/vscode-extension/src/backend/facade.ts index 1aadc2ca..94b3c10f 100644 --- a/vscode-extension/src/backend/facade.ts +++ b/vscode-extension/src/backend/facade.ts @@ -73,6 +73,14 @@ export interface BackendFacadeDeps { modelUsage: ModelUsage; timestamp: number; }>; + // Crush session handling (per-project crush.db virtual paths) + isCrushSession?: (sessionFile: string) => boolean; + getCrushSessionData?: (sessionFile: string) => Promise<{ + tokens: number; + interactions: number; + modelUsage: ModelUsage; + timestamp: number; + }>; // Visual Studio session detection (binary MessagePack — cannot be parsed as JSON) isVSSessionFile?: (sessionFile: string) => boolean; } @@ -128,6 +136,8 @@ export class BackendFacade { statSessionFile: deps.statSessionFile, isOpenCodeSession: deps.isOpenCodeSession, getOpenCodeSessionData: deps.getOpenCodeSessionData, + isCrushSession: deps.isCrushSession, + getCrushSessionData: deps.getCrushSessionData, isVSSessionFile: deps.isVSSessionFile, }, this.credentialService, diff --git a/vscode-extension/src/backend/services/syncService.ts b/vscode-extension/src/backend/services/syncService.ts index eb39cd0c..e9ffa6f8 100644 --- a/vscode-extension/src/backend/services/syncService.ts +++ b/vscode-extension/src/backend/services/syncService.ts @@ -84,6 +84,9 @@ export interface SyncServiceDeps { // OpenCode session handling isOpenCodeSession?: (sessionFile: string) => boolean; getOpenCodeSessionData?: (sessionFile: string) => Promise<{ tokens: number; interactions: number; modelUsage: any; timestamp: number }>; + // Crush session handling (per-project crush.db virtual paths) + isCrushSession?: (sessionFile: string) => boolean; + getCrushSessionData?: (sessionFile: string) => Promise<{ tokens: number; interactions: number; modelUsage: any; timestamp: number }>; // Visual Studio 
session detection (binary MessagePack — cannot be parsed as JSON) isVSSessionFile?: (sessionFile: string) => boolean; } @@ -717,6 +720,44 @@ export class SyncService { } } + // Handle Crush sessions separately (virtual paths pointing to crush.db SQLite entries) + if (this.deps.isCrushSession && this.deps.isCrushSession(sessionFile)) { + if (!this.deps.getCrushSessionData) { + filesSkipped++; + continue; + } + + try { + const data = await this.deps.getCrushSessionData(sessionFile); + const eventMs = data.timestamp || fileMtimeMs; + + if (!eventMs || eventMs < startMs) { + filesSkipped++; + continue; + } + + const dayKey = this.utility.toUtcDayKey(new Date(eventMs)); + // Crush paths: /.crush/crush.db# — no workspaceStorage segment + const workspaceId = this.utility.extractWorkspaceIdFromSessionPath(sessionFile); + await this.ensureWorkspaceNameResolved(workspaceId, sessionFile, workspaceNamesById); + + for (const [model, usage] of Object.entries(data.modelUsage)) { + const key: DailyRollupKey = { day: dayKey, model, workspaceId, machineId, userId }; + upsertDailyRollup(rollups as any, key, { + inputTokens: (usage as any).inputTokens || 0, + outputTokens: (usage as any).outputTokens || 0, + interactions: (usage as any).interactions || 0, + }); + } + + filesProcessed++; + continue; + } catch (e) { + this.deps.warn(`Backend sync: failed to process Crush session ${sessionFile}: ${e}`); + continue; + } + } + const workspaceId = this.utility.extractWorkspaceIdFromSessionPath(sessionFile); await this.ensureWorkspaceNameResolved(workspaceId, sessionFile, workspaceNamesById); diff --git a/vscode-extension/src/crush.ts b/vscode-extension/src/crush.ts index 9fa3ebb5..56f12704 100644 --- a/vscode-extension/src/crush.ts +++ b/vscode-extension/src/crush.ts @@ -257,6 +257,48 @@ export class CrushDataAccess { return modelUsage; } + /** + * Returns a unified session data object for backend sync — mirrors the shape + * that OpenCodeDataAccess.getOpenCodeSessionData() produces. 
+ * + * Timestamp: the session's `created_at` Unix-seconds converted to milliseconds. + * Token counts: actual prompt_tokens + completion_tokens from the sessions table. + * Model usage: proportionally distributed across assistant-message models. + * Interactions: number of user-role messages. + */ + async getCrushSessionData(virtualPath: string): Promise<{ + tokens: number; + interactions: number; + modelUsage: ModelUsage & { [key: string]: { inputTokens: number; outputTokens: number; interactions?: number } }; + timestamp: number; + }> { + const session = await this.readCrushSession(virtualPath); + if (!session) { + return { tokens: 0, interactions: 0, modelUsage: {}, timestamp: 0 }; + } + const prompt = typeof session.prompt_tokens === 'number' ? session.prompt_tokens : 0; + const completion = typeof session.completion_tokens === 'number' ? session.completion_tokens : 0; + const tokens = prompt + completion; + // created_at is a Unix timestamp in seconds + const timestamp = typeof session.created_at === 'number' ? session.created_at * 1000 : Date.now(); + const modelUsage = await this.getCrushModelUsage(virtualPath); + const messages = await this.getCrushMessages(virtualPath); + const interactions = messages.filter(m => m.role === 'user').length; + // Annotate each model entry with an interaction count proportional to its token share + const totalTokens = prompt + completion; + const modelUsageWithInteractions: { [key: string]: { inputTokens: number; outputTokens: number; interactions?: number } } = {}; + for (const [model, usage] of Object.entries(modelUsage)) { + const modelTotal = usage.inputTokens + usage.outputTokens; + const fraction = totalTokens > 0 ? 
modelTotal / totalTokens : 0; + modelUsageWithInteractions[model] = { + inputTokens: usage.inputTokens, + outputTokens: usage.outputTokens, + interactions: Math.round(interactions * fraction), + }; + } + return { tokens, interactions, modelUsage: modelUsageWithInteractions, timestamp }; + } + /** * Collect tool-call names from all assistant messages in a session. * Parses the JSON `parts` array for `{type:"tool_call",data:{name:...}}` entries. diff --git a/vscode-extension/src/extension.ts b/vscode-extension/src/extension.ts index 23422cda..fcc52b02 100644 --- a/vscode-extension/src/extension.ts +++ b/vscode-extension/src/extension.ts @@ -5771,14 +5771,28 @@ ${hashtag}`; } } - // Fetch local stats to surface the sync coverage gap in the dashboard + // Fetch local stats to surface the sync coverage gap in the dashboard. + // Use the same lookback window as the backend so the comparison is apples-to-apples. let localTokens: number | undefined; let localInteractions: number | undefined; try { const localStats = await this.calculateDetailedStats(undefined); - localTokens = localStats.last30Days.tokens; - const p = localStats.last30Days; - localInteractions = p.sessions * p.avgInteractionsPerSession; + const lookback = settings.lookbackDays ?? 30; + if (lookback >= 30) { + // last30Days already covers the full window + localTokens = localStats.last30Days.tokens; + const p = localStats.last30Days; + localInteractions = p.sessions * p.avgInteractionsPerSession; + } else { + // Sum daily stats for exactly the configured lookback window + const cutoffDate = new Date(); + cutoffDate.setDate(cutoffDate.getDate() - lookback); + const cutoffStr = cutoffDate.toISOString().slice(0, 10); + const dailyStats = this.lastDailyStats ?? 
[]; + const inWindow = dailyStats.filter(d => d.date >= cutoffStr); + localTokens = inWindow.reduce((sum, d) => sum + d.tokens, 0); + localInteractions = inWindow.reduce((sum, d) => sum + d.interactions, 0); + } } catch { // Non-critical: leave undefined } @@ -7115,6 +7129,10 @@ export function activate(context: vscode.ExtensionContext) { (tokenTracker as any).openCode.isOpenCodeSessionFile(sessionFile), getOpenCodeSessionData: (sessionFile: string) => (tokenTracker as any).getOpenCodeSessionData(sessionFile), + isCrushSession: (sessionFile: string) => + (tokenTracker as any).crush.isCrushSessionFile(sessionFile), + getCrushSessionData: (sessionFile: string) => + (tokenTracker as any).crush.getCrushSessionData(sessionFile), isVSSessionFile: (sessionFile: string) => (tokenTracker as any).visualStudio.isVSSessionFile(sessionFile), }); diff --git a/vscode-extension/src/webview/dashboard/main.ts b/vscode-extension/src/webview/dashboard/main.ts index a70bac07..c79eece3 100644 --- a/vscode-extension/src/webview/dashboard/main.ts +++ b/vscode-extension/src/webview/dashboard/main.ts @@ -185,14 +185,14 @@ function renderShell(root: HTMLElement, stats: DashboardStats): void { ); const sections = el("div", "sections"); - sections.append(buildPersonalSection(stats.personal)); + sections.append(buildPersonalSection(stats.personal, stats.lookbackDays ?? 
30)); sections.append(buildTeamSection(stats)); container.append(header, sections, footer); root.append(themeStyle, style, container); } -function buildPersonalSection(personal: UserSummary): HTMLElement { +function buildPersonalSection(personal: UserSummary, lookbackDays: number): HTMLElement { const section = el("div", "section"); const sectionTitle = el( "h2", @@ -224,7 +224,7 @@ function buildPersonalSection(personal: UserSummary): HTMLElement { "margin-top: 12px; padding: 10px 14px; background: var(--vscode-inputValidation-warningBackground, rgba(200,120,0,0.1)); border: 1px solid var(--vscode-inputValidation-warningBorder, rgba(200,120,0,0.5)); border-radius: 6px; font-size: 12px; color: var(--vscode-foreground, #ccc);"; const warningTitle = el("div", ""); warningTitle.style.cssText = "font-weight: 600; margin-bottom: 4px;"; - warningTitle.textContent = `⚠️ Only ${syncCoverage}% of your local activity is synced to cloud (${formatCompact(syncedTokens)} of ${formatCompact(localTokens)} local tokens in last 30 days)`; + warningTitle.textContent = `⚠️ Only ${syncCoverage}% of your local activity is synced to cloud (${formatCompact(syncedTokens)} of ${formatCompact(localTokens)} local tokens in last ${lookbackDays} days)`; const warningNote = el("div", ""); warningNote.style.cssText = "font-size: 11px; opacity: 0.7; margin-top: 3px;"; warningNote.textContent = "To close the gap: increase the lookback window, run a manual sync, or check that blob upload is enabled and configured."; From ce3018469bc1df04da7bf93f52e241a03b25c00f Mon Sep 17 00:00:00 2001 From: Rob Bos Date: Sat, 28 Mar 2026 02:27:10 +0100 Subject: [PATCH 11/14] Enhance backfill progress messaging and add JSONL content handling in sync service --- .../src/backend/services/syncService.ts | 115 ++++++++++++++++-- vscode-extension/src/extension.ts | 6 +- 2 files changed, 111 insertions(+), 10 deletions(-) diff --git a/vscode-extension/src/backend/services/syncService.ts 
b/vscode-extension/src/backend/services/syncService.ts index e9ffa6f8..b83febda 100644 --- a/vscode-extension/src/backend/services/syncService.ts +++ b/vscode-extension/src/backend/services/syncService.ts @@ -20,6 +20,7 @@ import { createDailyAggEntity } from '../storageTables'; import { CredentialService } from './credentialService'; import { DataPlaneService } from './dataPlaneService'; import { BackendUtility } from './utilityService'; +import { isJsonlContent } from '../../tokenEstimation'; /** * Interface for blob upload service to avoid circular dependency. @@ -354,8 +355,62 @@ export class SyncService { // skip malformed line } } + } else if (isJsonlContent(content)) { + // VS Code chat session files have .json extension but use JSONL (patch-based) format. + // Process kind:2 events where k[0]==='requests' — each appends requests to the array. + // Deduplicate by requestId so incrementally-added requests are counted once. + // Track the session-level defaultModel from kind:0 and kind:2/selectedModel events so + // that requests without an explicit modelId still resolve to the correct model key + // (matching what getModelUsageFromSession stores in cachedData.modelUsage). 
+ let defaultModel = 'gpt-4o'; + const seenRequestIds = new Set(); + const lines = content.trim().split('\n'); + for (const line of lines) { + if (!line.trim()) { continue; } + try { + const event = JSON.parse(line); + if (!event || typeof event !== 'object') { continue; } + // Extract session-level default model (same logic as getModelUsageFromSession) + if (event.kind === 0) { + const modelId = event.v?.selectedModel?.identifier || + event.v?.selectedModel?.metadata?.id || + event.v?.inputState?.selectedModel?.metadata?.id; + if (modelId) { defaultModel = modelId.replace(/^copilot\//, ''); } + } + if (event.kind === 2 && Array.isArray(event.k) && event.k[0] === 'selectedModel') { + const modelId = event.v?.identifier || event.v?.metadata?.id; + if (modelId) { defaultModel = modelId.replace(/^copilot\//, ''); } + } + // kind:2, k[0]==='requests' events append new request(s) + if (event.kind === 2 && Array.isArray(event.k) && event.k[0] === 'requests' && Array.isArray(event.v)) { + for (const request of event.v) { + const req = request as ChatRequest; + const reqId = (req as any).requestId as string | undefined; + if (reqId && seenRequestIds.has(reqId)) { continue; } + if (reqId) { seenRequestIds.add(reqId); } + const normalizedTs = this.utility.normalizeTimestampToMs( + typeof req.timestamp !== 'undefined' ? req.timestamp : undefined + ); + const eventMs = Number.isFinite(normalizedTs) ? normalizedTs : fileMtimeMs; + if (!eventMs || eventMs < startMs) { continue; } + const dayKey = this.utility.toUtcDayKey(new Date(eventMs)); + // Use per-request modelId if present, otherwise fall back to the session + // default model (mirrors getModelUsageFromSession delta logic) + const rawModel = (req as any).modelId || (req as any).result?.metadata?.modelId; + const model = rawModel ? 
(rawModel as string).replace(/^copilot\//, '') : defaultModel; + if (!dayModelInteractions.has(dayKey)) { + dayModelInteractions.set(dayKey, new Map()); + } + const dayMap = dayModelInteractions.get(dayKey)!; + dayMap.set(model, (dayMap.get(model) || 0) + 1); + } + } + } catch { + // skip malformed lines + } + } } else { - // Handle JSON format (VS Code Copilot Chat) + // Handle regular JSON format (VS Code Copilot Chat legacy / OpenCode JSON) try { const sessionJson = JSON.parse(content); if (!sessionJson || typeof sessionJson !== 'object') { @@ -374,7 +429,6 @@ export class SyncService { const dayKey = this.utility.toUtcDayKey(new Date(eventMs)); const model = this.deps.getModelFromRequest(req); - // Track interaction for this day+model if (!dayModelInteractions.has(dayKey)) { @@ -711,8 +765,6 @@ export class SyncService { interactions: (usage as any).interactions || 0 }); } - - filesProcessed++; continue; } catch (e) { this.deps.warn(`Backend sync: failed to process OpenCode session ${sessionFile}: ${e}`); @@ -749,8 +801,6 @@ export class SyncService { interactions: (usage as any).interactions || 0, }); } - - filesProcessed++; continue; } catch (e) { this.deps.warn(`Backend sync: failed to process Crush session ${sessionFile}: ${e}`); @@ -794,8 +844,11 @@ export class SyncService { this.deps.warn(`Backend sync: failed to read session file ${sessionFile}: ${e}`); continue; } - // JSONL (Copilot CLI) - if (sessionFile.endsWith('.jsonl')) { + // JSONL (Copilot CLI or VS Code chat .json with JSONL content) + if (sessionFile.endsWith('.jsonl') || isJsonlContent(content)) { + let defaultModel = 'gpt-4o'; + const isVsCodeFormat = !sessionFile.endsWith('.jsonl'); + const seenReqIds = new Set(); const lines = content.trim().split('\n'); for (const line of lines) { if (!line.trim()) { @@ -806,13 +859,54 @@ export class SyncService { if (!event || typeof event !== 'object') { continue; } + // VS Code delta-based: track default model from session header events + if 
(isVsCodeFormat) { + if (event.kind === 0) { + const mId = event.v?.selectedModel?.identifier || event.v?.selectedModel?.metadata?.id || event.v?.inputState?.selectedModel?.metadata?.id; + if (mId) { defaultModel = mId.replace(/^copilot\//, ''); } + } + if (event.kind === 2 && Array.isArray(event.k) && event.k[0] === 'selectedModel') { + const mId = event.v?.identifier || event.v?.metadata?.id; + if (mId) { defaultModel = mId.replace(/^copilot\//, ''); } + } + if (event.kind === 2 && Array.isArray(event.k) && event.k[0] === 'requests' && Array.isArray(event.v)) { + for (const request of event.v) { + const req = request as ChatRequest; + const reqId = (req as any).requestId as string | undefined; + if (reqId && seenReqIds.has(reqId)) { continue; } + if (reqId) { seenReqIds.add(reqId); } + const normalizedTs = this.utility.normalizeTimestampToMs(typeof req.timestamp !== 'undefined' ? req.timestamp : undefined); + const eventMs = Number.isFinite(normalizedTs) ? normalizedTs : fileMtimeMs; + if (!eventMs || eventMs < startMs) { continue; } + const dayKey = this.utility.toUtcDayKey(new Date(eventMs)); + const rawModel = (req as any).modelId || (req as any).result?.metadata?.modelId; + const model = rawModel ? 
(rawModel as string).replace(/^copilot\//, '') : defaultModel; + + let inputTokens = 0; + let outputTokens = 0; + if ((req as any).message?.text) { + inputTokens = this.deps.estimateTokensFromText((req as any).message.text, model); + } + if (Array.isArray((req as any).response)) { + for (const r of (req as any).response) { + if (typeof r?.value === 'string') { outputTokens += this.deps.estimateTokensFromText(r.value, model); } + } + } + if (inputTokens === 0 && outputTokens === 0) { continue; } + const key: DailyRollupKey = { day: dayKey, model, workspaceId, machineId, userId }; + upsertDailyRollup(rollups as any, key, { inputTokens, outputTokens, interactions: 1 }); + } + } + continue; // processed as VS Code delta event; skip CLI logic below + } + // Copilot CLI non-delta format below const normalizedTs = this.utility.normalizeTimestampToMs(event.timestamp); const eventMs = Number.isFinite(normalizedTs) ? normalizedTs : fileMtimeMs; if (!eventMs || eventMs < startMs) { continue; } const dayKey = this.utility.toUtcDayKey(new Date(eventMs)); - const model = (event.model || 'gpt-4o').toString(); + const model = (event.model || defaultModel).toString(); let inputTokens = 0; let outputTokens = 0; @@ -1183,6 +1277,9 @@ export class SyncService { entities.push(entity); } + // Signal upload phase to caller before the (potentially slow) upsert + onProgress?.(-1, entities.length, sortedDays.length); + const { successCount, errors } = await this.dataPlaneService.upsertEntitiesBatch(tableClient, entities); if (errors.length > 0) { this.deps.warn(`Backfill: ${successCount}/${entities.length} entities synced, ${errors.length} failed`); diff --git a/vscode-extension/src/extension.ts b/vscode-extension/src/extension.ts index fcc52b02..5a8d4c7a 100644 --- a/vscode-extension/src/extension.ts +++ b/vscode-extension/src/extension.ts @@ -5295,9 +5295,13 @@ ${hashtag}`; try { await this.backend.backfillHistoricalData(365, (processed, total, daysFound) => { + // processed === -1 is a 
sentinel signalling the upload phase (total = entity count, daysFound = days) + const text = processed === -1 + ? `Backfill: uploading ${total} entries for ${daysFound} days to Azure...` + : `Backfill in progress: ${processed}${total > 0 ? `/${total}` : ''} files scanned, ${daysFound} days found...`; this.dashboardPanel?.webview.postMessage({ command: 'backfillProgress', - text: `Backfill in progress: ${processed}${total > 0 ? `/${total}` : ''} files scanned, ${daysFound} days found...`, + text, processed, total, daysFound, From 185d78677f36adf4a14bac5a4e54c83a8cdeb21e Mon Sep 17 00:00:00 2001 From: Rob Bos Date: Mon, 30 Mar 2026 10:09:08 +0200 Subject: [PATCH 12/14] Fix localInteractions to use exact daily stats instead of rounded approximation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous code computed localInteractions as: p.sessions * p.avgInteractionsPerSession But avgInteractionsPerSession is stored as Math.round(interactions / sessions), so the reverse multiplication could diverge significantly from the true total, causing the sync-coverage warning to trigger or be suppressed incorrectly. Fix: use the same daily-stats approach already used for the lookback < 30 branch — sum d.interactions across lastDailyStats entries within the lookback window. This gives the exact interaction count with no rounding loss. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- vscode-extension/src/extension.ts | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/vscode-extension/src/extension.ts b/vscode-extension/src/extension.ts index 5a8d4c7a..b48a0e1f 100644 --- a/vscode-extension/src/extension.ts +++ b/vscode-extension/src/extension.ts @@ -5780,23 +5780,18 @@ ${hashtag}`; let localTokens: number | undefined; let localInteractions: number | undefined; try { - const localStats = await this.calculateDetailedStats(undefined); + await this.calculateDetailedStats(undefined); // ensures lastDailyStats is fresh const lookback = settings.lookbackDays ?? 30; - if (lookback >= 30) { - // last30Days already covers the full window - localTokens = localStats.last30Days.tokens; - const p = localStats.last30Days; - localInteractions = p.sessions * p.avgInteractionsPerSession; - } else { - // Sum daily stats for exactly the configured lookback window - const cutoffDate = new Date(); - cutoffDate.setDate(cutoffDate.getDate() - lookback); - const cutoffStr = cutoffDate.toISOString().slice(0, 10); - const dailyStats = this.lastDailyStats ?? []; - const inWindow = dailyStats.filter(d => d.date >= cutoffStr); - localTokens = inWindow.reduce((sum, d) => sum + d.tokens, 0); - localInteractions = inWindow.reduce((sum, d) => sum + d.interactions, 0); - } + // Always derive exact counts from daily stats so we avoid the rounding loss introduced + // by avgInteractionsPerSession = Math.round(interactions / sessions). + // lastDailyStats covers the last 30 days; for longer windows it is the best available data. + const cutoffDate = new Date(); + cutoffDate.setDate(cutoffDate.getDate() - lookback); + const cutoffStr = cutoffDate.toISOString().slice(0, 10); + const dailyStats = this.lastDailyStats ?? 
[]; + const inWindow = dailyStats.filter(d => d.date >= cutoffStr); + localTokens = inWindow.reduce((sum, d) => sum + d.tokens, 0); + localInteractions = inWindow.reduce((sum, d) => sum + d.interactions, 0); } catch { // Non-critical: leave undefined } From ce87569ca22937dcf9b53d7dddd572fcad8dbae3 Mon Sep 17 00:00:00 2001 From: Rob Bos Date: Mon, 30 Mar 2026 10:40:44 +0200 Subject: [PATCH 13/14] Fix duplicate pathExists method in sessionDiscovery.ts Merge introduced two copies: one from the PR branch (line 41) and one from main's async-IO refactor commit. Remove the second duplicate. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- vscode-extension/src/sessionDiscovery.ts | 8 -------- 1 file changed, 8 deletions(-) diff --git a/vscode-extension/src/sessionDiscovery.ts b/vscode-extension/src/sessionDiscovery.ts index d5a27f44..c96241aa 100644 --- a/vscode-extension/src/sessionDiscovery.ts +++ b/vscode-extension/src/sessionDiscovery.ts @@ -520,14 +520,6 @@ export class SessionDiscovery { return sessionFiles; } - /** - * Async helper: resolves true if the path is accessible, false otherwise. - * Prefer this over fs.existsSync() to avoid blocking the extension-host event loop. 
- */ - private async pathExists(p: string): Promise { - try { await fs.promises.access(p); return true; } catch { return false; } - } - /** * Recursively scan a directory for session files (.json and .jsonl) * From 2a3236cc13a639ffc3c343e8ea091d94ecd91bb2 Mon Sep 17 00:00:00 2001 From: Rob Bos Date: Wed, 1 Apr 2026 18:01:47 +0200 Subject: [PATCH 14/14] feat: enhance data processing and cleanup for Azure entities in sync service --- .../src/backend/services/dataPlaneService.ts | 12 +- .../src/backend/services/syncService.ts | 182 ++++++++++++++++-- vscode-extension/src/cacheManager.ts | 2 + vscode-extension/src/extension.ts | 2 +- vscode-extension/src/usageAnalysis.ts | 20 +- 5 files changed, 185 insertions(+), 33 deletions(-) diff --git a/vscode-extension/src/backend/services/dataPlaneService.ts b/vscode-extension/src/backend/services/dataPlaneService.ts index eaff91ed..7b79cd5c 100644 --- a/vscode-extension/src/backend/services/dataPlaneService.ts +++ b/vscode-extension/src/backend/services/dataPlaneService.ts @@ -309,11 +309,13 @@ export class DataPlaneService { const dsPrefix = `ds:${datasetId}`; const uPrefix = `u:${userId}`; - const startDate = new Date(startDayKey); - const endDate = new Date(endDayKey); - endDate.setUTCHours(23, 59, 59, 999); - - const filter = `Timestamp ge datetime'${startDate.toISOString()}' and Timestamp le datetime'${endDate.toISOString()}'`; + // Filter by PartitionKey range instead of system Timestamp. + // PartitionKey format is "ds:{datasetId}|d:{YYYY-MM-DD}". + // Using system Timestamp would miss entities written by older syncs (e.g. written March 1 + // for day "2026-03-15") because their Timestamp predates the startDayKey. 
+ const pkStart = buildAggPartitionKey(datasetId, startDayKey); + const pkEnd = buildAggPartitionKey(datasetId, endDayKey); + const filter = `PartitionKey ge '${pkStart}' and PartitionKey le '${pkEnd}'`; this.log( `Deleting entities for user "${userId}" in dataset "${datasetId}" (${startDayKey} to ${endDayKey})`, diff --git a/vscode-extension/src/backend/services/syncService.ts b/vscode-extension/src/backend/services/syncService.ts index b83febda..9ba29f31 100644 --- a/vscode-extension/src/backend/services/syncService.ts +++ b/vscode-extension/src/backend/services/syncService.ts @@ -320,8 +320,23 @@ export class SyncService { // Map to track per-day per-model interactions for proper distribution const dayModelInteractions = new Map>(); - // Handle JSONL format (Copilot CLI) - if (sessionFile.endsWith('.jsonl')) { + // Detect whether this is a delta-based (VS Code Insiders) JSONL file or a CLI JSONL file. + // Both can use .jsonl extension, but delta-based files have kind:0/1/2 numeric events + // while CLI files use event types like user.message, assistant.message, etc. + // Check the first non-empty line for a numeric "kind" property to distinguish. + let isDeltaBasedJsonl = false; + if (isJsonlContent(content)) { + const firstLine = content.trim().split('\n')[0]?.trim(); + if (firstLine) { + try { + const firstEvent = JSON.parse(firstLine); + isDeltaBasedJsonl = typeof firstEvent.kind === 'number'; + } catch { /* not valid JSON, leave as false */ } + } + } + + // Handle non-delta JSONL format (Copilot CLI) + if (sessionFile.endsWith('.jsonl') && !isDeltaBasedJsonl) { const lines = content.trim().split('\n'); const todayKey = this.utility.toUtcDayKey(now); let lineCount = 0; @@ -355,8 +370,8 @@ export class SyncService { // skip malformed line } } - } else if (isJsonlContent(content)) { - // VS Code chat session files have .json extension but use JSONL (patch-based) format. 
+ } else if (isDeltaBasedJsonl) { + // VS Code delta-based JSONL files (.json or .jsonl extension with kind:0/1/2 events). // Process kind:2 events where k[0]==='requests' — each appends requests to the array. // Deduplicate by requestId so incrementally-added requests are counted once. // Track the session-level defaultModel from kind:0 and kind:2/selectedModel events so @@ -443,6 +458,43 @@ export class SyncService { } } + // Remap event model names to cached model names when there is a mismatch. + // CLI sessions often omit the model in individual events (defaulting to 'gpt-4o') + // while session.shutdown provides the actual model (e.g. 'claude-sonnet-4.6'). + // Without remapping, the lookup `cachedData.modelUsage[eventModel]` silently fails. + const cachedModelNames = Object.keys(cachedData.modelUsage); + if (cachedModelNames.length > 0) { + const allEventModels = new Set(); + for (const modelMap of dayModelInteractions.values()) { + for (const m of modelMap.keys()) { allEventModels.add(m); } + } + const unmappedModels = new Set(); + for (const m of allEventModels) { + if (!cachedData.modelUsage[m]) { unmappedModels.add(m); } + } + if (unmappedModels.size > 0) { + const totalCachedTokens = cachedModelNames.reduce((sum, m) => + sum + cachedData.modelUsage[m].inputTokens + cachedData.modelUsage[m].outputTokens, 0); + for (const [, modelMap] of dayModelInteractions) { + let unmappedCount = 0; + for (const um of unmappedModels) { + unmappedCount += modelMap.get(um) || 0; + modelMap.delete(um); + } + if (unmappedCount > 0) { + for (const cm of cachedModelNames) { + const ct = cachedData.modelUsage[cm].inputTokens + cachedData.modelUsage[cm].outputTokens; + const share = totalCachedTokens > 0 ? 
ct / totalCachedTokens : 1 / cachedModelNames.length; + const redistributed = Math.round(unmappedCount * share); + if (redistributed > 0) { + modelMap.set(cm, (modelMap.get(cm) || 0) + redistributed); + } + } + } + } + } + } + // Now distribute cached token counts proportionally across day+model combinations // based on the actual interaction distribution we just calculated for (const [dayKey, modelMap] of dayModelInteractions) { @@ -844,10 +896,18 @@ export class SyncService { this.deps.warn(`Backend sync: failed to read session file ${sessionFile}: ${e}`); continue; } - // JSONL (Copilot CLI or VS Code chat .json with JSONL content) + // JSONL (Copilot CLI or VS Code chat .json/.jsonl with delta-based content) if (sessionFile.endsWith('.jsonl') || isJsonlContent(content)) { let defaultModel = 'gpt-4o'; - const isVsCodeFormat = !sessionFile.endsWith('.jsonl'); + // Delta-based format can come from .json or .jsonl files; detect by first-line kind property + let isVsCodeFormat = false; + const firstJsonlLine = content.trim().split('\n')[0]?.trim(); + if (firstJsonlLine) { + try { + const firstEv = JSON.parse(firstJsonlLine); + isVsCodeFormat = typeof firstEv.kind === 'number'; + } catch { /* leave as false */ } + } const seenReqIds = new Set(); const lines = content.trim().split('\n'); for (const line of lines) { @@ -884,12 +944,26 @@ export class SyncService { let inputTokens = 0; let outputTokens = 0; - if ((req as any).message?.text) { - inputTokens = this.deps.estimateTokensFromText((req as any).message.text, model); - } - if (Array.isArray((req as any).response)) { - for (const r of (req as any).response) { - if (typeof r?.value === 'string') { outputTokens += this.deps.estimateTokensFromText(r.value, model); } + // Prefer actual API token counts when available in the request + const reqResult = (req as any).result; + if (reqResult?.usage) { + inputTokens = typeof reqResult.usage.promptTokens === 'number' ? 
reqResult.usage.promptTokens : 0; + outputTokens = typeof reqResult.usage.completionTokens === 'number' ? reqResult.usage.completionTokens : 0; + } else if (typeof reqResult?.promptTokens === 'number' && typeof reqResult?.outputTokens === 'number') { + inputTokens = reqResult.promptTokens; + outputTokens = reqResult.outputTokens; + } else if (reqResult?.metadata && typeof reqResult.metadata.promptTokens === 'number' && typeof reqResult.metadata.outputTokens === 'number') { + inputTokens = reqResult.metadata.promptTokens; + outputTokens = reqResult.metadata.outputTokens; + } else { + // Fallback to text-based estimation + if ((req as any).message?.text) { + inputTokens = this.deps.estimateTokensFromText((req as any).message.text, model); + } + if (Array.isArray((req as any).response)) { + for (const r of (req as any).response) { + if (typeof r?.value === 'string') { outputTokens += this.deps.estimateTokensFromText(r.value, model); } + } } } if (inputTokens === 0 && outputTokens === 0) { continue; } @@ -963,17 +1037,34 @@ export class SyncService { let inputTokens = 0; let outputTokens = 0; - if (req.message && req.message.parts) { - for (const part of req.message.parts) { - if (part?.text) { - inputTokens += this.deps.estimateTokensFromText(part.text, model); + // Prefer actual API token counts when available + const result = (req as any).result; + if (result?.usage) { + // OLD FORMAT (pre-Feb 2026) + inputTokens = typeof result.usage.promptTokens === 'number' ? result.usage.promptTokens : 0; + outputTokens = typeof result.usage.completionTokens === 'number' ? 
result.usage.completionTokens : 0; + } else if (typeof result?.promptTokens === 'number' && typeof result?.outputTokens === 'number') { + // NEW FORMAT (Feb 2026+) + inputTokens = result.promptTokens; + outputTokens = result.outputTokens; + } else if (result?.metadata && typeof result.metadata.promptTokens === 'number' && typeof result.metadata.outputTokens === 'number') { + // INSIDERS FORMAT (Feb 2026+): Tokens nested under result.metadata + inputTokens = result.metadata.promptTokens; + outputTokens = result.metadata.outputTokens; + } else { + // Fallback to text-based estimation + if (req.message && req.message.parts) { + for (const part of req.message.parts) { + if (part?.text) { + inputTokens += this.deps.estimateTokensFromText(part.text, model); + } } } - } - if (req.response && Array.isArray(req.response)) { - for (const responseItem of req.response) { - if (typeof responseItem?.value === 'string') { - outputTokens += this.deps.estimateTokensFromText(responseItem.value, model); + if (req.response && Array.isArray(req.response)) { + for (const responseItem of req.response) { + if (typeof responseItem?.value === 'string') { + outputTokens += this.deps.estimateTokensFromText(responseItem.value, model); + } } } } @@ -1101,6 +1192,33 @@ export class SyncService { this.deps.log(`Backend sync: upserting ${rollups.size} rollup entities (lookback ${settings.lookbackDays} days)`); const tableClient = this.dataPlaneService.createTableClient(settings, creds.tableCredential); + + // One-time cleanup: delete stale Azure entities for this user before upserting. + // Previous syncs may have written rows with incorrect model names, which create phantom + // RowKey entries that inflate the dashboard total. We track 'backend.lastCleanSyncVersion' + // so this runs once per cache version bump and not on every sync cycle. 
+ const CLEAN_SYNC_VERSION = 2; // Bump when the delete logic changes + const lastCleanVersion = this.deps.context?.globalState.get('backend.lastCleanSyncVersion') ?? 0; + const cacheWasCleared = lastCleanVersion < CLEAN_SYNC_VERSION; + if (cacheWasCleared && resolvedIdentity.userId && sortedDays.length > 0) { + const startDayKey = sortedDays[0]; + const endDayKey = sortedDays[sortedDays.length - 1]; + this.deps.log(`Backend sync: cleaning stale entities for user "${resolvedIdentity.userId}" (${startDayKey} to ${endDayKey})`); + try { + const deleteResult = await this.dataPlaneService.deleteEntitiesForUserDataset({ + tableClient, + userId: resolvedIdentity.userId, + datasetId: settings.datasetId, + startDayKey, + endDayKey, + }); + this.deps.log(`Backend sync: deleted ${deleteResult.deletedCount} stale entities (${deleteResult.errors.length} errors)`); + await this.deps.context?.globalState.update('backend.lastCleanSyncVersion', CLEAN_SYNC_VERSION); + } catch (e) { + this.deps.warn(`Backend sync: failed to clean stale entities: ${e}`); + } + } + const entities = []; for (const { key, value } of rollups.values()) { const effectiveUserId = (key.userId ?? '').trim() || undefined; @@ -1280,6 +1398,28 @@ export class SyncService { // Signal upload phase to caller before the (potentially slow) upsert onProgress?.(-1, entities.length, sortedDays.length); + // Delete stale entities for this user before upserting. + // Previous syncs may have written rows with incorrect model names (e.g. 'gpt-4o' instead + // of the actual model). Since the model name is part of the RowKey, corrected data creates + // new rows while old ones persist, causing over-counting on the dashboard. 
+ if (resolvedIdentity.userId && sortedDays.length > 0) { + const startDayKey = sortedDays[0]; + const endDayKey = sortedDays[sortedDays.length - 1]; + this.deps.log(`Backfill: cleaning stale entities for user "${resolvedIdentity.userId}" in date range ${startDayKey} to ${endDayKey}`); + try { + const deleteResult = await this.dataPlaneService.deleteEntitiesForUserDataset({ + tableClient, + userId: resolvedIdentity.userId, + datasetId: settings.datasetId, + startDayKey, + endDayKey, + }); + this.deps.log(`Backfill: deleted ${deleteResult.deletedCount} stale entities (${deleteResult.errors.length} errors)`); + } catch (e) { + this.deps.warn(`Backfill: failed to clean stale entities (continuing with upsert): ${e}`); + } + } + const { successCount, errors } = await this.dataPlaneService.upsertEntitiesBatch(tableClient, entities); if (errors.length > 0) { this.deps.warn(`Backfill: ${successCount}/${entities.length} entities synced, ${errors.length} failed`); diff --git a/vscode-extension/src/cacheManager.ts b/vscode-extension/src/cacheManager.ts index 25fd0f55..b8f1daa7 100644 --- a/vscode-extension/src/cacheManager.ts +++ b/vscode-extension/src/cacheManager.ts @@ -210,6 +210,8 @@ export class CacheManager { if (storedVersion !== this.cacheVersion) { this.deps.log(`Cache version mismatch (stored: ${storedVersion}, current: ${this.cacheVersion}) for ${cacheId}. 
Clearing cache.`); this.sessionFileCache = new Map(); + // Reset the clean-sync flag so the next sync deletes stale Azure entities + try { this.context.globalState.update('backend.lastCleanSyncVersion', undefined); } catch { /* best-effort */ } return; } diff --git a/vscode-extension/src/extension.ts b/vscode-extension/src/extension.ts index 83a3d742..2b97fe9b 100644 --- a/vscode-extension/src/extension.ts +++ b/vscode-extension/src/extension.ts @@ -111,7 +111,7 @@ import { class CopilotTokenTracker implements vscode.Disposable { // Cache version - increment this when making changes that require cache invalidation - private static readonly CACHE_VERSION = 31; // Fix Continue token estimation (use text length, not non-existent promptLength field) + private static readonly CACHE_VERSION = 35; // Fix CLI multi-shutdown accumulation + backfill pre-delete to clear stale Azure entities // Maximum length for displaying workspace IDs in diagnostics/customization matrix private static readonly WORKSPACE_ID_DISPLAY_LENGTH = 8; diff --git a/vscode-extension/src/usageAnalysis.ts b/vscode-extension/src/usageAnalysis.ts index 2efea00e..292d8aa3 100644 --- a/vscode-extension/src/usageAnalysis.ts +++ b/vscode-extension/src/usageAnalysis.ts @@ -1631,16 +1631,20 @@ export async function getModelUsageFromSession(deps: Pick