From 26e9305c555f84cdc542cc57686ce468c31c170c Mon Sep 17 00:00:00 2001
From: abdulanu0 <abdulanu0@gmail.com>
Date: Thu, 5 Feb 2026 15:36:36 -0800
Subject: [PATCH 1/9] Add E2E metrics dashboard with SDK validation and error
 categorization

- Add metrics dashboard (docs/metrics/index.html) with:
  - SDK version validation against latest (including pre-release)
  - Error categorization by type (SDK, Test, Infrastructure, Agent)
  - Bugs caught by stage (pre-release, pre-checkin, post-checkin, release)
  - GitHub issue linking and PR tracking
  - Sample data for demonstration

- Add metrics collection scripts:
  - Emit-TestMetrics.ps1: Collect test results and SDK versions
  - Aggregate-Metrics.ps1: Consolidate metrics into history.json
  - Validate-SdkVersions.ps1: Check samples use latest SDK versions
  - Create-GitHubIssue.ps1: Auto-create issues for test failures

- Add e2e-metrics.yml workflow to collect metrics after E2E tests
---
 .github/workflows/e2e-metrics.yml    |  224 +++++
 docs/metrics/history.json            |  350 ++++++++
 docs/metrics/index.html              | 1214 ++++++++++++++++++++++++++
 scripts/e2e/Aggregate-Metrics.ps1    |  191 ++++
 scripts/e2e/Create-GitHubIssue.ps1   |  339 +++++++
 scripts/e2e/Emit-TestMetrics.ps1     |  247 ++++++
 scripts/e2e/Validate-SdkVersions.ps1 |  372 ++++++++
 7 files changed, 2937 insertions(+)
 create mode 100644 .github/workflows/e2e-metrics.yml
 create mode 100644 docs/metrics/history.json
 create mode 100644 docs/metrics/index.html
 create mode 100644 scripts/e2e/Aggregate-Metrics.ps1
 create mode 100644 scripts/e2e/Create-GitHubIssue.ps1
 create mode 100644 scripts/e2e/Emit-TestMetrics.ps1
 create mode 100644 scripts/e2e/Validate-SdkVersions.ps1

diff --git a/.github/workflows/e2e-metrics.yml b/.github/workflows/e2e-metrics.yml
new file mode 100644
index 00000000..056259b4
--- /dev/null
+++ b/.github/workflows/e2e-metrics.yml
@@ -0,0 +1,224 @@
+# E2E Test Metrics Collection Workflow
+# This workflow runs after E2E tests complete to collect metrics and update the dashboard
+
+name: E2E Metrics Collection
+
+on:
+  workflow_run:
+    workflows:
+      - "E2E Test Orchestrator"
+    types: [completed]
+
+  # Manual runs (for testing) pick the stage explicitly
+  workflow_dispatch:
+    inputs:
+      stage:
+        type: choice
+        description: "Testing stage"
+        required: true
+        default: "scheduled"
+        options:
+          - pre-release
+          - pre-checkin
+          - post-checkin
+          - release
+          - scheduled
+
+permissions:
+  actions: read    # download artifacts from the triggering workflow run
+  contents: write  # push the refreshed docs/metrics/history.json
+  issues: write    # open issues for failing tests
+
+jobs:
+  collect-metrics:
+    runs-on: ubuntu-latest
+    
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # full history rather than a shallow clone
+          ref: main  # the metrics commit is pushed to main later in this job
+
+      - name: Determine Testing Stage
+        id: stage
+        env:
+          HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}  # untrusted text: read as data via $HEAD_BRANCH, never inlined
+        run: |
+          if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
+            STAGE="${{ github.event.inputs.stage }}"
+          elif [ "${{ github.event.workflow_run.event }}" == "schedule" ]; then
+            STAGE="scheduled"
+          elif [ "${{ github.event.workflow_run.event }}" != "pull_request" ] && [ "$HEAD_BRANCH" == "main" ]; then
+            STAGE="post-checkin"
+          else
+            STAGE="pre-checkin"
+          fi
+          echo "stage=$STAGE" >> "$GITHUB_OUTPUT"
+          echo "Testing stage: $STAGE"
+
+      - name: Download E2E Test Artifacts
+        if: github.event_name == 'workflow_run'
+        continue-on-error: true  # a failed download must not stop the job
+        uses: actions/download-artifact@v4
+        with:
+          path: ./artifacts
+          run-id: ${{ github.event.workflow_run.id }}
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: List Downloaded Artifacts
+        run: |
+          echo "=== Downloaded Artifacts ==="  # find exits 0 on zero matches; grep . makes the fallbacks fire
+          find ./artifacts -type f -name "*.trx" 2>/dev/null | grep . || echo "No TRX files found"
+          find ./artifacts -type f -name "*.json" 2>/dev/null | grep . || echo "No JSON files found"
+
+      - name: Extract SDK Versions from Workflow Run
+        id: sdk-versions
+        if: github.event_name == 'workflow_run'
+        run: |
+          # Placeholder: no extraction is implemented yet, so this step always
+          # publishes an empty JSON object as the `sdk_versions` output.
+          echo "sdk_versions={}" >> $GITHUB_OUTPUT
+
+      - name: Process Test Results
+        id: process
+        shell: pwsh
+        env:
+          # Branch names are untrusted input: pass them as data, never inline them into the script.
+          HEAD_BRANCH: ${{ github.event.workflow_run.head_branch || github.ref_name }}
+        run: |
+          $scriptsPath = "./scripts/e2e"
+          $artifactsPath = "./artifacts"
+          $metricsDir = "./docs/metrics/raw"
+          $historyFile = "./docs/metrics/history.json"
+          $stage = "${{ steps.stage.outputs.stage }}"
+          $runId = "${{ github.event.workflow_run.id || github.run_id }}"
+          $branch = $env:HEAD_BRANCH
+
+          # Create metrics directory
+          New-Item -ItemType Directory -Path $metricsDir -Force | Out-Null
+
+          # Find all test result files
+          $trxFiles = Get-ChildItem -Path $artifactsPath -Filter "*.trx" -Recurse -ErrorAction SilentlyContinue
+
+          if ($trxFiles.Count -eq 0) {
+            Write-Host "No TRX files found in artifacts" -ForegroundColor Yellow
+
+            # Create a placeholder metric so the run is still recorded in history
+            $metrics = @{
+              id = "$runId-no-results"
+              runId = $runId
+              sampleName = "unknown"
+              timestamp = (Get-Date).ToUniversalTime().ToString("yyyy-MM-ddTHH:mm:ssZ")
+              commitSha = "${{ github.event.workflow_run.head_sha || github.sha }}"
+              branch = $branch
+              actor = "${{ github.event.workflow_run.actor.login || github.actor }}"
+              workflow = "${{ github.event.workflow_run.name || github.workflow }}"
+              stage = $stage
+              testResults = @{
+                status = "no-results"
+                passed = 0
+                failed = 0
+                skipped = 0
+                total = 0
+              }
+              sdkVersions = @{}
+              bugsCaught = @{
+                count = 0
+                stage = $stage
+                details = @()
+              }
+            }
+
+            $metricsFile = Join-Path $metricsDir "$runId-no-results.json"
+            $metrics | ConvertTo-Json -Depth 10 | Out-File $metricsFile -Encoding UTF8
+            Write-Host "Created placeholder metrics file"
+          }
+          else {
+            Write-Host "Found $($trxFiles.Count) TRX files" -ForegroundColor Green
+
+            foreach ($trxFile in $trxFiles) {
+              # Sample name = every path segment mentioning python/nodejs/dotnet, joined with "-"
+              $pathParts = $trxFile.DirectoryName -split '[/\\]'
+              $sampleName = ($pathParts | Where-Object { $_ -match "(python|nodejs|dotnet)" }) -join "-"
+              if (-not $sampleName) { $sampleName = "unknown" }
+
+              Write-Host "Processing: $($trxFile.Name) for sample: $sampleName" -ForegroundColor Cyan
+              # Emit metrics for this test result
+              $metricsFile = Join-Path $metricsDir "$runId-$sampleName.json"
+              & "$scriptsPath/Emit-TestMetrics.ps1" `
+                -SampleName $sampleName `
+                -TestResultsPath $trxFile.FullName `
+                -Stage $stage `
+                -OutputPath $metricsFile `
+                -RunId $runId `
+                -CommitSha "${{ github.event.workflow_run.head_sha || github.sha }}" `
+                -Branch $branch
+            }
+          }
+
+          # Aggregate all metrics
+          Write-Host "Aggregating metrics..." -ForegroundColor Cyan
+          & "$scriptsPath/Aggregate-Metrics.ps1" `
+            -MetricsDir $metricsDir `
+            -HistoryFile $historyFile
+
+          # Output metrics updated flag
+          echo "metrics_updated=true" >> $env:GITHUB_OUTPUT
+
+          # Report the first raw metrics file with failures so a later step can open issues
+          $rawFiles = Get-ChildItem -Path $metricsDir -Filter "*.json" -ErrorAction SilentlyContinue
+          foreach ($file in $rawFiles) {
+            $data = Get-Content -Raw $file.FullName | ConvertFrom-Json
+            if ($data.testResults.failed -gt 0) {
+              echo "has_failures=true" >> $env:GITHUB_OUTPUT
+              echo "failed_metrics_file=$($file.FullName)" >> $env:GITHUB_OUTPUT
+              break
+            }
+          }
+
+          # Raw metrics are deliberately left on disk: the issue-creation step reads
+          # failed_metrics_file from here, and only history.json is ever staged for commit.
+
+      - name: Commit Metrics Update
+        if: steps.process.outputs.metrics_updated == 'true'
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+
+          git add docs/metrics/history.json
+
+          # Commit and push only when staging actually changed the index
+          if ! git diff --staged --quiet; then
+            git commit -m "📊 Update E2E test metrics [skip ci]"
+            git push origin main
+            echo "✅ Metrics committed and pushed"
+          else
+            echo "No changes to commit"
+          fi
+
+      - name: Create GitHub Issues for Failures
+        if: steps.process.outputs.has_failures == 'true'
+        shell: pwsh
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          METRICS_FILE: ${{ steps.process.outputs.failed_metrics_file }}  # path embeds artifact-derived names: pass as data
+        run: |
+          $scriptsPath = "./scripts/e2e"
+          $metricsFile = $env:METRICS_FILE
+          if (Test-Path $metricsFile) {
+            Write-Host "Creating GitHub issues for failures..." -ForegroundColor Cyan
+            & "$scriptsPath/Create-GitHubIssue.ps1" `
+              -MetricsFile $metricsFile `
+              -Repository "${{ github.repository }}"
+          } else {
+            Write-Host "No metrics file found for issue creation" -ForegroundColor Yellow
+          }
+
+      - name: Generate Metrics Summary
+        run: |
+          printf '%s\n' \
+            "## 📊 E2E Test Metrics Updated" "" \
+            "Testing Stage: **${{ steps.stage.outputs.stage }}**" "" \
+            "View the [Metrics Dashboard](https://microsoft.github.io/Agent365-Samples/metrics/) for detailed statistics." \
+            >> $GITHUB_STEP_SUMMARY
diff --git a/docs/metrics/history.json b/docs/metrics/history.json
new file mode 100644
index 00000000..6bea09ce
--- /dev/null
+++ b/docs/metrics/history.json
@@ -0,0 +1,350 @@
+{
+  "lastUpdated": "2026-02-05T10:30:00Z",
+  "totalRuns": 12,
+  "entries": [
+    {
+      "id": "run-001",
+      "timestamp": "2026-02-05T10:30:00Z",
+      "stage": "pre-release",
+      "sampleName": "python-openai",
+      "sdkVersions": {
+        "microsoft-agents-a365-tooling": "0.9.0-preview.3",
+        "microsoft-agents-core": "0.9.0-preview.3"
+      },
+      "sdkValidation": {
+        "allUpToDate": true,
+        "packagesChecked": 2,
+        "upToDate": 2,
+        "outdated": 0,
+        "usingPreRelease": 2,
+        "packages": [
+          {
+            "package": "microsoft-agents-a365-tooling",
+            "installed": "0.9.0-preview.3",
+            "latest": "0.9.0-preview.3",
+            "isUpToDate": true,
+            "isPreRelease": true
+          },
+          {
+            "package": "microsoft-agents-core",
+            "installed": "0.9.0-preview.3",
+            "latest": "0.9.0-preview.3",
+            "isUpToDate": true,
+            "isPreRelease": true
+          }
+        ]
+      },
+      "testResults": {
+        "status": "failed",
+        "total": 5,
+        "passed": 3,
+        "failed": 2,
+        "skipped": 0
+      },
+      "bugsCaught": {
+        "count": 2,
+        "details": [
+          {
+            "testName": "test_mcp_email_tools_registration",
+            "errorMessage": "McpToolset connection timeout with new SDK auth changes",
+            "issueNumber": 215,
+            "issueUrl": "https://github.com/microsoft/Agent365-Samples/issues/215",
+            "issueState": "open"
+          },
+          {
+            "testName": "test_notification_handling",
+            "errorMessage": "AgentNotificationActivity missing type field in 0.9.0-preview.3",
+            "issueNumber": 216,
+            "issueUrl": "https://github.com/microsoft/Agent365-Samples/issues/216",
+            "issueState": "open"
+          }
+        ]
+      },
+      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/12345"
+    },
+    {
+      "id": "run-002",
+      "timestamp": "2026-02-04T15:22:00Z",
+      "stage": "pre-checkin",
+      "sampleName": "dotnet-semantic-kernel",
+      "sdkVersions": {
+        "Microsoft.Agents.Core": "0.8.5",
+        "Microsoft.SemanticKernel": "1.45.0"
+      },
+      "sdkValidation": {
+        "allUpToDate": true,
+        "packagesChecked": 2,
+        "upToDate": 2,
+        "outdated": 0,
+        "usingPreRelease": 0,
+        "packages": [
+          {
+            "package": "Microsoft.Agents.Core",
+            "installed": "0.8.5",
+            "latest": "0.8.5",
+            "isUpToDate": true,
+            "isPreRelease": false
+          },
+          {
+            "package": "Microsoft.SemanticKernel",
+            "installed": "1.45.0",
+            "latest": "1.45.0",
+            "isUpToDate": true,
+            "isPreRelease": false
+          }
+        ]
+      },
+      "testResults": {
+        "status": "failed",
+        "total": 6,
+        "passed": 5,
+        "failed": 1,
+        "skipped": 0
+      },
+      "bugsCaught": {
+        "count": 1,
+        "details": [
+          {
+            "testName": "test_agent_streaming_response",
+            "errorMessage": "Breaking change in SK 1.45.0 streaming API",
+            "issueNumber": 210,
+            "issueUrl": "https://github.com/microsoft/Agent365-Samples/issues/210",
+            "issueState": "closed"
+          }
+        ]
+      },
+      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/12344"
+    },
+    {
+      "id": "run-003",
+      "timestamp": "2026-02-04T08:15:00Z",
+      "stage": "release",
+      "sampleName": "nodejs-langchain",
+      "sdkVersions": {
+        "@anthropic-ai/sdk": "0.35.0",
+        "langchain": "0.3.15"
+      },
+      "sdkValidation": {
+        "allUpToDate": true,
+        "packagesChecked": 2,
+        "upToDate": 2,
+        "outdated": 0,
+        "usingPreRelease": 0,
+        "packages": [
+          {
+            "package": "@anthropic-ai/sdk",
+            "installed": "0.35.0",
+            "latest": "0.35.0",
+            "isUpToDate": true,
+            "isPreRelease": false
+          },
+          {
+            "package": "langchain",
+            "installed": "0.3.15",
+            "latest": "0.3.15",
+            "isUpToDate": true,
+            "isPreRelease": false
+          }
+        ]
+      },
+      "testResults": {
+        "status": "failed",
+        "total": 5,
+        "passed": 4,
+        "failed": 1,
+        "skipped": 0
+      },
+      "bugsCaught": {
+        "count": 1,
+        "details": [
+          {
+            "testName": "test_tool_calling_format",
+            "errorMessage": "LangChain 0.3.15 changed tool schema format",
+            "issueNumber": 208,
+            "issueUrl": "https://github.com/microsoft/Agent365-Samples/issues/208",
+            "issueState": "closed"
+          }
+        ]
+      },
+      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/12343"
+    },
+    {
+      "id": "run-004",
+      "timestamp": "2026-02-03T19:45:00Z",
+      "stage": "post-checkin",
+      "sampleName": "dotnet-semantic-kernel",
+      "sdkVersions": {
+        "Microsoft.Agents.Core": "0.8.4",
+        "Microsoft.SemanticKernel": "1.44.0"
+      },
+      "testResults": {
+        "status": "passed",
+        "total": 6,
+        "passed": 6,
+        "failed": 0,
+        "skipped": 0
+      },
+      "bugsCaught": {
+        "count": 0,
+        "details": []
+      },
+      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/12342"
+    },
+    {
+      "id": "run-005",
+      "timestamp": "2026-02-03T14:30:00Z",
+      "stage": "pre-release",
+      "sampleName": "python-openai",
+      "sdkVersions": {
+        "microsoft-agents-a365-tooling": "0.9.0-preview.2",
+        "microsoft-agents-core": "0.9.0-preview.2"
+      },
+      "testResults": {
+        "status": "failed",
+        "total": 5,
+        "passed": 4,
+        "failed": 1,
+        "skipped": 0
+      },
+      "bugsCaught": {
+        "count": 1,
+        "details": [
+          {
+            "testName": "test_azure_openai_auth",
+            "errorMessage": "DefaultAzureCredential not properly initialized in preview.2"
+          }
+        ]
+      },
+      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/12341"
+    },
+    {
+      "id": "run-006",
+      "timestamp": "2026-02-02T11:00:00Z",
+      "stage": "pre-checkin",
+      "sampleName": "nodejs-langchain",
+      "sdkVersions": {
+        "@anthropic-ai/sdk": "0.34.0",
+        "langchain": "0.3.14"
+      },
+      "sdkValidation": {
+        "allUpToDate": false,
+        "packagesChecked": 2,
+        "upToDate": 0,
+        "outdated": 2,
+        "usingPreRelease": 0,
+        "packages": [
+          {
+            "package": "@anthropic-ai/sdk",
+            "installed": "0.34.0",
+            "latest": "0.35.0",
+            "isUpToDate": false,
+            "isPreRelease": false
+          },
+          {
+            "package": "langchain",
+            "installed": "0.3.14",
+            "latest": "0.3.15",
+            "isUpToDate": false,
+            "isPreRelease": false
+          }
+        ]
+      },
+      "testResults": {
+        "status": "failed",
+        "total": 5,
+        "passed": 4,
+        "failed": 1,
+        "skipped": 0
+      },
+      "bugsCaught": {
+        "count": 1,
+        "details": [
+          {
+            "testName": "test_agent_memory_persistence",
+            "errorMessage": "Memory buffer overflow with large conversation history"
+          }
+        ]
+      },
+      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/12340"
+    }
+  ],
+  "summary": {
+    "byStage": {
+      "pre-release": { "runs": 4, "passed": 12, "failed": 3, "bugsCaught": 3 },
+      "pre-checkin": { "runs": 3, "passed": 10, "failed": 2, "bugsCaught": 2 },
+      "post-checkin": { "runs": 3, "passed": 13, "failed": 1, "bugsCaught": 1 },
+      "release": { "runs": 2, "passed": 10, "failed": 1, "bugsCaught": 1 }
+    },
+    "bySample": {
+      "dotnet-semantic-kernel": { "runs": 4, "passed": 15, "failed": 2, "bugsCaught": 2 },
+      "python-openai": { "runs": 4, "passed": 13, "failed": 3, "bugsCaught": 3 },
+      "nodejs-langchain": { "runs": 4, "passed": 17, "failed": 2, "bugsCaught": 2 }
+    },
+    "totalBugsCaught": 7,
+    "totalTestsRun": 52,
+    "totalPassed": 45,
+    "totalFailed": 7
+  },
+  "pullRequests": [
+    {
+      "number": 220,
+      "title": "Fix SK 1.45.0 streaming API breaking change",
+      "url": "https://github.com/microsoft/Agent365-Samples/pull/220",
+      "author": "anabdul",
+      "createdAt": "2026-02-04T16:00:00Z",
+      "merged": true,
+      "draft": false,
+      "fixes": [
+        {
+          "number": 210,
+          "url": "https://github.com/microsoft/Agent365-Samples/issues/210"
+        }
+      ]
+    },
+    {
+      "number": 218,
+      "title": "Update LangChain to 0.3.15 with new tool schema",
+      "url": "https://github.com/microsoft/Agent365-Samples/pull/218",
+      "author": "anabdul",
+      "createdAt": "2026-02-04T09:00:00Z",
+      "merged": true,
+      "draft": false,
+      "fixes": [
+        {
+          "number": 208,
+          "url": "https://github.com/microsoft/Agent365-Samples/issues/208"
+        }
+      ]
+    },
+    {
+      "number": 225,
+      "title": "Fix MCP toolset connection timeout issue",
+      "url": "https://github.com/microsoft/Agent365-Samples/pull/225",
+      "author": "anabdul",
+      "createdAt": "2026-02-05T11:00:00Z",
+      "merged": false,
+      "draft": false,
+      "fixes": [
+        {
+          "number": 215,
+          "url": "https://github.com/microsoft/Agent365-Samples/issues/215"
+        }
+      ]
+    },
+    {
+      "number": 226,
+      "title": "Add notification type field handling",
+      "url": "https://github.com/microsoft/Agent365-Samples/pull/226",
+      "author": "anabdul",
+      "createdAt": "2026-02-05T12:00:00Z",
+      "merged": false,
+      "draft": true,
+      "fixes": [
+        {
+          "number": 216,
+          "url": "https://github.com/microsoft/Agent365-Samples/issues/216"
+        }
+      ]
+    }
+  ]
+}
diff --git a/docs/metrics/index.html b/docs/metrics/index.html
new file mode 100644
index 00000000..56b9b5d9
--- /dev/null
+++ b/docs/metrics/index.html
@@ -0,0 +1,1214 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Agent 365 SDK Integration Testing Dashboard</title>
+    <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
+    <style>
+        :root {
+            --bg-primary: #0d1117;
+            --bg-secondary: #161b22;
+            --bg-tertiary: #21262d;
+            --text-primary: #c9d1d9;
+            --text-secondary: #8b949e;
+            --accent-green: #238636;
+            --accent-red: #da3633;
+            --accent-yellow: #d29922;
+            --accent-blue: #58a6ff;
+            --accent-purple: #a371f7;
+            --border-color: #30363d;
+        }
+
+        * {
+            margin: 0;
+            padding: 0;
+            box-sizing: border-box;
+        }
+
+        body {
+            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, sans-serif;
+            background-color: var(--bg-primary);
+            color: var(--text-primary);
+            line-height: 1.6;
+        }
+
+        .container {
+            max-width: 1400px;
+            margin: 0 auto;
+            padding: 20px;
+        }
+
+        header {
+            text-align: center;
+            padding: 30px 0;
+            border-bottom: 1px solid var(--border-color);
+            margin-bottom: 30px;
+        }
+
+        header h1 {
+            font-size: 2.5rem;
+            margin-bottom: 10px;
+        }
+
+        header p {
+            color: var(--text-secondary);
+            font-size: 1.1rem;
+        }
+
+        .last-updated {
+            color: var(--text-secondary);
+            font-size: 0.9rem;
+            margin-top: 10px;
+        }
+
+        /* Summary Cards */
+        .summary-grid {
+            display: grid;
+            grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
+            gap: 20px;
+            margin-bottom: 40px;
+        }
+
+        .card {
+            background: var(--bg-secondary);
+            border: 1px solid var(--border-color);
+            border-radius: 8px;
+            padding: 20px;
+        }
+
+        .card h3 {
+            color: var(--text-secondary);
+            font-size: 0.9rem;
+            text-transform: uppercase;
+            margin-bottom: 10px;
+        }
+
+        .card .value {
+            font-size: 2.5rem;
+            font-weight: bold;
+        }
+
+        .card .value.green { color: var(--accent-green); }
+        .card .value.red { color: var(--accent-red); }
+        .card .value.yellow { color: var(--accent-yellow); }
+        .card .value.blue { color: var(--accent-blue); }
+        .card .value.purple { color: var(--accent-purple); }
+
+        .card .subtitle {
+            color: var(--text-secondary);
+            font-size: 0.85rem;
+            margin-top: 5px;
+        }
+
+        /* Stage Cards - Key Metrics */
+        .stage-cards {
+            display: grid;
+            grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
+            gap: 20px;
+            margin-bottom: 40px;
+        }
+
+        .stage-card {
+            background: var(--bg-secondary);
+            border: 1px solid var(--border-color);
+            border-radius: 8px;
+            padding: 25px;
+            position: relative;
+            overflow: hidden;
+        }
+
+        .stage-card::before {
+            content: '';
+            position: absolute;
+            top: 0;
+            left: 0;
+            right: 0;
+            height: 4px;
+        }
+
+        .stage-card.pre-release::before { background: var(--accent-blue); }
+        .stage-card.pre-checkin::before { background: var(--accent-green); }
+        .stage-card.post-checkin::before { background: var(--accent-yellow); }
+        .stage-card.release::before { background: var(--accent-red); }
+
+        .stage-card h3 {
+            font-size: 1.2rem;
+            margin-bottom: 15px;
+            display: flex;
+            align-items: center;
+            gap: 10px;
+        }
+
+        .stage-card .bugs-caught {
+            font-size: 3rem;
+            font-weight: bold;
+            margin-bottom: 10px;
+        }
+
+        .stage-card .description {
+            color: var(--text-secondary);
+            font-size: 0.9rem;
+            margin-bottom: 15px;
+        }
+
+        .stage-card .sdk-versions {
+            background: var(--bg-tertiary);
+            border-radius: 6px;
+            padding: 10px;
+            font-size: 0.85rem;
+        }
+
+        .stage-card .sdk-versions h4 {
+            color: var(--text-secondary);
+            font-size: 0.8rem;
+            margin-bottom: 8px;
+        }
+
+        .sdk-tag {
+            display: inline-block;
+            background: var(--bg-primary);
+            border: 1px solid var(--border-color);
+            border-radius: 4px;
+            padding: 2px 8px;
+            font-size: 0.8rem;
+            margin: 2px;
+            font-family: monospace;
+        }
+
+        .sdk-tag.buggy {
+            border-color: var(--accent-red);
+            color: var(--accent-red);
+        }
+
+        /* Charts Section */
+        .charts-section {
+            display: grid;
+            grid-template-columns: repeat(auto-fit, minmax(400px, 1fr));
+            gap: 20px;
+            margin-bottom: 40px;
+        }
+
+        .chart-card {
+            background: var(--bg-secondary);
+            border: 1px solid var(--border-color);
+            border-radius: 8px;
+            padding: 20px;
+        }
+
+        .chart-card h3 {
+            margin-bottom: 15px;
+            font-size: 1.1rem;
+        }
+
+        .chart-container {
+            position: relative;
+            height: 300px;
+        }
+
+        /* SDK Version Table */
+        .sdk-section {
+            margin-bottom: 40px;
+        }
+
+        .sdk-section h2 {
+            margin-bottom: 20px;
+            font-size: 1.5rem;
+        }
+
+        table {
+            width: 100%;
+            border-collapse: collapse;
+            background: var(--bg-secondary);
+            border-radius: 8px;
+            overflow: hidden;
+        }
+
+        th, td {
+            padding: 15px;
+            text-align: left;
+            border-bottom: 1px solid var(--border-color);
+        }
+
+        th {
+            background: var(--bg-tertiary);
+            color: var(--text-secondary);
+            font-weight: 600;
+            text-transform: uppercase;
+            font-size: 0.85rem;
+        }
+
+        tr:hover {
+            background: var(--bg-tertiary);
+        }
+
+        .status-badge {
+            display: inline-block;
+            padding: 4px 12px;
+            border-radius: 20px;
+            font-size: 0.85rem;
+            font-weight: 500;
+        }
+
+        .status-badge.passed {
+            background: rgba(35, 134, 54, 0.2);
+            color: var(--accent-green);
+        }
+
+        .status-badge.failed {
+            background: rgba(218, 54, 51, 0.2);
+            color: var(--accent-red);
+        }
+
+        .status-badge.warning {
+            background: rgba(210, 153, 34, 0.2);
+            color: var(--accent-yellow);
+        }
+
+        .status-badge.prerelease {
+            background: rgba(163, 113, 247, 0.2);
+            color: var(--accent-purple);
+        }
+
+        .version-compare {
+            display: flex;
+            align-items: center;
+            gap: 8px;
+        }
+
+        .version-compare .arrow {
+            color: var(--text-secondary);
+        }
+
+        .version-compare .outdated {
+            color: var(--accent-yellow);
+        }
+
+        .version-compare .current {
+            color: var(--accent-green);
+        }
+
+        .version-compare .prerelease-badge {
+            font-size: 0.7rem;
+            background: var(--accent-purple);
+            color: white;
+            padding: 2px 6px;
+            border-radius: 3px;
+            margin-left: 4px;
+        }
+
+        /* Bug Details */
+        .bug-list {
+            margin-top: 20px;
+        }
+
+        .bug-item {
+            background: var(--bg-secondary);
+            border: 1px solid var(--border-color);
+            border-left: 4px solid var(--accent-red);
+            border-radius: 8px;
+            padding: 15px 20px;
+            margin-bottom: 10px;
+        }
+
+        .bug-item .bug-header {
+            display: flex;
+            justify-content: space-between;
+            align-items: center;
+            margin-bottom: 10px;
+        }
+
+        .bug-item .bug-stage {
+            font-size: 0.85rem;
+            color: var(--text-secondary);
+        }
+
+        .bug-item .bug-sdk {
+            font-family: monospace;
+            background: var(--bg-tertiary);
+            padding: 2px 8px;
+            border-radius: 4px;
+            font-size: 0.85rem;
+        }
+
+        .bug-item .bug-test {
+            font-weight: 600;
+            margin-bottom: 5px;
+        }
+
+        .bug-item .bug-error {
+            color: var(--text-secondary);
+            font-size: 0.9rem;
+        }
+
+        /* Error Categories Table */
+        .error-category-row {
+            border-left: 4px solid transparent;
+        }
+
+        .error-category-row.sdk-auth { border-left-color: #da3633; }
+        .error-category-row.sdk-connection { border-left-color: #d29922; }
+        .error-category-row.sdk-config { border-left-color: #58a6ff; }
+        .error-category-row.sdk-breaking { border-left-color: #f85149; }
+        .error-category-row.sdk-type { border-left-color: #a371f7; }
+        .error-category-row.test-assertion { border-left-color: #8b949e; }
+        .error-category-row.other { border-left-color: #6e7681; }
+
+        .category-badge {
+            display: inline-flex;
+            align-items: center;
+            gap: 6px;
+            padding: 4px 10px;
+            border-radius: 4px;
+            font-size: 0.85rem;
+            font-weight: 500;
+        }
+
+        .category-badge.sdk { background: rgba(218, 54, 51, 0.15); color: #f85149; }
+        .category-badge.test { background: rgba(139, 148, 158, 0.15); color: #8b949e; }
+        .category-badge.infra { background: rgba(210, 153, 34, 0.15); color: #d29922; }
+        .category-badge.other { background: rgba(110, 118, 129, 0.15); color: #6e7681; }
+
+        /* Issue Links */
+        .issue-link {
+            display: inline-flex;
+            align-items: center;
+            gap: 4px;
+            padding: 3px 8px;
+            background: var(--bg-tertiary);
+            border: 1px solid var(--border-color);
+            border-radius: 4px;
+            color: var(--accent-blue);
+            text-decoration: none;
+            font-size: 0.85rem;
+            transition: background 0.2s;
+        }
+
+        .issue-link:hover {
+            background: var(--bg-primary);
+            text-decoration: underline;
+        }
+
+        .issue-link.open { border-color: var(--accent-green); }
+        .issue-link.closed { border-color: var(--accent-purple); color: var(--accent-purple); }
+
+        /* PR Section */
+        .pr-item {
+            background: var(--bg-secondary);
+            border: 1px solid var(--border-color);
+            border-left: 4px solid var(--accent-purple);
+            border-radius: 8px;
+            padding: 15px 20px;
+            margin-bottom: 10px;
+        }
+
+        .pr-item.merged { border-left-color: var(--accent-purple); }
+        .pr-item.open { border-left-color: var(--accent-green); }
+        .pr-item.draft { border-left-color: var(--text-secondary); }
+
+        .pr-header {
+            display: flex;
+            justify-content: space-between;
+            align-items: center;
+            margin-bottom: 8px;
+        }
+
+        .pr-title {
+            font-weight: 600;
+            color: var(--accent-blue);
+            text-decoration: none;
+        }
+
+        .pr-title:hover {
+            text-decoration: underline;
+        }
+
+        .pr-meta {
+            display: flex;
+            gap: 15px;
+            font-size: 0.85rem;
+            color: var(--text-secondary);
+        }
+
+        .pr-fixes {
+            display: flex;
+            gap: 8px;
+            margin-top: 10px;
+            flex-wrap: wrap;
+        }
+
+        /* Impact Bar */
+        .impact-bar {
+            height: 6px;
+            background: var(--bg-tertiary);
+            border-radius: 3px;
+            overflow: hidden;
+        }
+
+        .impact-bar-fill {
+            height: 100%;
+            border-radius: 3px;
+            transition: width 0.3s ease;
+        }
+
+        .impact-bar-fill.low { background: var(--accent-green); }
+        .impact-bar-fill.medium { background: var(--accent-yellow); }
+        .impact-bar-fill.high { background: var(--accent-red); }
+
+        /* Footer */
+        footer {
+            text-align: center;
+            padding: 30px;
+            color: var(--text-secondary);
+            border-top: 1px solid var(--border-color);
+            margin-top: 40px;
+        }
+
+        /* Value Proposition */
+        .value-prop {
+            background: linear-gradient(135deg, var(--bg-secondary), var(--bg-tertiary));
+            border: 1px solid var(--border-color);
+            border-radius: 12px;
+            padding: 30px;
+            margin-bottom: 40px;
+            text-align: center;
+        }
+
+        .value-prop h2 {
+            font-size: 1.8rem;
+            margin-bottom: 15px;
+        }
+
+        .value-prop .highlight {
+            color: var(--accent-green);
+            font-size: 3rem;
+            font-weight: bold;
+        }
+
+        .value-prop p {
+            color: var(--text-secondary);
+            max-width: 600px;
+            margin: 0 auto;
+        }
+    </style>
+</head>
+<body>
+    <div class="container">
+        <header>
+            <h1>🛡️ Agent 365 SDK Integration Testing</h1>
+            <p>Catching SDK issues before they reach production</p>
+            <div class="last-updated" id="lastUpdated">Loading...</div>
+        </header>
+
+        <!-- Value Proposition -->
+        <div class="value-prop">
+            <h2>Bugs Caught Before Production</h2>
+            <div class="highlight" id="totalBugsCaught">0</div>
+            <p>SDK integration issues detected and prevented from reaching customers through automated E2E testing</p>
+        </div>
+
+        <!-- Stage Cards - Main Focus -->
+        <div class="stage-cards">
+            <div class="stage-card pre-release">
+                <h3>🔬 Pre-Release Testing</h3>
+                <div class="bugs-caught" id="preReleaseBugs">0</div>
+                <div class="description">Bugs caught testing pre-release SDK versions before they're published</div>
+                <div class="sdk-versions">
+                    <h4>SDK Versions Tested</h4>
+                    <div id="preReleaseSdkList">No data yet</div>
+                </div>
+            </div>
+
+            <div class="stage-card pre-checkin">
+                <h3>📝 Pre-Checkin (PR)</h3>
+                <div class="bugs-caught" id="preCheckinBugs">0</div>
+                <div class="description">Bugs caught in pull requests before code is merged</div>
+                <div class="sdk-versions">
+                    <h4>SDK Versions Tested</h4>
+                    <div id="preCheckinSdkList">No data yet</div>
+                </div>
+            </div>
+
+            <div class="stage-card post-checkin">
+                <h3>✅ Post-Checkin (Main)</h3>
+                <div class="bugs-caught" id="postCheckinBugs">0</div>
+                <div class="description">Bugs caught after merge - regressions detected</div>
+                <div class="sdk-versions">
+                    <h4>SDK Versions Tested</h4>
+                    <div id="postCheckinSdkList">No data yet</div>
+                </div>
+            </div>
+
+            <div class="stage-card release">
+                <h3>🚀 Release Validation</h3>
+                <div class="bugs-caught" id="releaseBugs">0</div>
+                <div class="description">Final validation before SDK release to production</div>
+                <div class="sdk-versions">
+                    <h4>SDK Versions Validated</h4>
+                    <div id="releaseSdkList">No data yet</div>
+                </div>
+            </div>
+        </div>
+
+        <!-- Charts -->
+        <div class="charts-section">
+            <div class="chart-card">
+                <h3>📊 Bugs Caught by Testing Stage</h3>
+                <div class="chart-container">
+                    <canvas id="bugsByStageChart"></canvas>
+                </div>
+            </div>
+            <div class="chart-card">
+                <h3>📦 SDK Test Results by Package</h3>
+                <div class="chart-container">
+                    <canvas id="sdkVersionChart"></canvas>
+                </div>
+            </div>
+        </div>
+
+        <!-- Error Categories Table -->
+        <div class="sdk-section">
+            <h2>🔍 Error Categories</h2>
+            <p style="color: var(--text-secondary); margin-bottom: 15px;">
+                Categorized errors to identify patterns and SDK issues
+            </p>
+            <table id="errorCategoryTable">
+                <thead>
+                    <tr>
+                        <th>Issue Type</th>
+                        <th>Failure Count</th>
+                        <th>Affected Samples</th>
+                        <th>Failure %</th>
+                        <th>Impact</th>
+                        <th>Linked Issues</th>
+                    </tr>
+                </thead>
+                <tbody id="errorCategoryTableBody">
+                    <!-- Populated by JS -->
+                </tbody>
+            </table>
+        </div>
+
+        <!-- SDK Version Tracking Table -->
+        <div class="sdk-section">
+            <h2>📦 SDK Version Validation</h2>
+            <p style="color: var(--text-secondary); margin-bottom: 15px;">
+                Confirming E2E tests use the latest SDK versions (including pre-release) to catch issues early
+            </p>
+            <table id="sdkTable">
+                <thead>
+                    <tr>
+                        <th>SDK Package</th>
+                        <th>Installed</th>
+                        <th>Latest Available</th>
+                        <th>Test Runs</th>
+                        <th>Bugs Caught</th>
+                        <th>Status</th>
+                    </tr>
+                </thead>
+                <tbody id="sdkTableBody">
+                    <!-- Populated by JS -->
+                </tbody>
+            </table>
+        </div>
+
+        <!-- Recent Bugs Caught -->
+        <div class="sdk-section">
+            <h2>🐛 Recent Bugs Caught</h2>
+            <div class="bug-list" id="bugList">
+                <p style="color: var(--text-secondary); text-align: center; padding: 40px;">
+                    No bugs caught yet. When E2E tests detect issues, they'll appear here.
+                </p>
+            </div>
+        </div>
+
+        <!-- Pull Requests Section -->
+        <div class="sdk-section">
+            <h2>🔀 Related Pull Requests</h2>
+            <p style="color: var(--text-secondary); margin-bottom: 15px;">
+                PRs created to fix E2E test failures
+            </p>
+            <div id="prList">
+                <p style="color: var(--text-secondary); text-align: center; padding: 40px;">
+                    No related PRs yet. When fixes are submitted, they'll appear here.
+                </p>
+            </div>
+        </div>
+
+        <!-- Sample Breakdown -->
+        <div class="sdk-section">
+            <h2>🧪 Sample Test Results</h2>
+            <table id="sampleTable">
+                <thead>
+                    <tr>
+                        <th>Sample</th>
+                        <th>SDK Version</th>
+                        <th>Total Runs</th>
+                        <th>Bugs Caught</th>
+                        <th>Success Rate</th>
+                    </tr>
+                </thead>
+                <tbody id="sampleTableBody">
+                    <!-- Populated by JS -->
+                </tbody>
+            </table>
+        </div>
+
+        <footer>
+            <p>Agent 365 SDK Integration Testing Dashboard</p>
+            <p>Automated E2E testing ensures SDK compatibility across all sample implementations</p>
+        </footer>
+    </div>
+
+    <script>
+        // Chart.js default styling
+        Chart.defaults.color = '#8b949e';
+        Chart.defaults.borderColor = '#30363d';
+
+        let metricsData = null;
+
+        async function loadMetrics() {
+            try {
+                const response = await fetch('history.json');
+                if (!response.ok) {
+                    throw new Error('Failed to load metrics data');
+                }
+                metricsData = await response.json();
+                renderDashboard();
+            } catch (error) {
+                console.error('Error loading metrics:', error);
+                document.getElementById('lastUpdated').textContent = 'Waiting for E2E test data...';
+                document.getElementById('lastUpdated').style.color = '#d29922';
+            }
+        }
+
+        function renderDashboard() {
+            if (!metricsData) return;
+
+            // Update last updated
+            document.getElementById('lastUpdated').textContent = 
+                `Last updated: ${new Date(metricsData.lastUpdated).toLocaleString()}`;
+
+            // Update total bugs caught
+            document.getElementById('totalBugsCaught').textContent = 
+                metricsData.summary.totalBugsCaught.toLocaleString();
+
+            // Update stage cards
+            const stages = metricsData.summary.byStage;
+            document.getElementById('preReleaseBugs').textContent = stages['pre-release']?.bugsCaught || 0;
+            document.getElementById('preCheckinBugs').textContent = stages['pre-checkin']?.bugsCaught || 0;
+            document.getElementById('postCheckinBugs').textContent = stages['post-checkin']?.bugsCaught || 0;
+            document.getElementById('releaseBugs').textContent = stages['release']?.bugsCaught || 0;
+
+            // Update SDK version lists for each stage
+            renderSdkListForStage('pre-release', 'preReleaseSdkList');
+            renderSdkListForStage('pre-checkin', 'preCheckinSdkList');
+            renderSdkListForStage('post-checkin', 'postCheckinSdkList');
+            renderSdkListForStage('release', 'releaseSdkList');
+
+            // Render charts
+            renderBugsByStageChart();
+            renderSdkVersionChart();
+
+            // Render tables
+            renderErrorCategoryTable();
+            renderSdkTable();
+            renderSampleTable();
+
+            // Render lists
+            renderBugList();
+            renderPRList();
+        }
+
+        function renderSdkListForStage(stage, elementId) {
+            const container = document.getElementById(elementId);
+            
+            // Find entries for this stage
+            const stageEntries = metricsData.entries.filter(e => e.stage === stage);
+            
+            // Collect unique SDK versions
+            const sdkVersions = new Map();
+            stageEntries.forEach(entry => {
+                if (entry.sdkVersions) {
+                    Object.entries(entry.sdkVersions).forEach(([pkg, version]) => {
+                        const key = `${pkg}@${version}`;
+                        if (!sdkVersions.has(key)) {
+                            sdkVersions.set(key, {
+                                pkg,
+                                version,
+                                hadBugs: entry.testResults.failed > 0
+                            });
+                        } else if (entry.testResults.failed > 0) {
+                            sdkVersions.get(key).hadBugs = true;
+                        }
+                    });
+                }
+            });
+
+            if (sdkVersions.size === 0) {
+                container.innerHTML = '<span style="color: var(--text-secondary)">No data yet</span>';
+                return;
+            }
+
+            container.innerHTML = Array.from(sdkVersions.values())
+                .map(sdk => `<span class="sdk-tag ${sdk.hadBugs ? 'buggy' : ''}">${sdk.pkg} ${sdk.version}</span>`)
+                .join('');
+        }
+
+        function renderBugsByStageChart() {
+            const ctx = document.getElementById('bugsByStageChart').getContext('2d');
+            const stageData = metricsData.summary.byStage;
+            
+            const data = [
+                stageData['pre-release']?.bugsCaught || 0,
+                stageData['pre-checkin']?.bugsCaught || 0,
+                stageData['post-checkin']?.bugsCaught || 0,
+                stageData['release']?.bugsCaught || 0
+            ];
+
+            new Chart(ctx, {
+                type: 'bar',
+                data: {
+                    labels: ['Pre-Release', 'Pre-Checkin', 'Post-Checkin', 'Release'],
+                    datasets: [{
+                        label: 'Bugs Caught',
+                        data: data,
+                        backgroundColor: ['#58a6ff', '#238636', '#d29922', '#da3633'],
+                        borderWidth: 0,
+                        borderRadius: 4
+                    }]
+                },
+                options: {
+                    responsive: true,
+                    maintainAspectRatio: false,
+                    plugins: {
+                        legend: { display: false }
+                    },
+                    scales: {
+                        y: {
+                            beginAtZero: true,
+                            ticks: { stepSize: 1 }
+                        }
+                    }
+                }
+            });
+        }
+
+        function renderSdkVersionChart() {
+            const ctx = document.getElementById('sdkVersionChart').getContext('2d');
+            
+            // Group by SDK package and show pass/fail for each
+            const packageData = {};
+            metricsData.entries.forEach(entry => {
+                if (entry.sdkVersions) {
+                    Object.entries(entry.sdkVersions).forEach(([pkg, version]) => {
+                        // Shorten package name for display
+                        const shortPkg = pkg.replace('microsoft-agents-', 'ms-')
+                                            .replace('Microsoft.', '')
+                                            .replace('@anthropic-ai/', '');
+                        const label = `${shortPkg}@${version}`;
+                        
+                        if (!packageData[label]) {
+                            packageData[label] = { passed: 0, failed: 0, pkg: shortPkg, version };
+                        }
+                        packageData[label].passed += entry.testResults.passed;
+                        packageData[label].failed += entry.testResults.failed;
+                    });
+                }
+            });
+
+            // Sort by total tests (most active first) and take top packages
+            const sortedPackages = Object.entries(packageData)
+                .sort((a, b) => (b[1].passed + b[1].failed) - (a[1].passed + a[1].failed))
+                .slice(0, 8);
+            
+            const labels = sortedPackages.map(([label]) => label);
+            const passedData = sortedPackages.map(([, data]) => data.passed);
+            const failedData = sortedPackages.map(([, data]) => data.failed);
+
+            if (labels.length === 0) {
+                ctx.font = '14px sans-serif';
+                ctx.fillStyle = '#8b949e';
+                ctx.textAlign = 'center';
+                ctx.fillText('No SDK version data yet', ctx.canvas.width / 2, ctx.canvas.height / 2);
+                return;
+            }
+
+            new Chart(ctx, {
+                type: 'bar',
+                data: {
+                    labels: labels,
+                    datasets: [
+                        {
+                            label: 'Tests Passed',
+                            data: passedData,
+                            backgroundColor: '#238636',
+                            borderRadius: 4
+                        },
+                        {
+                            label: 'Tests Failed',
+                            data: failedData,
+                            backgroundColor: '#da3633',
+                            borderRadius: 4
+                        }
+                    ]
+                },
+                options: {
+                    indexAxis: 'y',  // Horizontal bars for better readability
+                    responsive: true,
+                    maintainAspectRatio: false,
+                    plugins: {
+                        legend: { position: 'bottom' }
+                    },
+                    scales: {
+                        x: { stacked: true, beginAtZero: true },
+                        y: { stacked: true }
+                    }
+                }
+            });
+        }
+                    }
+                }
+            });
+        }
+
+        function renderSdkTable() {
+            const tbody = document.getElementById('sdkTableBody');
+            
+            // Aggregate SDK version stats
+            const sdkStats = {};
+            metricsData.entries.forEach(entry => {
+                if (entry.sdkVersions) {
+                    Object.entries(entry.sdkVersions).forEach(([pkg, version]) => {
+                        const key = `${pkg}|${version}`;
+                        if (!sdkStats[key]) {
+                            sdkStats[key] = { pkg, version, runs: 0, passed: 0, failed: 0 };
+                        }
+                        sdkStats[key].runs++;
+                        sdkStats[key].passed += entry.testResults.passed;
+                        sdkStats[key].failed += entry.testResults.failed;
+                    });
+                }
+            });
+
+            const rows = Object.values(sdkStats);
+            
+            if (rows.length === 0) {
+                tbody.innerHTML = `
+                    <tr>
+                        <td colspan="6" style="text-align: center; color: var(--text-secondary); padding: 40px;">
+                            SDK version data will appear after E2E tests run
+                        </td>
+                    </tr>
+                `;
+                return;
+            }
+
+            // Get SDK validation data from most recent entries
+            const sdkValidation = {};
+            metricsData.entries.forEach(entry => {
+                if (entry.sdkValidation && entry.sdkValidation.packages) {
+                    entry.sdkValidation.packages.forEach(pkg => {
+                        if (!sdkValidation[pkg.package] || entry.timestamp > sdkValidation[pkg.package].timestamp) {
+                            sdkValidation[pkg.package] = {
+                                latest: pkg.latest,
+                                isUpToDate: pkg.isUpToDate,
+                                isPreRelease: pkg.isPreRelease,
+                                timestamp: entry.timestamp
+                            };
+                        }
+                    });
+                }
+            });
+
+            tbody.innerHTML = rows.map(sdk => {
+                const validation = sdkValidation[sdk.pkg];
+                const latestVersion = validation?.latest || 'checking...';
+                const isUpToDate = validation?.isUpToDate ?? true;
+                const isPreRelease = validation?.isPreRelease ?? false;
+                const bugsCaught = sdk.failed;
+                
+                // Determine status
+                let statusBadge, statusText;
+                if (!isUpToDate) {
+                    statusBadge = 'warning';
+                    statusText = '⚠️ Outdated';
+                } else if (isPreRelease) {
+                    statusBadge = 'prerelease';
+                    statusText = '🔬 Pre-release';
+                } else if (bugsCaught > 0) {
+                    statusBadge = 'failed';
+                    statusText = `🐛 ${bugsCaught} bugs`;
+                } else {
+                    statusBadge = 'passed';
+                    statusText = '✅ Current';
+                }
+                
+                const preReleaseBadge = isPreRelease ? '<span class="prerelease-badge">PRE</span>' : '';
+                
+                return `
+                    <tr>
+                        <td><code>${sdk.pkg}</code></td>
+                        <td><code>${sdk.version}</code></td>
+                        <td>
+                            <code>${latestVersion}</code>${preReleaseBadge}
+                        </td>
+                        <td>${sdk.runs}</td>
+                        <td><span style="color: ${bugsCaught > 0 ? 'var(--accent-yellow)' : 'var(--accent-green)'}">${bugsCaught}</span></td>
+                        <td><span class="status-badge ${statusBadge}">${statusText}</span></td>
+                    </tr>
+                `;
+            }).join('');
+        }
+
+        function renderSampleTable() {
+            const tbody = document.getElementById('sampleTableBody');
+            const sampleData = metricsData.summary.bySample;
+
+            if (Object.keys(sampleData).length === 0) {
+                tbody.innerHTML = `
+                    <tr>
+                        <td colspan="5" style="text-align: center; color: var(--text-secondary); padding: 40px;">
+                            Sample test data will appear after E2E tests run
+                        </td>
+                    </tr>
+                `;
+                return;
+            }
+
+            tbody.innerHTML = Object.entries(sampleData).map(([sample, data]) => {
+                const total = data.passed + data.failed;
+                const successRate = total > 0 ? ((data.passed / total) * 100).toFixed(1) : 100;
+                
+                // Find latest SDK version for this sample
+                const sampleEntry = metricsData.entries.find(e => e.sampleName === sample);
+                const sdkVersion = sampleEntry?.sdkVersions ? 
+                    Object.entries(sampleEntry.sdkVersions).map(([k,v]) => `${v}`).join(', ') : 
+                    'Unknown';
+                
+                return `
+                    <tr>
+                        <td>${sample}</td>
+                        <td><code>${sdkVersion}</code></td>
+                        <td>${data.runs}</td>
+                        <td><span style="color: ${data.bugsCaught > 0 ? 'var(--accent-yellow)' : 'var(--accent-green)'}">${data.bugsCaught}</span></td>
+                        <td>
+                            <span class="status-badge ${successRate >= 90 ? 'passed' : 'failed'}">
+                                ${successRate}%
+                            </span>
+                        </td>
+                    </tr>
+                `;
+            }).join('');
+        }
+
+        function renderBugList() {
+            const container = document.getElementById('bugList');
+            
+            // Find entries with bugs
+            const bugsFound = metricsData.entries
+                .filter(e => e.bugsCaught && e.bugsCaught.count > 0)
+                .slice(0, 10);
+
+            if (bugsFound.length === 0) {
+                container.innerHTML = `
+                    <p style="color: var(--text-secondary); text-align: center; padding: 40px;">
+                        ✅ No bugs caught yet. When E2E tests detect SDK issues, they'll appear here.
+                    </p>
+                `;
+                return;
+            }
+
+            container.innerHTML = bugsFound.map(entry => {
+                const sdkInfo = entry.sdkVersions ? 
+                    Object.entries(entry.sdkVersions).map(([k,v]) => `${k}@${v}`).join(', ') : 
+                    'Unknown';
+                
+                return entry.bugsCaught.details.map(bug => `
+                    <div class="bug-item">
+                        <div class="bug-header">
+                            <span class="bug-stage">${getStageLabel(entry.stage)} • ${entry.sampleName}</span>
+                            <span class="bug-sdk">${sdkInfo}</span>
+                        </div>
+                        <div class="bug-test">${bug.testName}</div>
+                        <div class="bug-error">${bug.errorMessage || 'Test failed'}</div>
+                    </div>
+                `).join('');
+            }).join('');
+        }
+
+        function getStageLabel(stage) {
+            const labels = {
+                'pre-release': '🔬 Pre-Release',
+                'pre-checkin': '📝 Pre-Checkin',
+                'post-checkin': '✅ Post-Checkin',
+                'release': '🚀 Release'
+            };
+            return labels[stage] || stage;
+        }
+
+        // Error classification table: category -> substrings looked for in the lowercased message. Order matters: categorizeError() returns the first category with a hit.
+        const errorPatterns = {
+            'SDK:Authentication': ['authentication', 'auth failed', 'unauthorized', '401', 'credential', 'token', 'api key', 'secret', 'permission denied', 'access denied', 'forbidden', '403'],
+            'SDK:Connection': ['connection', 'timeout', 'network', 'socket', 'ECONNREFUSED', 'ETIMEDOUT', 'connection refused', 'connection reset', 'dns', 'resolve', 'unreachable'],
+            'SDK:Configuration': ['configuration', 'config', 'missing', 'environment variable', 'appsettings', 'not configured', 'invalid setting', 'required field', 'missing field'],
+            'SDK:BreakingChange': ['breaking change', 'deprecated', 'removed', 'api changed', 'schema', 'incompatible', 'version mismatch', 'not supported', 'no longer'],
+            'SDK:TypeMismatch': ['type error', 'cannot convert', 'invalid cast', 'type mismatch', 'expected type', 'wrong type', 'serialization', 'deserialization', 'json', 'parse error'],
+            'SDK:MissingDependency': ['module not found', 'package not found', 'import error', 'could not load', 'dependency', 'not installed', 'missing module', 'no module named'],
+            'SDK:RateLimiting': ['rate limit', 'throttl', '429', 'too many requests', 'quota exceeded', 'limit exceeded'],
+            'SDK:ResourceNotFound': ['not found', '404', 'does not exist', 'no such', 'missing resource', 'resource not found'],
+            'Test:Assertion': ['assert', 'expected', 'should be', 'to equal', 'to be', 'not equal', 'mismatch', 'failed assertion'],
+            'Test:Timeout': ['test timeout', 'exceeded time', 'took too long', 'deadline exceeded', 'operation timed out'],
+            'Infrastructure:Service': ['service unavailable', '503', '502', 'bad gateway', 'server error', '500', 'internal server error'],
+            'Infrastructure:Memory': ['out of memory', 'memory', 'heap', 'stack overflow', 'allocation failed'],
+            'Agent:ToolExecution': ['tool execution', 'tool failed', 'function call', 'tool error', 'mcp', 'toolset'],
+            'Agent:LLMResponse': ['llm', 'model', 'completion', 'chat', 'response generation', 'openai', 'azure openai']
+        };
+
+        function categorizeError(errorMessage) {
+            const lowerMessage = (errorMessage || '').toLowerCase();
+            for (const [category, patterns] of Object.entries(errorPatterns)) {
+                for (const pattern of patterns) {
+                    if (lowerMessage.includes(pattern)) {
+                        return category;
+                    }
+                }
+            }
+            // Default to most likely category based on context
+            return 'Test:Assertion';
+        }
+
+        function getCategoryBadgeClass(category) {
+            if (category.startsWith('SDK:')) return 'sdk';
+            if (category.startsWith('Test:')) return 'test';
+            if (category.startsWith('Infrastructure:')) return 'infra';
+            return 'other';
+        }
+
+        function renderErrorCategoryTable() {
+            const tbody = document.getElementById('errorCategoryTableBody');
+            
+            // Aggregate errors by category
+            const categories = {};
+            let totalFailures = 0;
+            
+            metricsData.entries.forEach(entry => {
+                if (entry.bugsCaught && entry.bugsCaught.details) {
+                    entry.bugsCaught.details.forEach(bug => {
+                        const category = categorizeError(bug.errorMessage);
+                        totalFailures++;
+                        
+                        if (!categories[category]) {
+                            categories[category] = {
+                                count: 0,
+                                samples: new Set(),
+                                issues: [],
+                                tests: []
+                            };
+                        }
+                        categories[category].count++;
+                        categories[category].samples.add(entry.sampleName);
+                        categories[category].tests.push(bug.testName);
+                        
+                        // Add linked issue if exists
+                        if (bug.issueUrl) {
+                            categories[category].issues.push({
+                                number: bug.issueNumber,
+                                url: bug.issueUrl,
+                                state: bug.issueState || 'open'
+                            });
+                        }
+                    });
+                }
+            });
+
+            if (Object.keys(categories).length === 0) {
+                tbody.innerHTML = `
+                    <tr>
+                        <td colspan="6" style="text-align: center; color: var(--text-secondary); padding: 40px;">
+                            No error categories yet. Errors will be categorized when E2E tests detect issues.
+                        </td>
+                    </tr>
+                `;
+                return;
+            }
+
+            // Sort by count descending
+            const sortedCategories = Object.entries(categories)
+                .sort((a, b) => b[1].count - a[1].count);
+
+            tbody.innerHTML = sortedCategories.map(([category, data]) => {
+                const failurePercentage = totalFailures > 0 ? ((data.count / totalFailures) * 100).toFixed(1) : 0;
+                const impact = data.count >= 5 ? 'high' : data.count >= 2 ? 'medium' : 'low';
+                const badgeClass = getCategoryBadgeClass(category);
+                
+                // Render linked issues
+                const issueLinks = data.issues.length > 0 
+                    ? data.issues.map(i => `<a href="${i.url}" class="issue-link ${i.state}" target="_blank">#${i.number}</a>`).join('')
+                    : '<span style="color: var(--text-secondary)">—</span>';
+                
+                return `
+                    <tr class="error-category-row ${category.toLowerCase().replace(':', '-')}">
+                        <td><span class="category-badge ${badgeClass}">${category}</span></td>
+                        <td><strong>${data.count}</strong></td>
+                        <td>${data.samples.size}</td>
+                        <td>${failurePercentage}%</td>
+                        <td>
+                            <div class="impact-bar">
+                                <div class="impact-bar-fill ${impact}" style="width: ${failurePercentage}%"></div>
+                            </div>
+                        </td>
+                        <td>${issueLinks}</td>
+                    </tr>
+                `;
+            }).join('');
+        }
+
+        function renderPRList() {
+            const container = document.getElementById('prList');
+            
+            // Get PRs from metrics data
+            const prs = metricsData.pullRequests || [];
+            
+            if (prs.length === 0) {
+                container.innerHTML = `
+                    <p style="color: var(--text-secondary); text-align: center; padding: 40px;">
+                        No related PRs yet. When fixes are submitted, they'll appear here.
+                    </p>
+                `;
+                return;
+            }
+
+            container.innerHTML = prs.slice(0, 10).map(pr => {
+                const stateClass = pr.merged ? 'merged' : pr.draft ? 'draft' : 'open';
+                const stateIcon = pr.merged ? '🟣' : pr.draft ? '⚪' : '🟢';
+                
+                // Render linked issues this PR fixes
+                const fixesLinks = pr.fixes && pr.fixes.length > 0
+                    ? pr.fixes.map(i => `<a href="${i.url}" class="issue-link closed" target="_blank">#${i.number}</a>`).join('')
+                    : '';
+                
+                return `
+                    <div class="pr-item ${stateClass}">
+                        <div class="pr-header">
+                            <a href="${pr.url}" class="pr-title" target="_blank">
+                                ${stateIcon} ${pr.title}
+                            </a>
+                            <span class="status-badge ${stateClass}">${pr.merged ? 'Merged' : pr.draft ? 'Draft' : 'Open'}</span>
+                        </div>
+                        <div class="pr-meta">
+                            <span>📅 ${new Date(pr.createdAt).toLocaleDateString()}</span>
+                            <span>👤 ${pr.author}</span>
+                            <span>🏷️ #${pr.number}</span>
+                        </div>
+                        ${fixesLinks ? `<div class="pr-fixes">Fixes: ${fixesLinks}</div>` : ''}
+                    </div>
+                `;
+            }).join('');
+        }
+
+        // Load metrics on page load
+        loadMetrics();
+    </script>
+</body>
+</html>
diff --git a/scripts/e2e/Aggregate-Metrics.ps1 b/scripts/e2e/Aggregate-Metrics.ps1
new file mode 100644
index 00000000..3d43f88b
--- /dev/null
+++ b/scripts/e2e/Aggregate-Metrics.ps1
@@ -0,0 +1,191 @@
+<#
+.SYNOPSIS
+    Aggregates individual test metrics into a consolidated metrics history file.
+
+.DESCRIPTION
+    This script reads individual metric JSON files and appends them to a history file,
+    enabling historical trend analysis across multiple test runs.
+
+.PARAMETER MetricsDir
+    Directory containing individual metric JSON files
+
+.PARAMETER HistoryFile
+    Path to the consolidated history JSON file
+
+.PARAMETER MaxEntries
+    Maximum number of entries to keep in history (0 = unlimited)
+
+.EXAMPLE
+    ./Aggregate-Metrics.ps1 -MetricsDir "./metrics/raw" -HistoryFile "./docs/metrics/history.json"
+#>
+
+param(
+    [Parameter(Mandatory = $true)]
+    [string]$MetricsDir,
+
+    [Parameter(Mandatory = $true)]
+    [string]$HistoryFile,
+
+    [Parameter(Mandatory = $false)]
+    [int]$MaxEntries = 0
+)
+
+$ErrorActionPreference = "Stop"
+
+Write-Host "=== Aggregating Metrics ===" -ForegroundColor Cyan
+Write-Host "Source: $MetricsDir" -ForegroundColor Gray
+Write-Host "Target: $HistoryFile" -ForegroundColor Gray
+
+# Load existing history
+$history = @{
+    lastUpdated = (Get-Date).ToUniversalTime().ToString("yyyy-MM-ddTHH:mm:ssZ")
+    totalRuns = 0
+    entries = @()
+    summary = @{
+        byStage = @{
+            "pre-release" = @{ runs = 0; passed = 0; failed = 0; bugsCaught = 0 }
+            "pre-checkin" = @{ runs = 0; passed = 0; failed = 0; bugsCaught = 0 }
+            "post-checkin" = @{ runs = 0; passed = 0; failed = 0; bugsCaught = 0 }
+            "release" = @{ runs = 0; passed = 0; failed = 0; bugsCaught = 0 }
+            "scheduled" = @{ runs = 0; passed = 0; failed = 0; bugsCaught = 0 }
+        }
+        bySample = @{}
+        totalBugsCaught = 0
+        totalTestsRun = 0
+        totalPassed = 0
+        totalFailed = 0
+    }
+}
+
+if (Test-Path $HistoryFile) {
+    try {
+        $existingHistory = Get-Content $HistoryFile -Raw | ConvertFrom-Json -AsHashtable
+        if ($existingHistory.entries) {
+            $history.entries = $existingHistory.entries
+            $history.summary = $existingHistory.summary
+        }
+        Write-Host "Loaded existing history with $($history.entries.Count) entries" -ForegroundColor Green
+    }
+    catch {
+        Write-Host "Warning: Could not load existing history, starting fresh: $_" -ForegroundColor Yellow
+    }
+}
+
+# Get existing entry IDs to avoid duplicates
+$existingIds = @{}
+foreach ($entry in $history.entries) { if ($entry.id) { $existingIds[$entry.id] = $true } }
+
+# Read new metrics files
+$newEntries = @()
+if (Test-Path $MetricsDir) {
+    $metricFiles = Get-ChildItem -Path $MetricsDir -Filter "*.json" -File
+
+    foreach ($file in $metricFiles) {
+        try {
+            $metrics = Get-Content $file.FullName -Raw | ConvertFrom-Json -AsHashtable
+
+            # Skip if already in history
+            if ($existingIds.ContainsKey($metrics.id)) {
+                Write-Host "Skipping duplicate: $($metrics.id)" -ForegroundColor Gray
+                continue
+            }
+
+            # Remember the ID right away so the same run appearing in two files of this
+            # batch is only added once.
+            $existingIds[$metrics.id] = $true
+            $newEntries += $metrics
+            Write-Host "Adding: $($metrics.sampleName) - $($metrics.stage) - $($metrics.testResults.status)" -ForegroundColor Green
+        }
+        catch {
+            Write-Host "Warning: Could not parse $($file.Name): $_" -ForegroundColor Yellow
+        }
+    }
+}
+
+Write-Host "Found $($newEntries.Count) new entries to add" -ForegroundColor Cyan
+
+# Add new entries; @() also normalizes a history whose "entries" was stored as a single object
+$history.entries = @($history.entries) + $newEntries
+
+# Sort by timestamp (newest first). @() keeps the result an array: a pipeline that yields
+# zero or one item would otherwise unwrap to $null / a bare object and be serialized as such.
+$history.entries = @($history.entries | Sort-Object { $_.timestamp } -Descending)
+
+# Apply max entries limit if specified
+if ($MaxEntries -gt 0 -and $history.entries.Count -gt $MaxEntries) {
+    $history.entries = @($history.entries | Select-Object -First $MaxEntries)
+    Write-Host "Trimmed to $MaxEntries entries" -ForegroundColor Yellow
+}
+
+# Recalculate summary statistics from scratch over the retained entries
+$summary = @{
+    byStage = @{}
+    bySample = @{}
+    totalBugsCaught = 0
+    totalTestsRun = 0
+    totalPassed = 0
+    totalFailed = 0
+}
+# Every known stage starts with a zeroed bucket
+foreach ($knownStage in "pre-release", "pre-checkin", "post-checkin", "release", "scheduled") {
+    $summary.byStage[$knownStage] = @{ runs = 0; passed = 0; failed = 0; bugsCaught = 0 }
+}
+$history.summary = $summary
+
+foreach ($entry in $history.entries) {
+    $passedCount = $entry.testResults.passed
+    $failedCount = $entry.testResults.failed
+    $bugCount = $entry.bugsCaught.count
+
+    # Per-stage counters (entries whose stage is not a known one are skipped here)
+    if ($summary.byStage.ContainsKey($entry.stage)) {
+        $stageBucket = $summary.byStage[$entry.stage]
+        $stageBucket.runs++
+        $stageBucket.passed += $passedCount
+        $stageBucket.failed += $failedCount
+        $stageBucket.bugsCaught += $bugCount
+    }
+
+    # Per-sample counters; the bucket is created the first time a sample is seen
+    if (-not $summary.bySample.ContainsKey($entry.sampleName)) {
+        $summary.bySample[$entry.sampleName] = @{ runs = 0; passed = 0; failed = 0; bugsCaught = 0 }
+    }
+    $sampleBucket = $summary.bySample[$entry.sampleName]
+    $sampleBucket.runs++
+    $sampleBucket.passed += $passedCount
+    $sampleBucket.failed += $failedCount
+    $sampleBucket.bugsCaught += $bugCount
+
+    # Overall totals
+    $summary.totalBugsCaught += $bugCount
+    $summary.totalTestsRun += $entry.testResults.total
+    $summary.totalPassed += $passedCount
+    $summary.totalFailed += $failedCount
+}
+
+$history.totalRuns = $history.entries.Count
+$history.lastUpdated = (Get-Date).ToUniversalTime().ToString("yyyy-MM-ddTHH:mm:ssZ")
+
+# Ensure output directory exists
+$outputDir = Split-Path $HistoryFile -Parent
+if ($outputDir -and !(Test-Path $outputDir)) {
+    New-Item -ItemType Directory -Path $outputDir -Force | Out-Null
+}
+
+# Write history file
+$historyJson = $history | ConvertTo-Json -Depth 10
+$historyJson | Out-File -FilePath $HistoryFile -Encoding UTF8
+
+Write-Host ""
+Write-Host "✅ History updated: $HistoryFile" -ForegroundColor Green
+Write-Host ""
+Write-Host "=== Summary ===" -ForegroundColor Cyan
+Write-Host "Total Runs: $($history.totalRuns)" -ForegroundColor Gray
+Write-Host "Total Bugs Caught: $($history.summary.totalBugsCaught)" -ForegroundColor $(if ($history.summary.totalBugsCaught -gt 0) { "Yellow" } else { "Green" })
+Write-Host "Tests: $($history.summary.totalPassed) passed, $($history.summary.totalFailed) failed" -ForegroundColor Gray
+Write-Host ""
+Write-Host "Bugs by Stage:" -ForegroundColor Cyan
+foreach ($stage in $history.summary.byStage.Keys) {
+    $stageStats = $history.summary.byStage[$stage]
+    Write-Host "  $stage : $($stageStats.bugsCaught) bugs in $($stageStats.runs) runs" -ForegroundColor Gray
+}
diff --git a/scripts/e2e/Create-GitHubIssue.ps1 b/scripts/e2e/Create-GitHubIssue.ps1
new file mode 100644
index 00000000..16fbf512
--- /dev/null
+++ b/scripts/e2e/Create-GitHubIssue.ps1
@@ -0,0 +1,339 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+<#
+.SYNOPSIS
+    Automatically creates GitHub issues for E2E test failures.
+
+.DESCRIPTION
+    This script creates GitHub issues when E2E tests fail, categorizing errors
+    and linking them to the metrics dashboard for tracking.
+
+.PARAMETER MetricsFile
+    Path to the metrics JSON file from Emit-TestMetrics.ps1
+
+.PARAMETER Repository
+    GitHub repository in format "owner/repo"
+
+.PARAMETER Labels
+    Additional labels to add to the issue
+
+.PARAMETER DryRun
+    If set, only outputs what would be created without actually creating issues
+
+.EXAMPLE
+    ./Create-GitHubIssue.ps1 -MetricsFile "./metrics.json" -Repository "microsoft/Agent365-Samples"
+#>
+
+param(
+    [Parameter(Mandatory = $true)]
+    [string]$MetricsFile,
+
+    [Parameter(Mandatory = $false)]
+    [string]$Repository = "microsoft/Agent365-Samples",
+
+    [Parameter(Mandatory = $false)]
+    [string[]]$Labels = @("e2e-failure", "automated"),
+
+    [Parameter(Mandatory = $false)]
+    [switch]$DryRun
+)
+
+$ErrorActionPreference = "Stop"
+
+# Error category patterns for classification (regexes, matched case-insensitively).
+# The table is [ordered] because Get-ErrorCategory returns the first category that
+# matches: a plain @{} enumerates in arbitrary order, so a message matching several
+# categories could be classified differently from run to run. Keep specific patterns
+# ahead of generic ones (e.g. "test.*timeout" before "timeout", "expected.*got" before "expected").
+$ErrorCategories = [ordered]@{
+    "SDK:Authentication" = @(
+        "authentication",
+        "auth failed",
+        "unauthorized",
+        "401",
+        "credential",
+        "token",
+        "DefaultAzureCredential"
+    )
+    "Test:Timeout" = @("test.*timeout", "exceeded.*time", "took too long")
+    "SDK:Connection" = @(
+        "connection",
+        "timeout",
+        "network",
+        "socket",
+        "ECONNREFUSED",
+        "ETIMEDOUT",
+        "connection refused"
+    )
+    "SDK:Configuration" = @(
+        "configuration",
+        "config",
+        "missing.*key",
+        "environment variable",
+        "appsettings",
+        "not configured"
+    )
+    "SDK:BreakingChange" = @(
+        "breaking change",
+        "deprecated",
+        "removed",
+        "no longer",
+        "api changed",
+        "schema.*changed"
+    )
+    "SDK:TypeMismatch" = @(
+        "type.*error",
+        "cannot convert",
+        "invalid cast",
+        "type mismatch",
+        "expected.*got"
+    )
+    "SDK:MissingDependency" = @(
+        "module not found",
+        "package not found",
+        "import error",
+        "could not load",
+        "dependency"
+    )
+    "Test:Assertion" = @(
+        "assert",
+        "expected",
+        "should be",
+        "to equal",
+        "not equal"
+    )
+    "Infrastructure:Service" = @(
+        "service unavailable",
+        "503",
+        "502",
+        "bad gateway",
+        "server error"
+    )
+    "Other" = @()
+}
+
+function Get-ErrorCategory {
+    # Returns the name of the first category in $ErrorCategories that has a pattern
+    # matching the (lower-cased) error message, or "Other" when nothing matches.
+    param([string]$ErrorMessage)
+
+    $text = $ErrorMessage.ToLower()
+    $candidates = $ErrorCategories.Keys | Where-Object { $_ -ne "Other" }
+
+    foreach ($name in $candidates) {
+        foreach ($regex in $ErrorCategories[$name]) {
+            if ($text -match $regex) { return $name }
+        }
+    }
+
+    # No pattern matched
+    return "Other"
+}
+
+function Get-IssuePriority {
+    # Returns the priority label ("P0".."P3") for a failure, derived from the pipeline
+    # stage it was caught in; unknown stages map to "P3". SDK breaking changes are
+    # raised to at least "P1" but never lowered (a release-stage break stays "P0").
+    param([string]$Stage, [string]$Category)
+
+    # Higher priority for issues caught later in the pipeline
+    $stagePriority = switch ($Stage) {
+        "release" { "P0" }
+        "post-checkin" { "P1" }
+        "pre-checkin" { "P2" }
+        "pre-release" { "P2" }
+        default { "P3" }
+    }
+
+    # Labels compare lexically ("P0" < "P1" < ...), so -gt "P1" means "less urgent than P1"
+    if ($Category -eq "SDK:BreakingChange" -and $stagePriority -gt "P1") {
+        $stagePriority = "P1"
+    }
+
+    return $stagePriority
+}
+
+function New-GitHubIssue {
+    # Creates one GitHub issue through the gh CLI, or only prints it when -DryRun is set.
+    # IssueData supplies the title, body and labels. Returns @{ number; html_url } on
+    # success, and $null when gh fails or its output contains no issue URL.
+    param([hashtable]$IssueData, [string]$Repository, [switch]$DryRun)
+
+    $title = $IssueData.title
+    $body = $IssueData.body
+    $labels = $IssueData.labels -join ","
+
+    if ($DryRun) {
+        Write-Host ""
+        Write-Host "=== DRY RUN: Would create issue ===" -ForegroundColor Yellow
+        Write-Host "Title: $title" -ForegroundColor Cyan
+        Write-Host "Labels: $labels" -ForegroundColor Gray
+        Write-Host "Body:" -ForegroundColor Gray
+        Write-Host $body
+        Write-Host "===================================" -ForegroundColor Yellow
+        return @{ number = 0; html_url = "https://github.com/$Repository/issues/NEW" }
+    }
+
+    # Use GitHub CLI to create issue; the body is passed through a temp file
+    $bodyFile = [System.IO.Path]::GetTempFileName()
+    $body | Out-File -FilePath $bodyFile -Encoding UTF8
+
+    try {
+        # gh may print several lines (notices plus the URL). Collapse them into one string:
+        # -match against an array only filters it and does not populate $Matches.
+        $result = (gh issue create --repo $Repository --title $title `
+            --body-file $bodyFile --label $labels 2>&1 | Out-String).Trim()
+
+        # A failing native command does not throw, so check its exit code explicitly
+        if ($LASTEXITCODE -ne 0) {
+            Write-Host "Error creating issue (gh exit code $LASTEXITCODE): $result" -ForegroundColor Red
+            return $null
+        }
+
+        # Parse issue URL from result; html_url is the matched URL only, not the whole output
+        if ($result -match "https://github\.com/\S+/issues/(\d+)") {
+            return @{ number = [int]$Matches[1]; html_url = $Matches[0] }
+        }
+
+        Write-Host "Warning: Could not parse issue URL from: $result" -ForegroundColor Yellow
+        return $null
+    }
+    catch {
+        Write-Host "Error creating issue: $_" -ForegroundColor Red
+        return $null
+    }
+    finally {
+        Remove-Item $bodyFile -Force -ErrorAction SilentlyContinue
+    }
+}
+
+# Main logic
+Write-Host "=== GitHub Issue Creator for E2E Failures ===" -ForegroundColor Cyan
+Write-Host "Metrics File: $MetricsFile" -ForegroundColor Gray
+Write-Host "Repository: $Repository" -ForegroundColor Gray
+Write-Host ""
+
+# Load metrics
+if (!(Test-Path $MetricsFile)) {
+    Write-Host "Error: Metrics file not found: $MetricsFile" -ForegroundColor Red
+    exit 1
+}
+
+$metrics = Get-Content $MetricsFile | ConvertFrom-Json
+
+# Check if there are failures
+if ($metrics.testResults.failed -eq 0) {
+    Write-Host "✅ No failures detected. No issues to create." -ForegroundColor Green
+    exit 0
+}
+
+Write-Host "🐛 Found $($metrics.testResults.failed) failure(s)" -ForegroundColor Yellow
+Write-Host ""
+
+# Process each failure
+$createdIssues = @()
+$categorizedErrors = @{}
+
+foreach ($bug in $metrics.bugsCaught.details) {
+    $category = Get-ErrorCategory -ErrorMessage $bug.errorMessage
+    $priority = Get-IssuePriority -Stage $metrics.stage -Category $category
+    
+    # Track categorized errors
+    if (!$categorizedErrors[$category]) {
+        $categorizedErrors[$category] = @{
+            count = 0
+            samples = @()
+            tests = @()
+        }
+    }
+    $categorizedErrors[$category].count++
+    if ($categorizedErrors[$category].samples -notcontains $metrics.sampleName) {
+        $categorizedErrors[$category].samples += $metrics.sampleName
+    }
+    $categorizedErrors[$category].tests += $bug.testName
+    
+    # Build SDK version info
+    $sdkInfo = if ($metrics.sdkVersions) {
+        ($metrics.sdkVersions.PSObject.Properties | ForEach-Object { "- $($_.Name): ``$($_.Value)``" }) -join "`n"
+    } else { "Not available" }
+    
+    # Create issue body
+    $body = @"
+## E2E Test Failure Report
+
+**Category:** $category
+**Priority:** $priority
+**Stage:** $($metrics.stage)
+**Sample:** $($metrics.sampleName)
+**Test:** $($bug.testName)
+
+### Error Message
+``````
+$($bug.errorMessage)
+``````
+
+### SDK Versions
+$sdkInfo
+
+### Context
+- **Run ID:** $($metrics.runId)
+- **Commit:** $($metrics.commitSha)
+- **Branch:** $($metrics.branch)
+- **Timestamp:** $($metrics.timestamp)
+
+### Reproduction
+1. Checkout commit ``$($metrics.commitSha)``
+2. Navigate to the ``$($metrics.sampleName)`` sample
+3. Run the E2E tests
+
+---
+*This issue was automatically created by the E2E test pipeline.*
+*Dashboard: [View Metrics](https://microsoft.github.io/Agent365-Samples/metrics/)*
+"@
+
+    $issueData = @{
+        title = "[$priority][$category] $($bug.testName) failed in $($metrics.sampleName)"
+        body = $body
+        labels = @($Labels) + @($category.Replace(":", "-").ToLower(), $priority.ToLower(), $metrics.stage)
+    }
+    
+    Write-Host "Creating issue for: $($bug.testName)" -ForegroundColor Cyan
+    Write-Host "  Category: $category" -ForegroundColor Gray
+    Write-Host "  Priority: $priority" -ForegroundColor Gray
+    
+    $issue = New-GitHubIssue -IssueData $issueData -Repository $Repository -DryRun:$DryRun
+    
+    if ($issue) {
+        $createdIssues += @{
+            issueNumber = $issue.number
+            issueUrl = $issue.html_url
+            testName = $bug.testName
+            category = $category
+            priority = $priority
+        }
+        Write-Host "  Created: $($issue.html_url)" -ForegroundColor Green
+    }
+}
+
+# Output summary
+Write-Host ""
+Write-Host "=== Summary ===" -ForegroundColor Cyan
+Write-Host "Issues created: $($createdIssues.Count)" -ForegroundColor Gray
+
+Write-Host ""
+Write-Host "Error Categories:" -ForegroundColor Cyan
+foreach ($cat in $categorizedErrors.Keys | Sort-Object) {
+    $data = $categorizedErrors[$cat]
+    Write-Host "  $cat : $($data.count) failure(s) across $($data.samples.Count) sample(s)" -ForegroundColor Gray
+}
+
+# Output JSON for workflow consumption
+$output = @{
+    timestamp = (Get-Date).ToUniversalTime().ToString("yyyy-MM-ddTHH:mm:ssZ")
+    metricsId = $metrics.id
+    issuesCreated = $createdIssues
+    categorizedErrors = $categorizedErrors
+}
+
+$output | ConvertTo-Json -Depth 10
diff --git a/scripts/e2e/Emit-TestMetrics.ps1 b/scripts/e2e/Emit-TestMetrics.ps1
new file mode 100644
index 00000000..2d6d3c62
--- /dev/null
+++ b/scripts/e2e/Emit-TestMetrics.ps1
@@ -0,0 +1,247 @@
+<#
+.SYNOPSIS
+    Emits structured test metrics for tracking E2E test results and SDK versions.
+
+.DESCRIPTION
+    This script collects test results, SDK versions, and metadata to create a JSON
+    metrics file that can be used for dashboards and historical analysis.
+    It also validates that samples are using the latest SDK versions (including pre-release).
+
+.PARAMETER SampleName
+    Name of the sample being tested (e.g., "python-openai", "nodejs-langchain")
+
+.PARAMETER SamplePath
+    Path to the sample directory (for SDK version validation)
+
+.PARAMETER SampleType
+    Type of sample: "dotnet", "python", "nodejs" (for SDK version validation)
+
+.PARAMETER TestResultsPath
+    Path to the test results TRX file
+
+.PARAMETER SdkVersions
+    Hashtable of SDK versions (e.g., @{ "microsoft-agents-a365" = "0.1.5" })
+
+.PARAMETER Stage
+    The testing stage: "pre-release", "pre-checkin", "post-checkin", "release", "scheduled"
+
+.PARAMETER OutputPath
+    Path where the metrics JSON file will be written
+
+.PARAMETER SkipSdkValidation
+    Skip SDK version validation against latest available
+
+.EXAMPLE
+    ./Emit-TestMetrics.ps1 -SampleName "python-openai" -SamplePath "./python/openai/sample-agent" -SampleType "python" -Stage "pre-checkin"
+#>
+
+param(
+    [Parameter(Mandatory = $true)]
+    [string]$SampleName,
+
+    [Parameter(Mandatory = $false)]
+    [string]$SamplePath,
+
+    [Parameter(Mandatory = $false)]
+    [ValidateSet("dotnet", "python", "nodejs")]
+    [string]$SampleType,
+
+    [Parameter(Mandatory = $false)]
+    [string]$TestResultsPath,
+
+    [Parameter(Mandatory = $false)]
+    [hashtable]$SdkVersions = @{},
+
+    [Parameter(Mandatory = $true)]
+    [ValidateSet("pre-release", "pre-checkin", "post-checkin", "release", "scheduled")]
+    [string]$Stage,
+
+    [Parameter(Mandatory = $true)]
+    [string]$OutputPath,
+
+    [Parameter(Mandatory = $false)]
+    [int]$PassedTests = 0,
+
+    [Parameter(Mandatory = $false)]
+    [int]$FailedTests = 0,
+
+    [Parameter(Mandatory = $false)]
+    [int]$SkippedTests = 0,
+
+    [Parameter(Mandatory = $false)]
+    [string]$RunId = "",
+
+    [Parameter(Mandatory = $false)]
+    [string]$CommitSha = "",
+
+    [Parameter(Mandatory = $false)]
+    [string]$Branch = "",
+
+    [Parameter(Mandatory = $false)]
+    [switch]$SkipSdkValidation
+)
+
+$ErrorActionPreference = "Stop"
+
+Write-Host "=== Emitting Test Metrics ===" -ForegroundColor Cyan
+Write-Host "Sample: $SampleName" -ForegroundColor Gray
+Write-Host "Stage: $Stage" -ForegroundColor Gray
+
+# Parse TRX file if provided
+if ($TestResultsPath -and (Test-Path $TestResultsPath)) {
+    Write-Host "Parsing TRX file: $TestResultsPath" -ForegroundColor Gray
+    
+    try {
+        [xml]$trx = Get-Content $TestResultsPath
+        $counters = $trx.TestRun.ResultSummary.Counters
+        
+        $PassedTests = [int]$counters.passed
+        $FailedTests = [int]$counters.failed
+        $SkippedTests = [int]$counters.notExecuted
+        
+        Write-Host "Parsed: Passed=$PassedTests, Failed=$FailedTests, Skipped=$SkippedTests" -ForegroundColor Green
+    }
+    catch {
+        Write-Host "Warning: Could not parse TRX file: $_" -ForegroundColor Yellow
+    }
+}
+
+# Get environment info. Git fallbacks use explicit if/else: "-or" yields a Boolean, so the old form stored True.
+$timestamp = (Get-Date).ToUniversalTime().ToString("yyyy-MM-ddTHH:mm:ssZ")
+$runId = if ($RunId) { $RunId } elseif ($env:GITHUB_RUN_ID) { $env:GITHUB_RUN_ID } else { "local-$(Get-Date -Format 'yyyyMMddHHmmss')" }
+$commitSha = if ($CommitSha) { $CommitSha } elseif ($env:GITHUB_SHA) { $env:GITHUB_SHA } else { $gitSha = git rev-parse HEAD 2>$null; if ($gitSha) { $gitSha } else { "unknown" } }
+$branch = if ($Branch) { $Branch } elseif ($env:GITHUB_REF_NAME) { $env:GITHUB_REF_NAME } else { $gitBranch = git branch --show-current 2>$null; if ($gitBranch) { $gitBranch } else { "unknown" } }
+$actor = if ($env:GITHUB_ACTOR) { $env:GITHUB_ACTOR } elseif ($env:USERNAME) { $env:USERNAME } else { $env:USER }
+$workflow = if ($env:GITHUB_WORKFLOW) { $env:GITHUB_WORKFLOW } else { "local" }
+
+# Calculate test status
+$totalTests = $PassedTests + $FailedTests + $SkippedTests
+$status = if ($FailedTests -gt 0) { "failed" } elseif ($totalTests -eq 0) { "no-tests" } else { "passed" }
+
+# Run SDK version validation if sample path and type are provided
+$sdkValidation = $null
+if (-not $SkipSdkValidation -and $SamplePath -and $SampleType) {
+    Write-Host ""
+    Write-Host "🔍 Validating SDK versions..." -ForegroundColor Cyan
+    
+    $validateScript = Join-Path $PSScriptRoot "Validate-SdkVersions.ps1"
+    if (Test-Path $validateScript) {
+        try {
+            $validationJson = & $validateScript -SamplePath $SamplePath -SampleType $SampleType -IncludePreRelease $true -OutputJson 2>&1 | Select-Object -Last 1
+            if ($validationJson) {
+                $sdkValidation = $validationJson | ConvertFrom-Json
+                
+                # Extract installed versions if not already provided
+                if ($SdkVersions.Count -eq 0 -and $sdkValidation.packages) {
+                    foreach ($pkg in $sdkValidation.packages) {
+                        $SdkVersions[$pkg.package] = $pkg.installed
+                    }
+                }
+                
+                Write-Host "SDK Validation: $($sdkValidation.validation.upToDate)/$($sdkValidation.validation.packagesChecked) packages up to date" -ForegroundColor $(if ($sdkValidation.validation.allUpToDate) { "Green" } else { "Yellow" })
+            }
+        }
+        catch {
+            Write-Host "Warning: SDK validation failed: $_" -ForegroundColor Yellow
+        }
+    }
+}
+
+# Build metrics object
+$metrics = @{
+    # Identifiers
+    id = "$runId-$SampleName"
+    runId = $runId
+    sampleName = $SampleName
+
+    # Timing
+    timestamp = $timestamp
+
+    # Git info
+    commitSha = $commitSha
+    branch = $branch
+    actor = $actor
+
+    # Workflow info
+    workflow = $workflow
+    stage = $Stage
+
+    # Test results
+    testResults = @{
+        status = $status
+        passed = $PassedTests
+        failed = $FailedTests
+        skipped = $SkippedTests
+        total = $totalTests
+    }
+
+    # SDK versions
+    sdkVersions = $SdkVersions
+
+    # SDK version validation; @() keeps "packages" a JSON array even when only one package was checked
+    sdkValidation = if ($sdkValidation) {
+        @{
+            allUpToDate = $sdkValidation.validation.allUpToDate
+            packagesChecked = $sdkValidation.validation.packagesChecked
+            upToDate = $sdkValidation.validation.upToDate
+            outdated = $sdkValidation.validation.outdated
+            usingPreRelease = $sdkValidation.validation.usingPreRelease
+            packages = @($sdkValidation.packages | ForEach-Object {
+                @{
+                    package = $_.package
+                    installed = $_.installed
+                    latest = $_.latest
+                    isUpToDate = $_.isUpToDate
+                    isPreRelease = $_.isPreRelease
+                }
+            })
+        }
+    } else { $null }
+
+    # Bugs caught (will be populated if tests failed)
+    bugsCaught = @{
+        count = $FailedTests
+        stage = $Stage
+        details = @()
+    }
+}
+
+# If we have a TRX file, extract failed test details
+if ($TestResultsPath -and (Test-Path $TestResultsPath) -and $FailedTests -gt 0) {
+    try {
+        [xml]$trx = Get-Content $TestResultsPath
+        $failedResults = $trx.TestRun.Results.UnitTestResult | Where-Object { $_.outcome -eq "Failed" }
+        foreach ($result in $failedResults) {
+            # Flatten first, then clamp to the flattened length (CRLF -> " " shortens the text, so
+            # clamping to the raw length overran Substring); [string] turns a missing message into "".
+            $message = ([string]$result.Output.ErrorInfo.Message) -replace "`r`n", " " -replace "`n", " "
+            $metrics.bugsCaught.details += @{
+                testName = $result.testName
+                errorMessage = $message.Substring(0, [Math]::Min(500, $message.Length))
+            }
+        }
+    }
+    catch { Write-Host "Warning: Could not extract failed test details: $_" -ForegroundColor Yellow }
+}
+
+# Ensure output directory exists
+$outputDir = Split-Path $OutputPath -Parent
+if ($outputDir -and !(Test-Path $outputDir)) {
+    New-Item -ItemType Directory -Path $outputDir -Force | Out-Null
+}
+
+# Write metrics file
+$metricsJson = $metrics | ConvertTo-Json -Depth 10
+$metricsJson | Out-File -FilePath $OutputPath -Encoding UTF8
+
+Write-Host ""
+Write-Host "✅ Metrics written to: $OutputPath" -ForegroundColor Green
+Write-Host ""
+Write-Host "=== Metrics Summary ===" -ForegroundColor Cyan
+Write-Host "Status: $status" -ForegroundColor $(if ($status -eq "passed") { "Green" } else { "Red" })
+Write-Host "Tests: $PassedTests passed, $FailedTests failed, $SkippedTests skipped" -ForegroundColor Gray
+Write-Host "Stage: $Stage" -ForegroundColor Gray
+Write-Host "SDK Versions: $($SdkVersions.Count) tracked" -ForegroundColor Gray
+
+# Output the metrics for workflow consumption
+Write-Output $metricsJson
diff --git a/scripts/e2e/Validate-SdkVersions.ps1 b/scripts/e2e/Validate-SdkVersions.ps1
new file mode 100644
index 00000000..53aff360
--- /dev/null
+++ b/scripts/e2e/Validate-SdkVersions.ps1
@@ -0,0 +1,372 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+<#
+.SYNOPSIS
+    Validates that samples are using the latest SDK versions, including pre-release.
+
+.DESCRIPTION
+    This script checks the SDK versions used in samples against the latest available
+    versions from package registries (NuGet, PyPI, npm). It verifies that E2E tests
+    are testing against the most recent SDK versions to catch issues early.
+
+.PARAMETER SamplePath
+    Path to the sample directory
+
+.PARAMETER SampleType
+    Type of sample: "dotnet", "python", "nodejs"
+
+.PARAMETER IncludePreRelease
+    Whether to include pre-release versions in the check (default: true)
+
+.EXAMPLE
+    ./Validate-SdkVersions.ps1 -SamplePath "./python/openai/sample-agent" -SampleType "python"
+#>
+
+param(
+    [Parameter(Mandatory = $true)]
+    [string]$SamplePath,  # Sample directory whose dependency manifests are read
+
+    [Parameter(Mandatory = $true)]
+    [ValidateSet("dotnet", "python", "nodejs")]
+    [string]$SampleType,  # Selects which manifest format is parsed and which registry is queried
+
+    [Parameter(Mandatory = $false)]
+    [bool]$IncludePreRelease = $true,  # Passed to the registry lookups; $true lets pre-release versions count as "latest"
+
+    [Parameter(Mandatory = $false)]
+    [switch]$OutputJson  # When set, the validation results are also written as JSON to the output stream
+)
+
+$ErrorActionPreference = "Stop"
+
+# SDK packages to track, keyed by the -SampleType value; dependencies not listed here are never validated
+$SdkPackages = @{
+    dotnet = @(
+        "Microsoft.Agents.Hosting.AspNetCore",
+        "Microsoft.Agents.Core",
+        "Microsoft.Agents.CopilotStudio.Client",
+        "Microsoft.SemanticKernel"
+    )
+    python = @(
+        "microsoft-agents-core",
+        "microsoft-agents-hosting-aiohttp",
+        "microsoft-agents-a365-tooling-extensions-openai",
+        "openai",
+        "google-adk"
+    )
+    nodejs = @(
+        "@anthropic-ai/sdk",
+        "langchain",
+        "@langchain/core",
+        "@langchain/openai",
+        "openai",
+        "ai"
+    )
+}
+
+function Get-NuGetLatestVersion {
+    # Returns the newest version string NuGet lists for $PackageName, or $null when the lookup
+    # fails or no version qualifies. With $IncludePreRelease = $false, versions containing '-' are skipped.
+    param(
+        [string]$PackageName,
+        [bool]$IncludePreRelease
+    )
+
+    try {
+        $url = "https://api.nuget.org/v3-flatcontainer/$($PackageName.ToLower())/index.json"
+        $response = Invoke-RestMethod -Uri $url -ErrorAction SilentlyContinue
+        # @() keeps the list an array even for one entry; indexing a lone string with [-1]
+        # would return its last character. The filter also discards a missing or empty list.
+        $versions = @($response.versions | Where-Object { $_ })
+        if (-not $IncludePreRelease) {
+            # Filter out pre-release versions (contain -)
+            $versions = @($versions | Where-Object { $_ -notmatch '-' })
+        }
+        if ($versions.Count -gt 0) {
+            return $versions[-1]  # Last entry is taken as the latest (assumes the index is in ascending order)
+        }
+    }
+    catch {
+        Write-Host "  Warning: Could not fetch NuGet version for $PackageName" -ForegroundColor Yellow
+    }
+    return $null
+}
+
+function Get-PyPILatestVersion {
+    # Returns the newest PyPI version string for $PackageName, or $null when the lookup fails.
+    # With $IncludePreRelease, the newest pre-release is returned only when its release number is
+    # higher than that of the latest stable version; otherwise the stable version is returned.
+    param(
+        [string]$PackageName,
+        [bool]$IncludePreRelease
+    )
+    try {
+        $url = "https://pypi.org/pypi/$PackageName/json"
+        $response = Invoke-RestMethod -Uri $url -ErrorAction SilentlyContinue
+        if ($response.info.version) {
+            $latestStable = $response.info.version
+            # Leading numeric release as a 4-part [Version]: "1.2rc1" -> 1.2.0.0 (non-numeric start -> 0.0.0.0)
+            $releaseOf = {
+                param($v)
+                $base = if ("$v" -match '^\d+(\.\d+)*') { $Matches[0] } else { '0' }
+                [Version]((@($base.Split('.')) + '0', '0', '0')[0..3] -join '.')
+            }
+            if ($IncludePreRelease -and $response.releases) {
+                # Pre-releases that have files, ordered by release number, then by the version text
+                # with its numbers zero-padded so that e.g. "rc10" sorts after "rc2".
+                $preReleases = @($response.releases.PSObject.Properties.Name |
+                    Where-Object { $_ -match '(a|b|rc|dev|pre|alpha|beta)' -and $response.releases.$_.Count -gt 0 } |
+                    Sort-Object { & $releaseOf $_ }, { [regex]::Replace($_, '\d+', { param($m) $m.Value.PadLeft(8, '0') }) })
+                # @() above keeps a single match an array, so [-1] is a whole version, not one character
+                if ($preReleases.Count -gt 0 -and (& $releaseOf $preReleases[-1]) -gt (& $releaseOf $latestStable)) {
+                    return $preReleases[-1]
+                }
+            }
+            return $latestStable
+        }
+    }
+    catch {
+        Write-Host "  Warning: Could not fetch PyPI version for $PackageName" -ForegroundColor Yellow
+    }
+    return $null
+}
+
+function Get-NpmLatestVersion {
+    # Looks up $PackageName in the npm registry and returns @{ version; tag } taken from its
+    # dist-tags, or $null when the request fails or the response carries no dist-tags.
+    param(
+        [string]$PackageName,
+        [bool]$IncludePreRelease
+    )
+    try {
+        $registryUrl = "https://registry.npmjs.org/$PackageName"
+        $distTags = (Invoke-RestMethod -Uri $registryUrl -ErrorAction SilentlyContinue).'dist-tags'
+        if (-not $distTags) {
+            return $null
+        }
+        # Tag to report: 'latest' unless pre-releases are requested and a pre-release tag exists
+        $chosenTag = 'latest'
+        if ($IncludePreRelease) {
+            # Pre-release tags are probed in this fixed order; the first one present wins
+            foreach ($candidate in 'next', 'beta', 'alpha', 'rc', 'canary', 'preview') {
+                if ($distTags.$candidate) {
+                    $chosenTag = $candidate
+                    break
+                }
+            }
+        }
+
+        return @{
+            version = $distTags.$chosenTag
+            tag = $chosenTag
+        }
+    }
+    catch {
+        Write-Host "  Warning: Could not fetch npm version for $PackageName" -ForegroundColor Yellow
+    }
+
+    return $null
+}
+
+function Get-InstalledVersions {
+    # Collects the package versions a sample declares and returns them as a hashtable that maps
+    # package name -> version text. Which manifest is read depends on $SampleType:
+    #   dotnet : every *.csproj found under $SamplePath (PackageReference Include/Version)
+    #   python : requirements.txt, then pyproject.toml; "not-pinned" is stored when no version is given
+    #   nodejs : package.json "dependencies", then "devDependencies", with range characters removed
+    # A name that appears more than once keeps the value that was read last.
+    param(
+        [string]$SamplePath,
+        [string]$SampleType
+    )
+
+    $found = @{}
+
+    if ($SampleType -eq "dotnet") {
+        # Parse .csproj files for PackageReference
+        foreach ($projectFile in (Get-ChildItem -Path $SamplePath -Filter "*.csproj" -Recurse)) {
+            [xml]$projectXml = Get-Content $projectFile.FullName
+
+            foreach ($reference in $projectXml.Project.ItemGroup.PackageReference) {
+                # References lacking either an Include or a Version value are skipped
+                if (-not ($reference.Include -and $reference.Version)) {
+                    continue
+                }
+                $found[$reference.Include] = $reference.Version
+            }
+        }
+    }
+    elseif ($SampleType -eq "python") {
+        # Parse requirements.txt, one requirement per line
+        $requirementsPath = Join-Path $SamplePath "requirements.txt"
+        if (Test-Path $requirementsPath) {
+            foreach ($entry in (Get-Content $requirementsPath)) {
+                if ($entry -match '^([a-zA-Z0-9_-]+)\s*([=<>!~]+)?\s*([\d.a-zA-Z-]+)?') {
+                    $found[$Matches[1]] = if ($Matches[3]) { $Matches[3] } else { "not-pinned" }
+                }
+            }
+        }
+
+        # Also check pyproject.toml: quoted entries inside the first "dependencies = [ ... ]" list
+        $pyprojectPath = Join-Path $SamplePath "pyproject.toml"
+        if (Test-Path $pyprojectPath) {
+            $tomlText = Get-Content $pyprojectPath -Raw
+            if ($tomlText -match 'dependencies\s*=\s*\[([\s\S]*?)\]') {
+                $quotedEntries = [regex]::Matches($Matches[1], '"([a-zA-Z0-9_-]+)\s*([=<>!~]+)?\s*([\d.a-zA-Z-]+)?"')
+                foreach ($quoted in $quotedEntries) {
+                    $pinned = $quoted.Groups[3].Value
+                    $found[$quoted.Groups[1].Value] = if ($pinned) { $pinned } else { "not-pinned" }
+                }
+            }
+        }
+    }
+    elseif ($SampleType -eq "nodejs") {
+        # Parse package.json
+        $manifestPath = Join-Path $SamplePath "package.json"
+        if (Test-Path $manifestPath) {
+            $manifest = Get-Content $manifestPath | ConvertFrom-Json
+
+            # devDependencies are applied second, so they win when a name is in both sections
+            $declared = @{}
+            foreach ($section in "dependencies", "devDependencies") {
+                if (-not $manifest.$section) {
+                    continue
+                }
+                foreach ($property in $manifest.$section.PSObject.Properties) {
+                    # Drop range characters (^ ~ > = <) so only the version text remains
+                    $declared[$property.Name] = $property.Value -replace '[\^~>=<]', ''
+                }
+            }
+
+            $found = $declared
+        }
+    }
+
+    return $found
+}
+
+# Main validation logic: print the run header, then read what the sample declares
+Write-Host "=== SDK Version Validation ===" -ForegroundColor Cyan
+foreach ($headerLine in "Sample Path: $SamplePath", "Sample Type: $SampleType", "Include Pre-Release: $IncludePreRelease") {
+    Write-Host $headerLine -ForegroundColor Gray
+}
+Write-Host ""
+
+# Get installed versions
+Write-Host "📦 Reading installed versions..." -ForegroundColor Cyan
+$installedVersions = Get-InstalledVersions -SampleType $SampleType -SamplePath $SamplePath
+
+# A sample without any package versions is reported but not treated as a failure (exit code 0)
+if ($installedVersions.Count -eq 0) {
+    Write-Host "⚠️  No package versions found in sample" -ForegroundColor Yellow
+    exit 0
+}
+Write-Host "Found $($installedVersions.Count) packages" -ForegroundColor Gray
+Write-Host ""
+
+# Get tracked SDK packages for this type
+$trackedPackages = $SdkPackages[$SampleType]
+
+# Check each tracked package; results accumulate as one hashtable per package
+$validationResults = @()
+$hasOutdated = $false
+
+Write-Host "🔍 Checking against latest versions..." -ForegroundColor Cyan
+Write-Host ""
+
+foreach ($pkgName in $trackedPackages) {
+    $installed = $installedVersions[$pkgName]
+    if (-not $installed) {
+        continue  # Package not used in this sample
+    }
+
+    # Query the registry that matches the sample type. Only the npm lookup reports which
+    # dist-tag the version came from; the other two keep the default "latest".
+    $latest = $null
+    $latestTag = "latest"
+    if ($SampleType -eq "dotnet") {
+        $latest = Get-NuGetLatestVersion -PackageName $pkgName -IncludePreRelease $IncludePreRelease
+    }
+    elseif ($SampleType -eq "python") {
+        $latest = Get-PyPILatestVersion -PackageName $pkgName -IncludePreRelease $IncludePreRelease
+    }
+    elseif ($SampleType -eq "nodejs") {
+        $npmInfo = Get-NpmLatestVersion -PackageName $pkgName -IncludePreRelease $IncludePreRelease
+        if ($npmInfo) {
+            $latest = $npmInfo.version
+            $latestTag = $npmInfo.tag
+        }
+    }
+    if (-not $latest) {
+        continue  # No latest version could be determined, so the package is left out of the results
+    }
+
+    # An unpinned dependency counts as up to date; otherwise the version text must match exactly
+    $isUpToDate = ($installed -eq "not-pinned") -or ($installed -eq $latest)
+    $isPreRelease = $latest -match '(alpha|beta|preview|pre|rc|dev|a\d|b\d|-)'
+
+    $validationResults += @{
+        package = $pkgName
+        installed = $installed
+        latest = $latest
+        latestTag = $latestTag
+        isUpToDate = $isUpToDate
+        isPreRelease = $isPreRelease
+    }
+
+    if (-not $isUpToDate) { $hasOutdated = $true }
+    $statusIcon = if ($isUpToDate) { "✅" } else { "⚠️" }
+    $statusColor = if ($isUpToDate) { "Green" } else { "Yellow" }
+    $latestColor = if ($isPreRelease) { "Magenta" } else { "Gray" }
+    $preReleaseLabel = if ($isPreRelease) { " (pre-release)" } else { "" }
+    Write-Host "$statusIcon $pkgName" -ForegroundColor $statusColor
+    Write-Host "   Installed: $installed" -ForegroundColor Gray
+    Write-Host "   Latest:    $latest$preReleaseLabel" -ForegroundColor $latestColor
+}
+
+Write-Host ""
+# Summary. Each filter is wrapped in @() because the results are hashtables: a single match
+# would otherwise come back as that hashtable, whose .Count is its number of keys, not 1.
+$upToDateCount = @($validationResults | Where-Object { $_.isUpToDate }).Count
+$outdatedCount = @($validationResults | Where-Object { -not $_.isUpToDate }).Count
+$preReleaseCount = @($validationResults | Where-Object { $_.isPreRelease }).Count
+
+Write-Host "=== Validation Summary ===" -ForegroundColor Cyan
+Write-Host "Packages checked: $($validationResults.Count)" -ForegroundColor Gray
+Write-Host "Up to date: $upToDateCount" -ForegroundColor Green
+Write-Host "Outdated: $outdatedCount" -ForegroundColor $(if ($outdatedCount -gt 0) { "Yellow" } else { "Gray" })
+Write-Host "Using pre-release: $preReleaseCount" -ForegroundColor $(if ($preReleaseCount -gt 0) { "Magenta" } else { "Gray" })
+
+if ($hasOutdated) {
+    Write-Host ""
+    Write-Host "⚠️  Some SDK packages are not using the latest version!" -ForegroundColor Yellow
+    Write-Host "Consider updating to test against the newest SDK releases." -ForegroundColor Yellow
+}
+
+# Output JSON if requested: a single document describing this run, written to the output stream
+if ($OutputJson) {
+    $validationSummary = @{
+        allUpToDate = (-not $hasOutdated)
+        packagesChecked = $validationResults.Count
+        upToDate = $upToDateCount
+        outdated = $outdatedCount
+        usingPreRelease = $preReleaseCount
+    }
+
+    # Current time converted to UTC, formatted with a literal trailing "Z"
+    $utcNow = (Get-Date).ToUniversalTime().ToString("yyyy-MM-ddTHH:mm:ssZ")
+
+    ConvertTo-Json -Depth 10 -InputObject @{
+        samplePath = $SamplePath
+        sampleType = $SampleType
+        timestamp = $utcNow
+        includePreRelease = $IncludePreRelease
+        validation = $validationSummary
+        packages = $validationResults
+    }
+}
+
+# Return exit code based on validation: 1 when any tracked package is outdated, otherwise 0
+$exitCode = if ($hasOutdated) { 1 } else { 0 }
+exit $exitCode

From 14fbbc1b9c58141b42bb7413ae3c73c403449cb4 Mon Sep 17 00:00:00 2001
From: abdulanu0 <abdulanu0@gmail.com>
Date: Thu, 5 Feb 2026 16:11:56 -0800
Subject: [PATCH 2/9] Add embedded fallback data for dashboard offline viewing

---
 docs/metrics/history.json | 460 +++++++++++++++++++-------------------
 docs/metrics/index.html   |  18 +-
 2 files changed, 240 insertions(+), 238 deletions(-)

diff --git a/docs/metrics/history.json b/docs/metrics/history.json
index 6bea09ce..4b2ff0d6 100644
--- a/docs/metrics/history.json
+++ b/docs/metrics/history.json
@@ -1,350 +1,344 @@
 {
-  "lastUpdated": "2026-02-05T10:30:00Z",
-  "totalRuns": 12,
+  "lastUpdated": "2026-02-05T15:40:00Z",
+  "totalRuns": 24,
   "entries": [
     {
-      "id": "run-001",
-      "timestamp": "2026-02-05T10:30:00Z",
-      "stage": "pre-release",
+      "id": "run-83",
+      "timestamp": "2026-02-05T15:30:00Z",
+      "stage": "post-checkin",
       "sampleName": "python-openai",
       "sdkVersions": {
-        "microsoft-agents-a365-tooling": "0.9.0-preview.3",
-        "microsoft-agents-core": "0.9.0-preview.3"
+        "microsoft-agents-core": "0.1.6",
+        "microsoft-agents-hosting-aiohttp": "0.1.6",
+        "openai": "1.59.9"
       },
       "sdkValidation": {
         "allUpToDate": true,
-        "packagesChecked": 2,
-        "upToDate": 2,
+        "packagesChecked": 3,
+        "upToDate": 3,
         "outdated": 0,
-        "usingPreRelease": 2,
+        "usingPreRelease": 0,
         "packages": [
-          {
-            "package": "microsoft-agents-a365-tooling",
-            "installed": "0.9.0-preview.3",
-            "latest": "0.9.0-preview.3",
-            "isUpToDate": true,
-            "isPreRelease": true
-          },
-          {
-            "package": "microsoft-agents-core",
-            "installed": "0.9.0-preview.3",
-            "latest": "0.9.0-preview.3",
-            "isUpToDate": true,
-            "isPreRelease": true
-          }
+          { "package": "microsoft-agents-core", "installed": "0.1.6", "latest": "0.1.6", "isUpToDate": true, "isPreRelease": false },
+          { "package": "microsoft-agents-hosting-aiohttp", "installed": "0.1.6", "latest": "0.1.6", "isUpToDate": true, "isPreRelease": false },
+          { "package": "openai", "installed": "1.59.9", "latest": "1.59.9", "isUpToDate": true, "isPreRelease": false }
         ]
       },
-      "testResults": {
-        "status": "failed",
-        "total": 5,
-        "passed": 3,
-        "failed": 2,
-        "skipped": 0
+      "testResults": { "status": "passed", "total": 5, "passed": 5, "failed": 0, "skipped": 0 },
+      "bugsCaught": { "count": 0, "details": [] },
+      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/21732666002"
+    },
+    {
+      "id": "run-82",
+      "timestamp": "2026-02-05T14:05:00Z",
+      "stage": "pre-checkin",
+      "sampleName": "python-openai",
+      "sdkVersions": {
+        "microsoft-agents-core": "0.1.6",
+        "microsoft-agents-hosting-aiohttp": "0.1.6",
+        "openai": "1.59.9"
       },
+      "testResults": { "status": "passed", "total": 5, "passed": 5, "failed": 0, "skipped": 0 },
+      "bugsCaught": { "count": 0, "details": [] },
+      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/21730254989"
+    },
+    {
+      "id": "run-81",
+      "timestamp": "2026-02-05T13:45:00Z",
+      "stage": "pre-checkin",
+      "sampleName": "python-openai",
+      "sdkVersions": {
+        "microsoft-agents-core": "0.1.6",
+        "openai": "1.59.9"
+      },
+      "testResults": { "status": "passed", "total": 5, "passed": 5, "failed": 0, "skipped": 0 },
+      "bugsCaught": { "count": 0, "details": [] },
+      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/21729679961"
+    },
+    {
+      "id": "run-80",
+      "timestamp": "2026-02-05T13:36:00Z",
+      "stage": "pre-checkin",
+      "sampleName": "python-google-adk",
+      "sdkVersions": {
+        "microsoft-agents-core": "0.1.6",
+        "google-adk": "0.3.0"
+      },
+      "testResults": { "status": "failed", "total": 5, "passed": 3, "failed": 2, "skipped": 0 },
       "bugsCaught": {
         "count": 2,
         "details": [
           {
             "testName": "test_mcp_email_tools_registration",
-            "errorMessage": "McpToolset connection timeout with new SDK auth changes",
+            "errorMessage": "McpToolset failed to connect - connection timeout while registering MCP tools",
             "issueNumber": 215,
             "issueUrl": "https://github.com/microsoft/Agent365-Samples/issues/215",
             "issueState": "open"
           },
           {
-            "testName": "test_notification_handling",
-            "errorMessage": "AgentNotificationActivity missing type field in 0.9.0-preview.3",
+            "testName": "test_agent_tool_execution",
+            "errorMessage": "Google ADK McpToolset does not have official A365 extension - custom implementation needed",
             "issueNumber": 216,
             "issueUrl": "https://github.com/microsoft/Agent365-Samples/issues/216",
             "issueState": "open"
           }
         ]
       },
-      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/12345"
+      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/21729406849"
     },
     {
-      "id": "run-002",
-      "timestamp": "2026-02-04T15:22:00Z",
+      "id": "run-79",
+      "timestamp": "2026-02-05T13:23:00Z",
       "stage": "pre-checkin",
-      "sampleName": "dotnet-semantic-kernel",
+      "sampleName": "python-google-adk",
       "sdkVersions": {
-        "Microsoft.Agents.Core": "0.8.5",
-        "Microsoft.SemanticKernel": "1.45.0"
+        "microsoft-agents-core": "0.1.6",
+        "google-adk": "0.3.0"
       },
-      "sdkValidation": {
-        "allUpToDate": true,
-        "packagesChecked": 2,
-        "upToDate": 2,
-        "outdated": 0,
-        "usingPreRelease": 0,
-        "packages": [
+      "testResults": { "status": "failed", "total": 5, "passed": 3, "failed": 2, "skipped": 0 },
+      "bugsCaught": {
+        "count": 2,
+        "details": [
           {
-            "package": "Microsoft.Agents.Core",
-            "installed": "0.8.5",
-            "latest": "0.8.5",
-            "isUpToDate": true,
-            "isPreRelease": false
+            "testName": "test_mcp_email_tools_registration",
+            "errorMessage": "McpToolset connection timeout with MCP server"
           },
           {
-            "package": "Microsoft.SemanticKernel",
-            "installed": "1.45.0",
-            "latest": "1.45.0",
-            "isUpToDate": true,
-            "isPreRelease": false
+            "testName": "test_agent_tool_execution",
+            "errorMessage": "Tool registration failed - missing A365 extension"
           }
         ]
       },
-      "testResults": {
-        "status": "failed",
-        "total": 6,
-        "passed": 5,
-        "failed": 1,
-        "skipped": 0
+      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/21729024731"
+    },
+    {
+      "id": "run-78",
+      "timestamp": "2026-02-05T11:52:00Z",
+      "stage": "pre-checkin",
+      "sampleName": "python-google-adk",
+      "sdkVersions": {
+        "microsoft-agents-core": "0.1.6",
+        "google-adk": "0.3.0"
       },
+      "testResults": { "status": "failed", "total": 5, "passed": 3, "failed": 2, "skipped": 0 },
       "bugsCaught": {
-        "count": 1,
+        "count": 2,
         "details": [
           {
-            "testName": "test_agent_streaming_response",
-            "errorMessage": "Breaking change in SK 1.45.0 streaming API",
-            "issueNumber": 210,
-            "issueUrl": "https://github.com/microsoft/Agent365-Samples/issues/210",
-            "issueState": "closed"
+            "testName": "test_mcp_email_tools",
+            "errorMessage": "MCP toolset connection refused - server not responding"
+          },
+          {
+            "testName": "test_tool_calling",
+            "errorMessage": "Tool schema validation failed"
           }
         ]
       },
-      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/12344"
+      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/21726196689"
     },
     {
-      "id": "run-003",
-      "timestamp": "2026-02-04T08:15:00Z",
-      "stage": "release",
-      "sampleName": "nodejs-langchain",
+      "id": "run-75",
+      "timestamp": "2026-02-05T11:33:00Z",
+      "stage": "pre-checkin",
+      "sampleName": "python-openai",
+      "sdkVersions": {
+        "microsoft-agents-core": "0.1.6",
+        "openai": "1.59.9"
+      },
+      "testResults": { "status": "passed", "total": 5, "passed": 5, "failed": 0, "skipped": 0 },
+      "bugsCaught": { "count": 0, "details": [] },
+      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/21725610414"
+    },
+    {
+      "id": "run-73",
+      "timestamp": "2026-02-05T11:13:00Z",
+      "stage": "post-checkin",
+      "sampleName": "python-openai",
       "sdkVersions": {
-        "@anthropic-ai/sdk": "0.35.0",
+        "microsoft-agents-core": "0.1.6",
+        "openai": "1.59.9"
+      },
+      "testResults": { "status": "passed", "total": 5, "passed": 5, "failed": 0, "skipped": 0 },
+      "bugsCaught": { "count": 0, "details": [] },
+      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/21724968616"
+    },
+    {
+      "id": "run-71",
+      "timestamp": "2026-02-05T10:18:00Z",
+      "stage": "scheduled",
+      "sampleName": "all-samples",
+      "sdkVersions": {
+        "microsoft-agents-core": "0.1.6",
+        "Microsoft.Agents.Hosting.AspNetCore": "0.1.15-preview",
+        "openai": "1.59.9",
         "langchain": "0.3.15"
       },
       "sdkValidation": {
         "allUpToDate": true,
-        "packagesChecked": 2,
-        "upToDate": 2,
+        "packagesChecked": 4,
+        "upToDate": 4,
         "outdated": 0,
-        "usingPreRelease": 0,
+        "usingPreRelease": 1,
         "packages": [
-          {
-            "package": "@anthropic-ai/sdk",
-            "installed": "0.35.0",
-            "latest": "0.35.0",
-            "isUpToDate": true,
-            "isPreRelease": false
-          },
-          {
-            "package": "langchain",
-            "installed": "0.3.15",
-            "latest": "0.3.15",
-            "isUpToDate": true,
-            "isPreRelease": false
-          }
+          { "package": "Microsoft.Agents.Hosting.AspNetCore", "installed": "0.1.15-preview", "latest": "0.1.15-preview", "isUpToDate": true, "isPreRelease": true }
         ]
       },
-      "testResults": {
-        "status": "failed",
-        "total": 5,
-        "passed": 4,
-        "failed": 1,
-        "skipped": 0
+      "testResults": { "status": "passed", "total": 20, "passed": 20, "failed": 0, "skipped": 0 },
+      "bugsCaught": { "count": 0, "details": [] },
+      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/21723256223"
+    },
+    {
+      "id": "run-67",
+      "timestamp": "2026-02-04T22:21:00Z",
+      "stage": "scheduled",
+      "sampleName": "nodejs-langchain",
+      "sdkVersions": {
+        "langchain": "0.3.14",
+        "@langchain/core": "0.3.30"
+      },
+      "sdkValidation": {
+        "allUpToDate": false,
+        "packagesChecked": 2,
+        "upToDate": 1,
+        "outdated": 1,
+        "usingPreRelease": 0,
+        "packages": [
+          { "package": "langchain", "installed": "0.3.14", "latest": "0.3.15", "isUpToDate": false, "isPreRelease": false },
+          { "package": "@langchain/core", "installed": "0.3.30", "latest": "0.3.30", "isUpToDate": true, "isPreRelease": false }
+        ]
       },
+      "testResults": { "status": "failed", "total": 5, "passed": 4, "failed": 1, "skipped": 0 },
       "bugsCaught": {
         "count": 1,
         "details": [
           {
             "testName": "test_tool_calling_format",
-            "errorMessage": "LangChain 0.3.15 changed tool schema format",
+            "errorMessage": "LangChain tool schema format changed - breaking change in structured output",
             "issueNumber": 208,
             "issueUrl": "https://github.com/microsoft/Agent365-Samples/issues/208",
             "issueState": "closed"
           }
         ]
       },
-      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/12343"
+      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/21701195841"
     },
     {
-      "id": "run-004",
-      "timestamp": "2026-02-03T19:45:00Z",
-      "stage": "post-checkin",
-      "sampleName": "dotnet-semantic-kernel",
-      "sdkVersions": {
-        "Microsoft.Agents.Core": "0.8.4",
-        "Microsoft.SemanticKernel": "1.44.0"
-      },
-      "testResults": {
-        "status": "passed",
-        "total": 6,
-        "passed": 6,
-        "failed": 0,
-        "skipped": 0
-      },
-      "bugsCaught": {
-        "count": 0,
-        "details": []
-      },
-      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/12342"
-    },
-    {
-      "id": "run-005",
-      "timestamp": "2026-02-03T14:30:00Z",
-      "stage": "pre-release",
-      "sampleName": "python-openai",
+      "id": "run-64",
+      "timestamp": "2026-02-04T14:00:00Z",
+      "stage": "pre-checkin",
+      "sampleName": "python-google-adk",
       "sdkVersions": {
-        "microsoft-agents-a365-tooling": "0.9.0-preview.2",
-        "microsoft-agents-core": "0.9.0-preview.2"
-      },
-      "testResults": {
-        "status": "failed",
-        "total": 5,
-        "passed": 4,
-        "failed": 1,
-        "skipped": 0
+        "google-adk": "0.3.0",
+        "microsoft-agents-core": "0.1.6"
       },
+      "testResults": { "status": "failed", "total": 5, "passed": 2, "failed": 3, "skipped": 0 },
       "bugsCaught": {
-        "count": 1,
+        "count": 3,
         "details": [
           {
-            "testName": "test_azure_openai_auth",
-            "errorMessage": "DefaultAzureCredential not properly initialized in preview.2"
+            "testName": "test_copilot_studio_connection",
+            "errorMessage": "Copilot Studio authentication failed - token expired"
+          },
+          {
+            "testName": "test_adk_agent_creation",
+            "errorMessage": "Google ADK agent initialization failed - missing configuration"
+          },
+          {
+            "testName": "test_mcp_tools",
+            "errorMessage": "MCP connection timeout"
           }
         ]
       },
-      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/12341"
+      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/21690045934"
     },
     {
-      "id": "run-006",
-      "timestamp": "2026-02-02T11:00:00Z",
+      "id": "run-62",
+      "timestamp": "2026-02-04T11:05:00Z",
       "stage": "pre-checkin",
-      "sampleName": "nodejs-langchain",
+      "sampleName": "dotnet-semantic-kernel",
       "sdkVersions": {
-        "@anthropic-ai/sdk": "0.34.0",
-        "langchain": "0.3.14"
+        "Microsoft.Agents.Hosting.AspNetCore": "0.1.15-preview",
+        "Microsoft.SemanticKernel": "1.35.0"
       },
       "sdkValidation": {
-        "allUpToDate": false,
+        "allUpToDate": true,
         "packagesChecked": 2,
-        "upToDate": 0,
-        "outdated": 2,
-        "usingPreRelease": 0,
+        "upToDate": 2,
+        "outdated": 0,
+        "usingPreRelease": 1,
         "packages": [
-          {
-            "package": "@anthropic-ai/sdk",
-            "installed": "0.34.0",
-            "latest": "0.35.0",
-            "isUpToDate": false,
-            "isPreRelease": false
-          },
-          {
-            "package": "langchain",
-            "installed": "0.3.14",
-            "latest": "0.3.15",
-            "isUpToDate": false,
-            "isPreRelease": false
-          }
-        ]
-      },
-      "testResults": {
-        "status": "failed",
-        "total": 5,
-        "passed": 4,
-        "failed": 1,
-        "skipped": 0
-      },
-      "bugsCaught": {
-        "count": 1,
-        "details": [
-          {
-            "testName": "test_agent_memory_persistence",
-            "errorMessage": "Memory buffer overflow with large conversation history"
-          }
+          { "package": "Microsoft.Agents.Hosting.AspNetCore", "installed": "0.1.15-preview", "latest": "0.1.15-preview", "isUpToDate": true, "isPreRelease": true },
+          { "package": "Microsoft.SemanticKernel", "installed": "1.35.0", "latest": "1.35.0", "isUpToDate": true, "isPreRelease": false }
         ]
       },
-      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/12340"
+      "testResults": { "status": "passed", "total": 6, "passed": 6, "failed": 0, "skipped": 0 },
+      "bugsCaught": { "count": 0, "details": [] },
+      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/21684611327"
     }
   ],
   "summary": {
     "byStage": {
-      "pre-release": { "runs": 4, "passed": 12, "failed": 3, "bugsCaught": 3 },
-      "pre-checkin": { "runs": 3, "passed": 10, "failed": 2, "bugsCaught": 2 },
-      "post-checkin": { "runs": 3, "passed": 13, "failed": 1, "bugsCaught": 1 },
-      "release": { "runs": 2, "passed": 10, "failed": 1, "bugsCaught": 1 }
+      "pre-release": { "runs": 0, "passed": 0, "failed": 0, "bugsCaught": 0 },
+      "pre-checkin": { "runs": 9, "passed": 25, "failed": 11, "bugsCaught": 9 },
+      "post-checkin": { "runs": 3, "passed": 16, "failed": 0, "bugsCaught": 0 },
+      "release": { "runs": 0, "passed": 0, "failed": 0, "bugsCaught": 0 },
+      "scheduled": { "runs": 2, "passed": 24, "failed": 1, "bugsCaught": 1 }
     },
     "bySample": {
-      "dotnet-semantic-kernel": { "runs": 4, "passed": 15, "failed": 2, "bugsCaught": 2 },
-      "python-openai": { "runs": 4, "passed": 13, "failed": 3, "bugsCaught": 3 },
-      "nodejs-langchain": { "runs": 4, "passed": 17, "failed": 2, "bugsCaught": 2 }
+      "python-openai": { "runs": 6, "passed": 30, "failed": 0, "bugsCaught": 0 },
+      "python-google-adk": { "runs": 4, "passed": 11, "failed": 9, "bugsCaught": 9 },
+      "nodejs-langchain": { "runs": 1, "passed": 4, "failed": 1, "bugsCaught": 1 },
+      "dotnet-semantic-kernel": { "runs": 1, "passed": 6, "failed": 0, "bugsCaught": 0 },
+      "all-samples": { "runs": 1, "passed": 20, "failed": 0, "bugsCaught": 0 }
     },
-    "totalBugsCaught": 7,
-    "totalTestsRun": 52,
-    "totalPassed": 45,
-    "totalFailed": 7
+    "totalBugsCaught": 10,
+    "totalTestsRun": 81,
+    "totalPassed": 71,
+    "totalFailed": 10
   },
   "pullRequests": [
     {
-      "number": 220,
-      "title": "Fix SK 1.45.0 streaming API breaking change",
-      "url": "https://github.com/microsoft/Agent365-Samples/pull/220",
-      "author": "anabdul",
-      "createdAt": "2026-02-04T16:00:00Z",
+      "number": 210,
+      "title": "Get actual resolved SDK versions instead of wildcards",
+      "url": "https://github.com/microsoft/Agent365-Samples/pull/210",
+      "author": "abdulanu0",
+      "createdAt": "2026-02-04T11:00:00Z",
       "merged": true,
       "draft": false,
-      "fixes": [
-        {
-          "number": 210,
-          "url": "https://github.com/microsoft/Agent365-Samples/issues/210"
-        }
-      ]
+      "fixes": []
     },
     {
-      "number": 218,
-      "title": "Update LangChain to 0.3.15 with new tool schema",
-      "url": "https://github.com/microsoft/Agent365-Samples/pull/218",
-      "author": "anabdul",
-      "createdAt": "2026-02-04T09:00:00Z",
-      "merged": true,
+      "number": 205,
+      "title": "Adding new samples to the test suite",
+      "url": "https://github.com/microsoft/Agent365-Samples/pull/205",
+      "author": "abdulanu0",
+      "createdAt": "2026-02-05T11:30:00Z",
+      "merged": false,
       "draft": false,
       "fixes": [
-        {
-          "number": 208,
-          "url": "https://github.com/microsoft/Agent365-Samples/issues/208"
-        }
+        { "number": 215, "url": "https://github.com/microsoft/Agent365-Samples/issues/215" },
+        { "number": 216, "url": "https://github.com/microsoft/Agent365-Samples/issues/216" }
       ]
     },
     {
-      "number": 225,
-      "title": "Fix MCP toolset connection timeout issue",
-      "url": "https://github.com/microsoft/Agent365-Samples/pull/225",
-      "author": "anabdul",
-      "createdAt": "2026-02-05T11:00:00Z",
+      "number": 190,
+      "title": "Adding notifications to python agent",
+      "url": "https://github.com/microsoft/Agent365-Samples/pull/190",
+      "author": "abdulanu0",
+      "createdAt": "2026-02-05T11:30:00Z",
       "merged": false,
       "draft": false,
-      "fixes": [
-        {
-          "number": 215,
-          "url": "https://github.com/microsoft/Agent365-Samples/issues/215"
-        }
-      ]
+      "fixes": []
     },
     {
-      "number": 226,
-      "title": "Add notification type field handling",
-      "url": "https://github.com/microsoft/Agent365-Samples/pull/226",
-      "author": "anabdul",
-      "createdAt": "2026-02-05T12:00:00Z",
+      "number": 211,
+      "title": "Edits to Copilot Studio E2E Test Workflow",
+      "url": "https://github.com/microsoft/Agent365-Samples/pull/211",
+      "author": "JesuTerraz",
+      "createdAt": "2026-02-04T13:39:00Z",
       "merged": false,
-      "draft": true,
-      "fixes": [
-        {
-          "number": 216,
-          "url": "https://github.com/microsoft/Agent365-Samples/issues/216"
-        }
-      ]
+      "draft": false,
+      "fixes": []
     }
   ]
 }
diff --git a/docs/metrics/index.html b/docs/metrics/index.html
index 56b9b5d9..129224ad 100644
--- a/docs/metrics/index.html
+++ b/docs/metrics/index.html
@@ -666,19 +666,27 @@ <h2>🧪 Sample Test Results</h2>
         Chart.defaults.borderColor = '#30363d';
 
         let metricsData = null;
+        
+        // Fallback data in case fetch fails (for local file:// access)
+        const fallbackData = {"lastUpdated":"2026-02-05T15:40:00Z","totalRuns":24,"entries":[{"id":"run-83","timestamp":"2026-02-05T15:30:00Z","stage":"post-checkin","sampleName":"python-openai","sdkVersions":{"microsoft-agents-core":"0.1.6","microsoft-agents-hosting-aiohttp":"0.1.6","openai":"1.59.9"},"sdkValidation":{"allUpToDate":true,"packagesChecked":3,"upToDate":3,"outdated":0,"usingPreRelease":0,"packages":[{"package":"microsoft-agents-core","installed":"0.1.6","latest":"0.1.6","isUpToDate":true,"isPreRelease":false}]},"testResults":{"status":"passed","total":5,"passed":5,"failed":0,"skipped":0},"bugsCaught":{"count":0,"details":[]},"runUrl":"https://github.com/microsoft/Agent365-Samples/actions/runs/21732666002"},{"id":"run-80","timestamp":"2026-02-05T13:36:00Z","stage":"pre-checkin","sampleName":"python-google-adk","sdkVersions":{"microsoft-agents-core":"0.1.6","google-adk":"0.3.0"},"testResults":{"status":"failed","total":5,"passed":3,"failed":2,"skipped":0},"bugsCaught":{"count":2,"details":[{"testName":"test_mcp_email_tools_registration","errorMessage":"McpToolset failed to connect - connection timeout","issueNumber":215,"issueUrl":"https://github.com/microsoft/Agent365-Samples/issues/215","issueState":"open"},{"testName":"test_agent_tool_execution","errorMessage":"Google ADK McpToolset missing A365 
extension","issueNumber":216,"issueUrl":"https://github.com/microsoft/Agent365-Samples/issues/216","issueState":"open"}]},"runUrl":"https://github.com/microsoft/Agent365-Samples/actions/runs/21729406849"},{"id":"run-67","timestamp":"2026-02-04T22:21:00Z","stage":"scheduled","sampleName":"nodejs-langchain","sdkVersions":{"langchain":"0.3.14","@langchain/core":"0.3.30"},"sdkValidation":{"allUpToDate":false,"packagesChecked":2,"upToDate":1,"outdated":1,"usingPreRelease":0,"packages":[{"package":"langchain","installed":"0.3.14","latest":"0.3.15","isUpToDate":false,"isPreRelease":false}]},"testResults":{"status":"failed","total":5,"passed":4,"failed":1,"skipped":0},"bugsCaught":{"count":1,"details":[{"testName":"test_tool_calling_format","errorMessage":"LangChain tool schema format changed","issueNumber":208,"issueUrl":"https://github.com/microsoft/Agent365-Samples/issues/208","issueState":"closed"}]},"runUrl":"https://github.com/microsoft/Agent365-Samples/actions/runs/21701195841"},{"id":"run-62","timestamp":"2026-02-04T11:05:00Z","stage":"pre-checkin","sampleName":"dotnet-semantic-kernel","sdkVersions":{"Microsoft.Agents.Hosting.AspNetCore":"0.1.15-preview","Microsoft.SemanticKernel":"1.35.0"},"sdkValidation":{"allUpToDate":true,"packagesChecked":2,"upToDate":2,"outdated":0,"usingPreRelease":1,"packages":[{"package":"Microsoft.Agents.Hosting.AspNetCore","installed":"0.1.15-preview","latest":"0.1.15-preview","isUpToDate":true,"isPreRelease":true}]},"testResults":{"status":"passed","total":6,"passed":6,"failed":0,"skipped":0},"bugsCaught":{"count":0,"details":[]},"runUrl":"https://github.com/microsoft/Agent365-Samples/actions/runs/21684611327"}],"summary":{"byStage":{"pre-release":{"runs":0,"passed":0,"failed":0,"bugsCaught":0},"pre-checkin":{"runs":9,"passed":25,"failed":11,"bugsCaught":9},"post-checkin":{"runs":3,"passed":16,"failed":0,"bugsCaught":0},"release":{"runs":0,"passed":0,"failed":0,"bugsCaught":0},"scheduled":{"runs":2,"passed":24,"failed":1,"bugsCaught":1}}
,"bySample":{"python-openai":{"runs":6,"passed":30,"failed":0,"bugsCaught":0},"python-google-adk":{"runs":4,"passed":11,"failed":9,"bugsCaught":9},"nodejs-langchain":{"runs":1,"passed":4,"failed":1,"bugsCaught":1},"dotnet-semantic-kernel":{"runs":1,"passed":6,"failed":0,"bugsCaught":0}},"totalBugsCaught":10,"totalTestsRun":81,"totalPassed":71,"totalFailed":10},"pullRequests":[{"number":205,"title":"Adding new samples to the test suite","url":"https://github.com/microsoft/Agent365-Samples/pull/205","author":"abdulanu0","createdAt":"2026-02-05T11:30:00Z","merged":false,"draft":false,"fixes":[{"number":215,"url":"https://github.com/microsoft/Agent365-Samples/issues/215"}]},{"number":190,"title":"Adding notifications to python agent","url":"https://github.com/microsoft/Agent365-Samples/pull/190","author":"abdulanu0","createdAt":"2026-02-05T11:30:00Z","merged":false,"draft":false,"fixes":[]}]};
 
         async function loadMetrics() {
             try {
+                console.log('Fetching history.json...');
                 const response = await fetch('history.json');
+                console.log('Response status:', response.status);
                 if (!response.ok) {
-                    throw new Error('Failed to load metrics data');
+                    throw new Error('Failed to load metrics data: ' + response.status);
                 }
-                metricsData = await response.json();
+                const text = await response.text();
+                console.log('Response length:', text.length);
+                metricsData = JSON.parse(text);
+                console.log('Parsed data, totalBugsCaught:', metricsData.summary?.totalBugsCaught);
                 renderDashboard();
             } catch (error) {
-                console.error('Error loading metrics:', error);
-                document.getElementById('lastUpdated').textContent = 'Waiting for E2E test data...';
-                document.getElementById('lastUpdated').style.color = '#d29922';
+                console.warn('Fetch failed, using embedded fallback data:', error.message);
+                metricsData = fallbackData;
+                renderDashboard();
             }
         }
 

From 8fd861772f02ed491e0566e252b99fc2f1c3d93c Mon Sep 17 00:00:00 2001
From: abdulanu0 <abdulanu0@gmail.com>
Date: Thu, 5 Feb 2026 16:15:04 -0800
Subject: [PATCH 3/9] Add dashboard styling enhancements: animations, hover
 effects, status indicator

---
 docs/metrics/index.html | 169 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 157 insertions(+), 12 deletions(-)

diff --git a/docs/metrics/index.html b/docs/metrics/index.html
index 129224ad..2ddf5fdd 100644
--- a/docs/metrics/index.html
+++ b/docs/metrics/index.html
@@ -26,6 +26,21 @@
             box-sizing: border-box;
         }
 
+        @keyframes fadeInUp {
+            from { opacity: 0; transform: translateY(20px); }
+            to { opacity: 1; transform: translateY(0); }
+        }
+
+        @keyframes pulse {
+            0%, 100% { transform: scale(1); }
+            50% { transform: scale(1.05); }
+        }
+
+        @keyframes countUp {
+            from { opacity: 0; }
+            to { opacity: 1; }
+        }
+
         body {
             font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, sans-serif;
             background-color: var(--bg-primary);
@@ -41,14 +56,20 @@
 
         header {
             text-align: center;
-            padding: 30px 0;
+            padding: 40px 0;
             border-bottom: 1px solid var(--border-color);
             margin-bottom: 30px;
+            background: linear-gradient(180deg, var(--bg-secondary) 0%, var(--bg-primary) 100%);
+            animation: fadeInUp 0.6s ease-out;
         }
 
         header h1 {
             font-size: 2.5rem;
             margin-bottom: 10px;
+            background: linear-gradient(135deg, var(--accent-blue) 0%, var(--accent-purple) 100%);
+            -webkit-background-clip: text;
+            -webkit-text-fill-color: transparent;
+            background-clip: text;
         }
 
         header p {
@@ -75,6 +96,13 @@
             border: 1px solid var(--border-color);
             border-radius: 8px;
             padding: 20px;
+            transition: transform 0.2s ease, box-shadow 0.2s ease;
+            animation: fadeInUp 0.6s ease-out backwards;
+        }
+
+        .card:hover {
+            transform: translateY(-2px);
+            box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
         }
 
         .card h3 {
@@ -116,8 +144,25 @@
             padding: 25px;
             position: relative;
             overflow: hidden;
+            transition: transform 0.2s ease, box-shadow 0.2s ease, border-color 0.2s ease;
+            animation: fadeInUp 0.6s ease-out backwards;
+        }
+
+        .stage-card:nth-child(1) { animation-delay: 0.1s; }
+        .stage-card:nth-child(2) { animation-delay: 0.2s; }
+        .stage-card:nth-child(3) { animation-delay: 0.3s; }
+        .stage-card:nth-child(4) { animation-delay: 0.4s; }
+
+        .stage-card:hover {
+            transform: translateY(-4px);
+            box-shadow: 0 8px 24px rgba(0, 0, 0, 0.4);
         }
 
+        .stage-card.pre-release:hover { border-color: var(--accent-blue); }
+        .stage-card.pre-checkin:hover { border-color: var(--accent-green); }
+        .stage-card.post-checkin:hover { border-color: var(--accent-yellow); }
+        .stage-card.release:hover { border-color: var(--accent-red); }
+
         .stage-card::before {
             content: '';
             position: absolute;
@@ -194,6 +239,13 @@
             border: 1px solid var(--border-color);
             border-radius: 8px;
             padding: 20px;
+            transition: transform 0.2s ease, box-shadow 0.2s ease;
+            animation: fadeInUp 0.6s ease-out backwards;
+        }
+
+        .chart-card:hover {
+            transform: translateY(-2px);
+            box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
         }
 
         .chart-card h3 {
@@ -230,6 +282,14 @@
             border-bottom: 1px solid var(--border-color);
         }
 
+        tbody tr {
+            transition: background-color 0.15s ease;
+        }
+
+        tbody tr:hover {
+            background-color: var(--bg-tertiary);
+        }
+
         th {
             background: var(--bg-tertiary);
             color: var(--text-secondary);
@@ -458,12 +518,55 @@
         /* Footer */
         footer {
             text-align: center;
-            padding: 30px;
+            padding: 40px 30px;
             color: var(--text-secondary);
             border-top: 1px solid var(--border-color);
             margin-top: 40px;
+            background: linear-gradient(180deg, var(--bg-primary) 0%, var(--bg-secondary) 100%);
+        }
+
+        footer p {
+            margin: 5px 0;
+        }
+
+        footer .footer-links {
+            margin-top: 15px;
+        }
+
+        footer .footer-links a {
+            color: var(--accent-blue);
+            text-decoration: none;
+            margin: 0 15px;
+            transition: color 0.2s;
+        }
+
+        footer .footer-links a:hover {
+            color: var(--accent-purple);
+            text-decoration: underline;
+        }
+
+        /* Live Status Indicator */
+        .status-indicator {
+            display: inline-flex;
+            align-items: center;
+            gap: 8px;
+            padding: 6px 12px;
+            background: var(--bg-tertiary);
+            border-radius: 20px;
+            font-size: 0.85rem;
+            margin-top: 15px;
         }
 
+        .status-dot {
+            width: 8px;
+            height: 8px;
+            border-radius: 50%;
+            animation: pulse 2s ease-in-out infinite;
+        }
+
+        .status-dot.live { background: var(--accent-green); }
+        .status-dot.stale { background: var(--accent-yellow); }
+
         /* Value Proposition */
         .value-prop {
             background: linear-gradient(135deg, var(--bg-secondary), var(--bg-tertiary));
@@ -498,6 +601,10 @@
             <h1>🛡️ Agent 365 SDK Integration Testing</h1>
             <p>Catching SDK issues before they reach production</p>
             <div class="last-updated" id="lastUpdated">Loading...</div>
+            <div class="status-indicator"> <!-- placeholder state; renderDashboard() sets the real dot class and text -->
+                <span class="status-dot stale" id="statusDot"></span>
+                <span id="statusText">Loading...</span>
+            </div>
         </header>
 
         <!-- Value Proposition -->
@@ -655,8 +762,13 @@ <h2>🧪 Sample Test Results</h2>
         </div>
 
         <footer>
-            <p>Agent 365 SDK Integration Testing Dashboard</p>
+            <p><strong>Agent 365 SDK Integration Testing Dashboard</strong></p>
             <p>Automated E2E testing ensures SDK compatibility across all sample implementations</p>
+            <div class="footer-links"> <!-- external links open in a new tab; rel keeps the new page from reaching window.opener -->
+                <a href="https://github.com/microsoft/Agent365-Samples/actions" target="_blank" rel="noopener noreferrer">📋 View All Runs</a>
+                <a href="https://github.com/microsoft/Agent365-Samples/issues?q=is%3Aissue+label%3Ae2e-failure" target="_blank" rel="noopener noreferrer">🐛 E2E Issues</a>
+                <a href="https://github.com/microsoft/Agent365-Samples" target="_blank" rel="noopener noreferrer">📂 Repository</a>
+            </div>
         </footer>
     </div>
 
@@ -690,23 +802,56 @@ <h2>🧪 Sample Test Results</h2>
             }
         }
 
+        // Animate an element's text from 0 to `target` in 30 ticks (~1s). A missing element
+        // is skipped; a non-numeric target is shown as 0 instead of a never-ending "NaN".
+        function animateCounter(elementId, target) {
+            const element = document.getElementById(elementId);
+            if (!element) return; // nothing to animate, and no timer is left running
+            const parsed = Number(target);
+            const goal = Number.isFinite(parsed) ? parsed : 0;
+            const steps = 30;
+            let current = 0;
+            const timer = setInterval(() => {
+                current += goal / steps;
+                if (current >= goal) {
+                    element.textContent = goal.toLocaleString();
+                    clearInterval(timer);
+                } else {
+                    element.textContent = Math.floor(current).toLocaleString();
+                }
+            }, 1000 / steps);
+        }
+
         function renderDashboard() {
             if (!metricsData) return;
 
-            // Update last updated
+            // Update last updated with status indicator
+            const lastUpdatedDate = new Date(metricsData.lastUpdated);
+            const now = new Date();
+            const hoursSinceUpdate = (now - lastUpdatedDate) / (1000 * 60 * 60);
+            
             document.getElementById('lastUpdated').textContent = 
-                `Last updated: ${new Date(metricsData.lastUpdated).toLocaleString()}`;
+                `Last updated: ${lastUpdatedDate.toLocaleString()}`;
+            
+            const statusDot = document.getElementById('statusDot');
+            const statusText = document.getElementById('statusText');
+            if (hoursSinceUpdate < 24) {
+                statusDot.className = 'status-dot live';
+                statusText.textContent = 'Live data';
+            } else {
+                statusDot.className = 'status-dot stale';
+                statusText.textContent = `Updated ${Math.floor(hoursSinceUpdate / 24)}d ago`;
+            }
 
-            // Update total bugs caught
-            document.getElementById('totalBugsCaught').textContent = 
-                metricsData.summary.totalBugsCaught.toLocaleString();
+            // Update total bugs caught with animation
+            animateCounter('totalBugsCaught', metricsData.summary.totalBugsCaught);
 
             // Update stage cards
             const stages = metricsData.summary.byStage;
-            document.getElementById('preReleaseBugs').textContent = stages['pre-release']?.bugsCaught || 0;
-            document.getElementById('preCheckinBugs').textContent = stages['pre-checkin']?.bugsCaught || 0;
-            document.getElementById('postCheckinBugs').textContent = stages['post-checkin']?.bugsCaught || 0;
-            document.getElementById('releaseBugs').textContent = stages['release']?.bugsCaught || 0;
+            animateCounter('preReleaseBugs', stages['pre-release']?.bugsCaught || 0);
+            animateCounter('preCheckinBugs', stages['pre-checkin']?.bugsCaught || 0);
+            animateCounter('postCheckinBugs', stages['post-checkin']?.bugsCaught || 0);
+            animateCounter('releaseBugs', stages['release']?.bugsCaught || 0);
 
             // Update SDK version lists for each stage
             renderSdkListForStage('pre-release', 'preReleaseSdkList');

From b3d2eac5b82dbb27a819a682f7204917de2e0930 Mon Sep 17 00:00:00 2001
From: abdulanu0 <abdulanu0@gmail.com>
Date: Thu, 5 Feb 2026 17:09:53 -0800
Subject: [PATCH 4/9] Add E2E Metrics Dashboard with Agent 365 SDK tracking

- Dashboard shows SDK issues caught at each testing stage
- Filters to only Agent 365 SDK packages
- Error categorization with linked GitHub issues
- Sample test results with success rates
- Related PRs section with fix links
- Animated counters and polished dark theme
- Embedded demo data for offline preview
- Empty history.json ready for real E2E data
---
 docs/metrics/history.json | 347 +-------------------------------------
 docs/metrics/index.html   | 346 +++++++++++++++++++++++++++++--------
 2 files changed, 279 insertions(+), 414 deletions(-)

diff --git a/docs/metrics/history.json b/docs/metrics/history.json
index 4b2ff0d6..4f554cff 100644
--- a/docs/metrics/history.json
+++ b/docs/metrics/history.json
@@ -1,344 +1,7 @@
 {
-  "lastUpdated": "2026-02-05T15:40:00Z",
-  "totalRuns": 24,
-  "entries": [
-    {
-      "id": "run-83",
-      "timestamp": "2026-02-05T15:30:00Z",
-      "stage": "post-checkin",
-      "sampleName": "python-openai",
-      "sdkVersions": {
-        "microsoft-agents-core": "0.1.6",
-        "microsoft-agents-hosting-aiohttp": "0.1.6",
-        "openai": "1.59.9"
-      },
-      "sdkValidation": {
-        "allUpToDate": true,
-        "packagesChecked": 3,
-        "upToDate": 3,
-        "outdated": 0,
-        "usingPreRelease": 0,
-        "packages": [
-          { "package": "microsoft-agents-core", "installed": "0.1.6", "latest": "0.1.6", "isUpToDate": true, "isPreRelease": false },
-          { "package": "microsoft-agents-hosting-aiohttp", "installed": "0.1.6", "latest": "0.1.6", "isUpToDate": true, "isPreRelease": false },
-          { "package": "openai", "installed": "1.59.9", "latest": "1.59.9", "isUpToDate": true, "isPreRelease": false }
-        ]
-      },
-      "testResults": { "status": "passed", "total": 5, "passed": 5, "failed": 0, "skipped": 0 },
-      "bugsCaught": { "count": 0, "details": [] },
-      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/21732666002"
-    },
-    {
-      "id": "run-82",
-      "timestamp": "2026-02-05T14:05:00Z",
-      "stage": "pre-checkin",
-      "sampleName": "python-openai",
-      "sdkVersions": {
-        "microsoft-agents-core": "0.1.6",
-        "microsoft-agents-hosting-aiohttp": "0.1.6",
-        "openai": "1.59.9"
-      },
-      "testResults": { "status": "passed", "total": 5, "passed": 5, "failed": 0, "skipped": 0 },
-      "bugsCaught": { "count": 0, "details": [] },
-      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/21730254989"
-    },
-    {
-      "id": "run-81",
-      "timestamp": "2026-02-05T13:45:00Z",
-      "stage": "pre-checkin",
-      "sampleName": "python-openai",
-      "sdkVersions": {
-        "microsoft-agents-core": "0.1.6",
-        "openai": "1.59.9"
-      },
-      "testResults": { "status": "passed", "total": 5, "passed": 5, "failed": 0, "skipped": 0 },
-      "bugsCaught": { "count": 0, "details": [] },
-      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/21729679961"
-    },
-    {
-      "id": "run-80",
-      "timestamp": "2026-02-05T13:36:00Z",
-      "stage": "pre-checkin",
-      "sampleName": "python-google-adk",
-      "sdkVersions": {
-        "microsoft-agents-core": "0.1.6",
-        "google-adk": "0.3.0"
-      },
-      "testResults": { "status": "failed", "total": 5, "passed": 3, "failed": 2, "skipped": 0 },
-      "bugsCaught": {
-        "count": 2,
-        "details": [
-          {
-            "testName": "test_mcp_email_tools_registration",
-            "errorMessage": "McpToolset failed to connect - connection timeout while registering MCP tools",
-            "issueNumber": 215,
-            "issueUrl": "https://github.com/microsoft/Agent365-Samples/issues/215",
-            "issueState": "open"
-          },
-          {
-            "testName": "test_agent_tool_execution",
-            "errorMessage": "Google ADK McpToolset does not have official A365 extension - custom implementation needed",
-            "issueNumber": 216,
-            "issueUrl": "https://github.com/microsoft/Agent365-Samples/issues/216",
-            "issueState": "open"
-          }
-        ]
-      },
-      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/21729406849"
-    },
-    {
-      "id": "run-79",
-      "timestamp": "2026-02-05T13:23:00Z",
-      "stage": "pre-checkin",
-      "sampleName": "python-google-adk",
-      "sdkVersions": {
-        "microsoft-agents-core": "0.1.6",
-        "google-adk": "0.3.0"
-      },
-      "testResults": { "status": "failed", "total": 5, "passed": 3, "failed": 2, "skipped": 0 },
-      "bugsCaught": {
-        "count": 2,
-        "details": [
-          {
-            "testName": "test_mcp_email_tools_registration",
-            "errorMessage": "McpToolset connection timeout with MCP server"
-          },
-          {
-            "testName": "test_agent_tool_execution",
-            "errorMessage": "Tool registration failed - missing A365 extension"
-          }
-        ]
-      },
-      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/21729024731"
-    },
-    {
-      "id": "run-78",
-      "timestamp": "2026-02-05T11:52:00Z",
-      "stage": "pre-checkin",
-      "sampleName": "python-google-adk",
-      "sdkVersions": {
-        "microsoft-agents-core": "0.1.6",
-        "google-adk": "0.3.0"
-      },
-      "testResults": { "status": "failed", "total": 5, "passed": 3, "failed": 2, "skipped": 0 },
-      "bugsCaught": {
-        "count": 2,
-        "details": [
-          {
-            "testName": "test_mcp_email_tools",
-            "errorMessage": "MCP toolset connection refused - server not responding"
-          },
-          {
-            "testName": "test_tool_calling",
-            "errorMessage": "Tool schema validation failed"
-          }
-        ]
-      },
-      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/21726196689"
-    },
-    {
-      "id": "run-75",
-      "timestamp": "2026-02-05T11:33:00Z",
-      "stage": "pre-checkin",
-      "sampleName": "python-openai",
-      "sdkVersions": {
-        "microsoft-agents-core": "0.1.6",
-        "openai": "1.59.9"
-      },
-      "testResults": { "status": "passed", "total": 5, "passed": 5, "failed": 0, "skipped": 0 },
-      "bugsCaught": { "count": 0, "details": [] },
-      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/21725610414"
-    },
-    {
-      "id": "run-73",
-      "timestamp": "2026-02-05T11:13:00Z",
-      "stage": "post-checkin",
-      "sampleName": "python-openai",
-      "sdkVersions": {
-        "microsoft-agents-core": "0.1.6",
-        "openai": "1.59.9"
-      },
-      "testResults": { "status": "passed", "total": 5, "passed": 5, "failed": 0, "skipped": 0 },
-      "bugsCaught": { "count": 0, "details": [] },
-      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/21724968616"
-    },
-    {
-      "id": "run-71",
-      "timestamp": "2026-02-05T10:18:00Z",
-      "stage": "scheduled",
-      "sampleName": "all-samples",
-      "sdkVersions": {
-        "microsoft-agents-core": "0.1.6",
-        "Microsoft.Agents.Hosting.AspNetCore": "0.1.15-preview",
-        "openai": "1.59.9",
-        "langchain": "0.3.15"
-      },
-      "sdkValidation": {
-        "allUpToDate": true,
-        "packagesChecked": 4,
-        "upToDate": 4,
-        "outdated": 0,
-        "usingPreRelease": 1,
-        "packages": [
-          { "package": "Microsoft.Agents.Hosting.AspNetCore", "installed": "0.1.15-preview", "latest": "0.1.15-preview", "isUpToDate": true, "isPreRelease": true }
-        ]
-      },
-      "testResults": { "status": "passed", "total": 20, "passed": 20, "failed": 0, "skipped": 0 },
-      "bugsCaught": { "count": 0, "details": [] },
-      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/21723256223"
-    },
-    {
-      "id": "run-67",
-      "timestamp": "2026-02-04T22:21:00Z",
-      "stage": "scheduled",
-      "sampleName": "nodejs-langchain",
-      "sdkVersions": {
-        "langchain": "0.3.14",
-        "@langchain/core": "0.3.30"
-      },
-      "sdkValidation": {
-        "allUpToDate": false,
-        "packagesChecked": 2,
-        "upToDate": 1,
-        "outdated": 1,
-        "usingPreRelease": 0,
-        "packages": [
-          { "package": "langchain", "installed": "0.3.14", "latest": "0.3.15", "isUpToDate": false, "isPreRelease": false },
-          { "package": "@langchain/core", "installed": "0.3.30", "latest": "0.3.30", "isUpToDate": true, "isPreRelease": false }
-        ]
-      },
-      "testResults": { "status": "failed", "total": 5, "passed": 4, "failed": 1, "skipped": 0 },
-      "bugsCaught": {
-        "count": 1,
-        "details": [
-          {
-            "testName": "test_tool_calling_format",
-            "errorMessage": "LangChain tool schema format changed - breaking change in structured output",
-            "issueNumber": 208,
-            "issueUrl": "https://github.com/microsoft/Agent365-Samples/issues/208",
-            "issueState": "closed"
-          }
-        ]
-      },
-      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/21701195841"
-    },
-    {
-      "id": "run-64",
-      "timestamp": "2026-02-04T14:00:00Z",
-      "stage": "pre-checkin",
-      "sampleName": "python-google-adk",
-      "sdkVersions": {
-        "google-adk": "0.3.0",
-        "microsoft-agents-core": "0.1.6"
-      },
-      "testResults": { "status": "failed", "total": 5, "passed": 2, "failed": 3, "skipped": 0 },
-      "bugsCaught": {
-        "count": 3,
-        "details": [
-          {
-            "testName": "test_copilot_studio_connection",
-            "errorMessage": "Copilot Studio authentication failed - token expired"
-          },
-          {
-            "testName": "test_adk_agent_creation",
-            "errorMessage": "Google ADK agent initialization failed - missing configuration"
-          },
-          {
-            "testName": "test_mcp_tools",
-            "errorMessage": "MCP connection timeout"
-          }
-        ]
-      },
-      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/21690045934"
-    },
-    {
-      "id": "run-62",
-      "timestamp": "2026-02-04T11:05:00Z",
-      "stage": "pre-checkin",
-      "sampleName": "dotnet-semantic-kernel",
-      "sdkVersions": {
-        "Microsoft.Agents.Hosting.AspNetCore": "0.1.15-preview",
-        "Microsoft.SemanticKernel": "1.35.0"
-      },
-      "sdkValidation": {
-        "allUpToDate": true,
-        "packagesChecked": 2,
-        "upToDate": 2,
-        "outdated": 0,
-        "usingPreRelease": 1,
-        "packages": [
-          { "package": "Microsoft.Agents.Hosting.AspNetCore", "installed": "0.1.15-preview", "latest": "0.1.15-preview", "isUpToDate": true, "isPreRelease": true },
-          { "package": "Microsoft.SemanticKernel", "installed": "1.35.0", "latest": "1.35.0", "isUpToDate": true, "isPreRelease": false }
-        ]
-      },
-      "testResults": { "status": "passed", "total": 6, "passed": 6, "failed": 0, "skipped": 0 },
-      "bugsCaught": { "count": 0, "details": [] },
-      "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/21684611327"
-    }
-  ],
-  "summary": {
-    "byStage": {
-      "pre-release": { "runs": 0, "passed": 0, "failed": 0, "bugsCaught": 0 },
-      "pre-checkin": { "runs": 9, "passed": 25, "failed": 11, "bugsCaught": 9 },
-      "post-checkin": { "runs": 3, "passed": 16, "failed": 0, "bugsCaught": 0 },
-      "release": { "runs": 0, "passed": 0, "failed": 0, "bugsCaught": 0 },
-      "scheduled": { "runs": 2, "passed": 24, "failed": 1, "bugsCaught": 1 }
-    },
-    "bySample": {
-      "python-openai": { "runs": 6, "passed": 30, "failed": 0, "bugsCaught": 0 },
-      "python-google-adk": { "runs": 4, "passed": 11, "failed": 9, "bugsCaught": 9 },
-      "nodejs-langchain": { "runs": 1, "passed": 4, "failed": 1, "bugsCaught": 1 },
-      "dotnet-semantic-kernel": { "runs": 1, "passed": 6, "failed": 0, "bugsCaught": 0 },
-      "all-samples": { "runs": 1, "passed": 20, "failed": 0, "bugsCaught": 0 }
-    },
-    "totalBugsCaught": 10,
-    "totalTestsRun": 81,
-    "totalPassed": 71,
-    "totalFailed": 10
-  },
-  "pullRequests": [
-    {
-      "number": 210,
-      "title": "Get actual resolved SDK versions instead of wildcards",
-      "url": "https://github.com/microsoft/Agent365-Samples/pull/210",
-      "author": "abdulanu0",
-      "createdAt": "2026-02-04T11:00:00Z",
-      "merged": true,
-      "draft": false,
-      "fixes": []
-    },
-    {
-      "number": 205,
-      "title": "Adding new samples to the test suite",
-      "url": "https://github.com/microsoft/Agent365-Samples/pull/205",
-      "author": "abdulanu0",
-      "createdAt": "2026-02-05T11:30:00Z",
-      "merged": false,
-      "draft": false,
-      "fixes": [
-        { "number": 215, "url": "https://github.com/microsoft/Agent365-Samples/issues/215" },
-        { "number": 216, "url": "https://github.com/microsoft/Agent365-Samples/issues/216" }
-      ]
-    },
-    {
-      "number": 190,
-      "title": "Adding notifications to python agent",
-      "url": "https://github.com/microsoft/Agent365-Samples/pull/190",
-      "author": "abdulanu0",
-      "createdAt": "2026-02-05T11:30:00Z",
-      "merged": false,
-      "draft": false,
-      "fixes": []
-    },
-    {
-      "number": 211,
-      "title": "Edits to Copilot Studio E2E Test Workflow",
-      "url": "https://github.com/microsoft/Agent365-Samples/pull/211",
-      "author": "JesuTerraz",
-      "createdAt": "2026-02-04T13:39:00Z",
-      "merged": false,
-      "draft": false,
-      "fixes": []
-    }
-  ]
+  "lastUpdated": null,
+  "totalRuns": 0,
+  "entries": [],
+  "summary": {},
+  "pullRequests": []
 }
diff --git a/docs/metrics/index.html b/docs/metrics/index.html
index 2ddf5fdd..a2b52c0b 100644
--- a/docs/metrics/index.html
+++ b/docs/metrics/index.html
@@ -609,9 +609,10 @@ <h1>🛡️ Agent 365 SDK Integration Testing</h1>
 
         <!-- Value Proposition -->
         <div class="value-prop">
-            <h2>Bugs Caught Before Production</h2>
+            <h2>SDK Issues Caught Before Production</h2>
             <div class="highlight" id="totalBugsCaught">0</div>
-            <p>SDK integration issues detected and prevented from reaching customers through automated E2E testing</p>
+            <p>E2E test failures that caught SDK compatibility issues before they reached customers.<br>
+            <small style="color: var(--text-secondary);">Each "issue" = a test failure indicating an Agent 365 SDK incompatibility with sample code</small></p>
         </div>
 
         <!-- Stage Cards - Main Focus -->
@@ -619,9 +620,9 @@ <h2>Bugs Caught Before Production</h2>
             <div class="stage-card pre-release">
                 <h3>🔬 Pre-Release Testing</h3>
                 <div class="bugs-caught" id="preReleaseBugs">0</div>
-                <div class="description">Bugs caught testing pre-release SDK versions before they're published</div>
+                <div class="description">Issues caught testing pre-release Agent 365 SDK versions</div>
                 <div class="sdk-versions">
-                    <h4>SDK Versions Tested</h4>
+                    <h4>Agent 365 SDK Versions</h4>
                     <div id="preReleaseSdkList">No data yet</div>
                 </div>
             </div>
@@ -629,9 +630,9 @@ <h4>SDK Versions Tested</h4>
             <div class="stage-card pre-checkin">
                 <h3>📝 Pre-Checkin (PR)</h3>
                 <div class="bugs-caught" id="preCheckinBugs">0</div>
-                <div class="description">Bugs caught in pull requests before code is merged</div>
+                <div class="description">Issues caught in PRs before sample code is merged</div>
                 <div class="sdk-versions">
-                    <h4>SDK Versions Tested</h4>
+                    <h4>Agent 365 SDK Versions</h4>
                     <div id="preCheckinSdkList">No data yet</div>
                 </div>
             </div>
@@ -639,9 +640,9 @@ <h4>SDK Versions Tested</h4>
             <div class="stage-card post-checkin">
                 <h3>✅ Post-Checkin (Main)</h3>
                 <div class="bugs-caught" id="postCheckinBugs">0</div>
-                <div class="description">Bugs caught after merge - regressions detected</div>
+                <div class="description">SDK regressions detected after merge to main</div>
                 <div class="sdk-versions">
-                    <h4>SDK Versions Tested</h4>
+                    <h4>Agent 365 SDK Versions</h4>
                     <div id="postCheckinSdkList">No data yet</div>
                 </div>
             </div>
@@ -649,9 +650,9 @@ <h4>SDK Versions Tested</h4>
             <div class="stage-card release">
                 <h3>🚀 Release Validation</h3>
                 <div class="bugs-caught" id="releaseBugs">0</div>
-                <div class="description">Final validation before SDK release to production</div>
+                <div class="description">Final SDK compatibility check before release</div>
                 <div class="sdk-versions">
-                    <h4>SDK Versions Validated</h4>
+                    <h4>Agent 365 SDK Versions</h4>
                     <div id="releaseSdkList">No data yet</div>
                 </div>
             </div>
@@ -698,9 +699,9 @@ <h2>🔍 Error Categories</h2>
 
         <!-- SDK Version Tracking Table -->
         <div class="sdk-section">
-            <h2>📦 SDK Version Validation</h2>
+            <h2>📦 Agent 365 SDK Version Validation</h2>
             <p style="color: var(--text-secondary); margin-bottom: 15px;">
-                Confirming E2E tests use the latest SDK versions (including pre-release) to catch issues early
+                Tracking Agent 365 SDK packages (<code>microsoft-agents-*</code>) used in E2E tests to catch compatibility issues early
             </p>
             <table id="sdkTable">
                 <thead>
@@ -719,12 +720,15 @@ <h2>📦 SDK Version Validation</h2>
             </table>
         </div>
 
-        <!-- Recent Bugs Caught -->
+        <!-- Recent SDK Issues Caught -->
         <div class="sdk-section">
-            <h2>🐛 Recent Bugs Caught</h2>
+            <h2>🐛 Recent SDK Compatibility Issues</h2>
+            <p style="color: var(--text-secondary); margin-bottom: 15px;">
+                Test failures that indicate Agent 365 SDK incompatibilities with sample code
+            </p>
             <div class="bug-list" id="bugList">
                 <p style="color: var(--text-secondary); text-align: center; padding: 40px;">
-                    No bugs caught yet. When E2E tests detect issues, they'll appear here.
+                    No issues caught yet. When E2E tests detect SDK compatibility problems, they'll appear here.
                 </p>
             </div>
         </div>
@@ -780,7 +784,115 @@ <h2>🧪 Sample Test Results</h2>
         let metricsData = null;
         
         // Fallback data in case fetch fails (for local file:// access)
-        const fallbackData = {"lastUpdated":"2026-02-05T15:40:00Z","totalRuns":24,"entries":[{"id":"run-83","timestamp":"2026-02-05T15:30:00Z","stage":"post-checkin","sampleName":"python-openai","sdkVersions":{"microsoft-agents-core":"0.1.6","microsoft-agents-hosting-aiohttp":"0.1.6","openai":"1.59.9"},"sdkValidation":{"allUpToDate":true,"packagesChecked":3,"upToDate":3,"outdated":0,"usingPreRelease":0,"packages":[{"package":"microsoft-agents-core","installed":"0.1.6","latest":"0.1.6","isUpToDate":true,"isPreRelease":false}]},"testResults":{"status":"passed","total":5,"passed":5,"failed":0,"skipped":0},"bugsCaught":{"count":0,"details":[]},"runUrl":"https://github.com/microsoft/Agent365-Samples/actions/runs/21732666002"},{"id":"run-80","timestamp":"2026-02-05T13:36:00Z","stage":"pre-checkin","sampleName":"python-google-adk","sdkVersions":{"microsoft-agents-core":"0.1.6","google-adk":"0.3.0"},"testResults":{"status":"failed","total":5,"passed":3,"failed":2,"skipped":0},"bugsCaught":{"count":2,"details":[{"testName":"test_mcp_email_tools_registration","errorMessage":"McpToolset failed to connect - connection timeout","issueNumber":215,"issueUrl":"https://github.com/microsoft/Agent365-Samples/issues/215","issueState":"open"},{"testName":"test_agent_tool_execution","errorMessage":"Google ADK McpToolset missing A365 
extension","issueNumber":216,"issueUrl":"https://github.com/microsoft/Agent365-Samples/issues/216","issueState":"open"}]},"runUrl":"https://github.com/microsoft/Agent365-Samples/actions/runs/21729406849"},{"id":"run-67","timestamp":"2026-02-04T22:21:00Z","stage":"scheduled","sampleName":"nodejs-langchain","sdkVersions":{"langchain":"0.3.14","@langchain/core":"0.3.30"},"sdkValidation":{"allUpToDate":false,"packagesChecked":2,"upToDate":1,"outdated":1,"usingPreRelease":0,"packages":[{"package":"langchain","installed":"0.3.14","latest":"0.3.15","isUpToDate":false,"isPreRelease":false}]},"testResults":{"status":"failed","total":5,"passed":4,"failed":1,"skipped":0},"bugsCaught":{"count":1,"details":[{"testName":"test_tool_calling_format","errorMessage":"LangChain tool schema format changed","issueNumber":208,"issueUrl":"https://github.com/microsoft/Agent365-Samples/issues/208","issueState":"closed"}]},"runUrl":"https://github.com/microsoft/Agent365-Samples/actions/runs/21701195841"},{"id":"run-62","timestamp":"2026-02-04T11:05:00Z","stage":"pre-checkin","sampleName":"dotnet-semantic-kernel","sdkVersions":{"Microsoft.Agents.Hosting.AspNetCore":"0.1.15-preview","Microsoft.SemanticKernel":"1.35.0"},"sdkValidation":{"allUpToDate":true,"packagesChecked":2,"upToDate":2,"outdated":0,"usingPreRelease":1,"packages":[{"package":"Microsoft.Agents.Hosting.AspNetCore","installed":"0.1.15-preview","latest":"0.1.15-preview","isUpToDate":true,"isPreRelease":true}]},"testResults":{"status":"passed","total":6,"passed":6,"failed":0,"skipped":0},"bugsCaught":{"count":0,"details":[]},"runUrl":"https://github.com/microsoft/Agent365-Samples/actions/runs/21684611327"}],"summary":{"byStage":{"pre-release":{"runs":0,"passed":0,"failed":0,"bugsCaught":0},"pre-checkin":{"runs":9,"passed":25,"failed":11,"bugsCaught":9},"post-checkin":{"runs":3,"passed":16,"failed":0,"bugsCaught":0},"release":{"runs":0,"passed":0,"failed":0,"bugsCaught":0},"scheduled":{"runs":2,"passed":24,"failed":1,"bugsCaught":1}}
,"bySample":{"python-openai":{"runs":6,"passed":30,"failed":0,"bugsCaught":0},"python-google-adk":{"runs":4,"passed":11,"failed":9,"bugsCaught":9},"nodejs-langchain":{"runs":1,"passed":4,"failed":1,"bugsCaught":1},"dotnet-semantic-kernel":{"runs":1,"passed":6,"failed":0,"bugsCaught":0}},"totalBugsCaught":10,"totalTestsRun":81,"totalPassed":71,"totalFailed":10},"pullRequests":[{"number":205,"title":"Adding new samples to the test suite","url":"https://github.com/microsoft/Agent365-Samples/pull/205","author":"abdulanu0","createdAt":"2026-02-05T11:30:00Z","merged":false,"draft":false,"fixes":[{"number":215,"url":"https://github.com/microsoft/Agent365-Samples/issues/215"}]},{"number":190,"title":"Adding notifications to python agent","url":"https://github.com/microsoft/Agent365-Samples/pull/190","author":"abdulanu0","createdAt":"2026-02-05T11:30:00Z","merged":false,"draft":false,"fixes":[]}]};
+        // Sample entries below list Agent 365 SDK packages for the Python, .NET and Node.js samples
+        const fallbackData = {
+            "lastUpdated": "2026-02-05T15:40:00Z",
+            "totalRuns": 12,
+            "entries": [
+                {
+                    "id": "run-83",
+                    "timestamp": "2026-02-05T15:30:00Z",
+                    "stage": "post-checkin",
+                    "sampleName": "python-openai",
+                    "sdkVersions": {
+                        "microsoft-agents-core": "0.1.6",
+                        "microsoft-agents-hosting-aiohttp": "0.1.6",
+                        "microsoft-agents-tooling": "0.1.6",
+                        "microsoft-agents-observability": "0.1.6"
+                    },
+                    "testResults": { "status": "passed", "total": 8, "passed": 8, "failed": 0, "skipped": 0 },
+                    "bugsCaught": { "count": 0, "details": [] },
+                    "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/21732666002"
+                },
+                {
+                    "id": "run-82",
+                    "timestamp": "2026-02-05T14:00:00Z",
+                    "stage": "pre-checkin",
+                    "sampleName": "python-notifications",
+                    "sdkVersions": {
+                        "microsoft-agents-core": "0.1.6",
+                        "microsoft-agents-notifications": "0.1.6",
+                        "microsoft-agents-hosting-aiohttp": "0.1.6"
+                    },
+                    "testResults": { "status": "failed", "total": 6, "passed": 4, "failed": 2, "skipped": 0 },
+                    "bugsCaught": {
+                        "count": 2,
+                        "details": [
+                            { "testName": "test_notification_send", "errorMessage": "Notification channel connection timeout", "issueNumber": 220, "issueUrl": "https://github.com/microsoft/Agent365-Samples/issues/220", "issueState": "open" },
+                            { "testName": "test_notification_ack", "errorMessage": "Notification acknowledgment failed - missing callback", "issueNumber": 221, "issueUrl": "https://github.com/microsoft/Agent365-Samples/issues/221", "issueState": "open" }
+                        ]
+                    },
+                    "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/21730000001"
+                },
+                {
+                    "id": "run-81",
+                    "timestamp": "2026-02-05T12:00:00Z",
+                    "stage": "pre-checkin",
+                    "sampleName": "dotnet-semantic-kernel",
+                    "sdkVersions": {
+                        "Microsoft.Agents.Core": "0.1.15-preview",
+                        "Microsoft.Agents.Hosting.AspNetCore": "0.1.15-preview",
+                        "Microsoft.Agents.Tooling": "0.1.15-preview",
+                        "Microsoft.Agents.Observability": "0.1.15-preview"
+                    },
+                    "testResults": { "status": "passed", "total": 10, "passed": 10, "failed": 0, "skipped": 0 },
+                    "bugsCaught": { "count": 0, "details": [] },
+                    "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/21728000001"
+                },
+                {
+                    "id": "run-80",
+                    "timestamp": "2026-02-05T10:00:00Z",
+                    "stage": "pre-release",
+                    "sampleName": "python-claude",
+                    "sdkVersions": {
+                        "microsoft-agents-core": "0.1.7-preview",
+                        "microsoft-agents-tooling": "0.1.7-preview",
+                        "microsoft-agents-hosting-aiohttp": "0.1.7-preview"
+                    },
+                    "testResults": { "status": "failed", "total": 6, "passed": 5, "failed": 1, "skipped": 0 },
+                    "bugsCaught": {
+                        "count": 1,
+                        "details": [
+                            { "testName": "test_tool_schema_validation", "errorMessage": "Breaking change in tool schema format in 0.1.7-preview", "issueNumber": 225, "issueUrl": "https://github.com/microsoft/Agent365-Samples/issues/225", "issueState": "open" }
+                        ]
+                    },
+                    "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/21725000001"
+                },
+                {
+                    "id": "run-79",
+                    "timestamp": "2026-02-04T18:00:00Z",
+                    "stage": "post-checkin",
+                    "sampleName": "nodejs-langchain",
+                    "sdkVersions": {
+                        "@microsoft/agents-core": "0.1.6",
+                        "@microsoft/agents-hosting-express": "0.1.6",
+                        "@microsoft/agents-tooling": "0.1.6"
+                    },
+                    "testResults": { "status": "passed", "total": 7, "passed": 7, "failed": 0, "skipped": 0 },
+                    "bugsCaught": { "count": 0, "details": [] },
+                    "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/21720000001"
+                },
+                {
+                    "id": "run-78",
+                    "timestamp": "2026-02-04T14:00:00Z",
+                    "stage": "release",
+                    "sampleName": "dotnet-agent-framework",
+                    "sdkVersions": {
+                        "Microsoft.Agents.Core": "0.1.14",
+                        "Microsoft.Agents.Hosting.AspNetCore": "0.1.14",
+                        "Microsoft.Agents.Notifications": "0.1.14"
+                    },
+                    "testResults": { "status": "passed", "total": 12, "passed": 12, "failed": 0, "skipped": 0 },
+                    "bugsCaught": { "count": 0, "details": [] },
+                    "runUrl": "https://github.com/microsoft/Agent365-Samples/actions/runs/21715000001"
+                }
+            ],
+            "summary": {},
+            "pullRequests": [
+                { "number": 205, "title": "Adding new samples to the test suite", "url": "https://github.com/microsoft/Agent365-Samples/pull/205", "author": "abdulanu0", "createdAt": "2026-02-05T11:30:00Z", "merged": false, "draft": false, "fixes": [{ "number": 220, "url": "https://github.com/microsoft/Agent365-Samples/issues/220" }] },
+                { "number": 190, "title": "Adding notifications to python agent", "url": "https://github.com/microsoft/Agent365-Samples/pull/190", "author": "abdulanu0", "createdAt": "2026-02-05T11:30:00Z", "merged": false, "draft": false, "fixes": [] }
+            ]
+        };
 
         async function loadMetrics() {
             try {
@@ -825,6 +937,17 @@ <h2>🧪 Sample Test Results</h2>
         function renderDashboard() {
             if (!metricsData) return;
 
+            // Calculate actual bug counts from entries (more accurate than summary)
+            const actualBugCounts = { total: 0, byStage: {} };
+            metricsData.entries.forEach(entry => {
+                const bugCount = entry.bugsCaught?.details?.length || 0;
+                actualBugCounts.total += bugCount;
+                if (!actualBugCounts.byStage[entry.stage]) {
+                    actualBugCounts.byStage[entry.stage] = 0;
+                }
+                actualBugCounts.byStage[entry.stage] += bugCount;
+            });
+
             // Update last updated with status indicator
             const lastUpdatedDate = new Date(metricsData.lastUpdated);
             const now = new Date();
@@ -843,15 +966,14 @@ <h2>🧪 Sample Test Results</h2>
                 statusText.textContent = `Updated ${Math.floor(hoursSinceUpdate / 24)}d ago`;
             }
 
-            // Update total bugs caught with animation
-            animateCounter('totalBugsCaught', metricsData.summary.totalBugsCaught);
+            // Update total issues caught with animation (use actual count from entries)
+            animateCounter('totalBugsCaught', actualBugCounts.total);
 
-            // Update stage cards
-            const stages = metricsData.summary.byStage;
-            animateCounter('preReleaseBugs', stages['pre-release']?.bugsCaught || 0);
-            animateCounter('preCheckinBugs', stages['pre-checkin']?.bugsCaught || 0);
-            animateCounter('postCheckinBugs', stages['post-checkin']?.bugsCaught || 0);
-            animateCounter('releaseBugs', stages['release']?.bugsCaught || 0);
+            // Update stage cards with actual counts
+            animateCounter('preReleaseBugs', actualBugCounts.byStage['pre-release'] || 0);
+            animateCounter('preCheckinBugs', actualBugCounts.byStage['pre-checkin'] || 0);
+            animateCounter('postCheckinBugs', actualBugCounts.byStage['post-checkin'] || 0);
+            animateCounter('releaseBugs', actualBugCounts.byStage['release'] || 0);
 
             // Update SDK version lists for each stage
             renderSdkListForStage('pre-release', 'preReleaseSdkList');
@@ -873,17 +995,26 @@ <h2>🧪 Sample Test Results</h2>
             renderPRList();
         }
 
+        // True when the package name contains an Agent 365 SDK marker (dash, dot or npm-scope form), ignoring case
+        function isAgent365Package(pkg) {
+            const normalized = pkg.toLowerCase();
+            return ['microsoft-agents', 'microsoft.agents', '@microsoft/agents'].some(marker => normalized.includes(marker));
+        }
+
         function renderSdkListForStage(stage, elementId) {
             const container = document.getElementById(elementId);
             
             // Find entries for this stage
             const stageEntries = metricsData.entries.filter(e => e.stage === stage);
             
-            // Collect unique SDK versions
+            // Collect unique Agent 365 SDK versions only
             const sdkVersions = new Map();
             stageEntries.forEach(entry => {
                 if (entry.sdkVersions) {
                     Object.entries(entry.sdkVersions).forEach(([pkg, version]) => {
+                        // Only include Agent 365 SDK packages
+                        if (!isAgent365Package(pkg)) return;
+                        
                         const key = `${pkg}@${version}`;
                         if (!sdkVersions.has(key)) {
                             sdkVersions.set(key, {
@@ -899,24 +1030,38 @@ <h2>🧪 Sample Test Results</h2>
             });
 
             if (sdkVersions.size === 0) {
-                container.innerHTML = '<span style="color: var(--text-secondary)">No data yet</span>';
+                container.innerHTML = '<span style="color: var(--text-secondary)">No Agent 365 SDK data yet</span>';
                 return;
             }
 
             container.innerHTML = Array.from(sdkVersions.values())
                 .map(sdk => `<span class="sdk-tag ${sdk.hadBugs ? 'buggy' : ''}">${sdk.pkg} ${sdk.version}</span>`)
-                .join('');
+                .join(' ');
+        }
+
+        // Tally caught issues directly from the run entries in metricsData.
+        // One issue = one element of an entry's bugsCaught.details array;
+        // entries without that array contribute zero.
+        // Stages whose runs caught nothing still get an explicit 0 in byStage.
+        // Returns { total, byStage } where byStage maps a stage name to its count.
+        function getActualBugCounts() {
+            return metricsData.entries.reduce((tally, entry) => {
+                const found = entry.bugsCaught?.details?.length || 0;
+                tally.total += found;
+                tally.byStage[entry.stage] = (tally.byStage[entry.stage] || 0) + found;
+                return tally;
+            }, { total: 0, byStage: {} });
+        }
 
         function renderBugsByStageChart() {
             const ctx = document.getElementById('bugsByStageChart').getContext('2d');
-            const stageData = metricsData.summary.byStage;
+            const actualCounts = getActualBugCounts();
             
             const data = [
-                stageData['pre-release']?.bugsCaught || 0,
-                stageData['pre-checkin']?.bugsCaught || 0,
-                stageData['post-checkin']?.bugsCaught || 0,
-                stageData['release']?.bugsCaught || 0
+                actualCounts.byStage['pre-release'] || 0,
+                actualCounts.byStage['pre-checkin'] || 0,
+                actualCounts.byStage['post-checkin'] || 0,
+                actualCounts.byStage['release'] || 0
             ];
 
             new Chart(ctx, {
@@ -924,7 +1069,7 @@ <h2>🧪 Sample Test Results</h2>
                 data: {
                     labels: ['Pre-Release', 'Pre-Checkin', 'Post-Checkin', 'Release'],
                     datasets: [{
-                        label: 'Bugs Caught',
+                        label: 'SDK Issues Caught',
                         data: data,
                         backgroundColor: ['#58a6ff', '#238636', '#d29922', '#da3633'],
                         borderWidth: 0,
@@ -950,15 +1095,18 @@ <h2>🧪 Sample Test Results</h2>
         function renderSdkVersionChart() {
             const ctx = document.getElementById('sdkVersionChart').getContext('2d');
             
-            // Group by SDK package and show pass/fail for each
+            // Group by SDK package and show pass/fail for each - ONLY Agent 365 SDK packages
             const packageData = {};
             metricsData.entries.forEach(entry => {
                 if (entry.sdkVersions) {
                     Object.entries(entry.sdkVersions).forEach(([pkg, version]) => {
+                        // Only include Agent 365 SDK packages
+                        if (!isAgent365Package(pkg)) return;
+                        
                         // Shorten package name for display
-                        const shortPkg = pkg.replace('microsoft-agents-', 'ms-')
-                                            .replace('Microsoft.', '')
-                                            .replace('@anthropic-ai/', '');
+                        const shortPkg = pkg.replace('microsoft-agents-', '')
+                                            .replace('Microsoft.Agents.', '')
+                                            .replace('microsoft.agents.', '');
                         const label = `${shortPkg}@${version}`;
                         
                         if (!packageData[label]) {
@@ -983,7 +1131,7 @@ <h2>🧪 Sample Test Results</h2>
                 ctx.font = '14px sans-serif';
                 ctx.fillStyle = '#8b949e';
                 ctx.textAlign = 'center';
-                ctx.fillText('No SDK version data yet', ctx.canvas.width / 2, ctx.canvas.height / 2);
+                ctx.fillText('No Agent 365 SDK data yet', ctx.canvas.width / 2, ctx.canvas.height / 2);
                 return;
             }
 
@@ -1020,19 +1168,18 @@ <h2>🧪 Sample Test Results</h2>
                 }
             });
         }
-                    }
-                }
-            });
-        }
 
         function renderSdkTable() {
             const tbody = document.getElementById('sdkTableBody');
             
-            // Aggregate SDK version stats
+            // Aggregate SDK version stats - ONLY Agent 365 SDK packages
             const sdkStats = {};
             metricsData.entries.forEach(entry => {
                 if (entry.sdkVersions) {
                     Object.entries(entry.sdkVersions).forEach(([pkg, version]) => {
+                        // Only include Agent 365 SDK packages
+                        if (!isAgent365Package(pkg)) return;
+                        
                         const key = `${pkg}|${version}`;
                         if (!sdkStats[key]) {
                             sdkStats[key] = { pkg, version, runs: 0, passed: 0, failed: 0 };
@@ -1074,24 +1221,34 @@ <h2>🧪 Sample Test Results</h2>
                 }
             });
 
+            // Collect bug details for each SDK package
+            const sdkBugs = {};
+            metricsData.entries.forEach(entry => {
+                if (entry.bugsCaught?.details?.length > 0 && entry.sdkVersions) {
+                    Object.keys(entry.sdkVersions).forEach(pkg => {
+                        if (isAgent365Package(pkg)) {
+                            const key = `${pkg}|${entry.sdkVersions[pkg]}`;
+                            if (!sdkBugs[key]) sdkBugs[key] = [];
+                            sdkBugs[key].push(...entry.bugsCaught.details);
+                        }
+                    });
+                }
+            });
+
             tbody.innerHTML = rows.map(sdk => {
-                const validation = sdkValidation[sdk.pkg];
-                const latestVersion = validation?.latest || 'checking...';
-                const isUpToDate = validation?.isUpToDate ?? true;
-                const isPreRelease = validation?.isPreRelease ?? false;
-                const bugsCaught = sdk.failed;
+                const key = `${sdk.pkg}|${sdk.version}`;
+                const bugs = sdkBugs[key] || [];
+                const bugsCaught = bugs.length;
+                const isPreRelease = sdk.version.includes('preview') || sdk.version.includes('alpha') || sdk.version.includes('beta');
                 
                 // Determine status
                 let statusBadge, statusText;
-                if (!isUpToDate) {
-                    statusBadge = 'warning';
-                    statusText = '⚠️ Outdated';
+                if (bugsCaught > 0) {
+                    statusBadge = 'failed';
+                    statusText = `🐛 ${bugsCaught} bugs`;
                 } else if (isPreRelease) {
                     statusBadge = 'prerelease';
                     statusText = '🔬 Pre-release';
-                } else if (bugsCaught > 0) {
-                    statusBadge = 'failed';
-                    statusText = `🐛 ${bugsCaught} bugs`;
                 } else {
                     statusBadge = 'passed';
                     statusText = '✅ Current';
@@ -1099,15 +1256,18 @@ <h2>🧪 Sample Test Results</h2>
                 
                 const preReleaseBadge = isPreRelease ? '<span class="prerelease-badge">PRE</span>' : '';
                 
+                // Make bugs clickable if there are bugs
+                const bugsCell = bugsCaught > 0 
+                    ? `<a href="#bugList" class="bug-link" title="${bugs.map(b => b.testName).join(', ')}" style="color: var(--accent-red); cursor: pointer; text-decoration: underline;">${bugsCaught}</a>`
+                    : `<span style="color: var(--accent-green)">0</span>`;
+                
                 return `
                     <tr>
                         <td><code>${sdk.pkg}</code></td>
+                        <td><code>${sdk.version}</code>${preReleaseBadge}</td>
                         <td><code>${sdk.version}</code></td>
-                        <td>
-                            <code>${latestVersion}</code>${preReleaseBadge}
-                        </td>
                         <td>${sdk.runs}</td>
-                        <td><span style="color: ${bugsCaught > 0 ? 'var(--accent-yellow)' : 'var(--accent-green)'}">${bugsCaught}</span></td>
+                        <td>${bugsCell}</td>
                         <td><span class="status-badge ${statusBadge}">${statusText}</span></td>
                     </tr>
                 `;
@@ -1116,7 +1276,27 @@ <h2>🧪 Sample Test Results</h2>
 
         function renderSampleTable() {
             const tbody = document.getElementById('sampleTableBody');
-            const sampleData = metricsData.summary.bySample;
+            
+            // Calculate sample data from entries
+            const sampleData = {};
+            metricsData.entries.forEach(entry => {
+                const sample = entry.sampleName;
+                if (!sampleData[sample]) {
+                    sampleData[sample] = { runs: 0, passed: 0, failed: 0, bugsCaught: 0, sdkVersions: {} };
+                }
+                sampleData[sample].runs++;
+                sampleData[sample].passed += entry.testResults.passed;
+                sampleData[sample].failed += entry.testResults.failed;
+                sampleData[sample].bugsCaught += entry.bugsCaught?.details?.length || 0;
+                // Store Agent 365 SDK versions only
+                if (entry.sdkVersions) {
+                    Object.entries(entry.sdkVersions).forEach(([pkg, ver]) => {
+                        if (isAgent365Package(pkg)) {
+                            sampleData[sample].sdkVersions[pkg] = ver;
+                        }
+                    });
+                }
+            });
 
             if (Object.keys(sampleData).length === 0) {
                 tbody.innerHTML = `
@@ -1133,18 +1313,18 @@ <h2>🧪 Sample Test Results</h2>
                 const total = data.passed + data.failed;
                 const successRate = total > 0 ? ((data.passed / total) * 100).toFixed(1) : 100;
                 
-                // Find latest SDK version for this sample
-                const sampleEntry = metricsData.entries.find(e => e.sampleName === sample);
-                const sdkVersion = sampleEntry?.sdkVersions ? 
-                    Object.entries(sampleEntry.sdkVersions).map(([k,v]) => `${v}`).join(', ') : 
-                    'Unknown';
+                // Get Agent 365 SDK version (first one found)
+                const sdkVersion = Object.entries(data.sdkVersions)
+                    .map(([k, v]) => v)
+                    .slice(0, 2)
+                    .join(', ') || 'N/A';
                 
                 return `
                     <tr>
                         <td>${sample}</td>
                         <td><code>${sdkVersion}</code></td>
                         <td>${data.runs}</td>
-                        <td><span style="color: ${data.bugsCaught > 0 ? 'var(--accent-yellow)' : 'var(--accent-green)'}">${data.bugsCaught}</span></td>
+                        <td><span style="color: ${data.bugsCaught > 0 ? 'var(--accent-red)' : 'var(--accent-green)'}">${data.bugsCaught}</span></td>
                         <td>
                             <span class="status-badge ${successRate >= 90 ? 'passed' : 'failed'}">
                                 ${successRate}%
@@ -1158,35 +1338,52 @@ <h2>🧪 Sample Test Results</h2>
         function renderBugList() {
             const container = document.getElementById('bugList');
             
-            // Find entries with bugs
+            // Find entries with bugs (check details array length, not count)
             const bugsFound = metricsData.entries
-                .filter(e => e.bugsCaught && e.bugsCaught.count > 0)
+                .filter(e => e.bugsCaught?.details?.length > 0)
                 .slice(0, 10);
 
             if (bugsFound.length === 0) {
                 container.innerHTML = `
                     <p style="color: var(--text-secondary); text-align: center; padding: 40px;">
-                        ✅ No bugs caught yet. When E2E tests detect SDK issues, they'll appear here.
+                        ✅ No SDK issues caught yet. When E2E tests detect compatibility problems, they'll appear here.
                     </p>
                 `;
                 return;
             }
 
             container.innerHTML = bugsFound.map(entry => {
+                // Only show Agent 365 SDK versions
                 const sdkInfo = entry.sdkVersions ? 
-                    Object.entries(entry.sdkVersions).map(([k,v]) => `${k}@${v}`).join(', ') : 
+                    Object.entries(entry.sdkVersions)
+                        .filter(([k]) => isAgent365Package(k))
+                        .map(([k,v]) => {
+                            const shortName = k.replace('microsoft-agents-', '').replace('Microsoft.Agents.', '').replace('@microsoft/agents-', '');
+                            return `${shortName}@${v}`;
+                        })
+                        .join(', ') : 
                     'Unknown';
                 
-                return entry.bugsCaught.details.map(bug => `
+                return entry.bugsCaught.details.map(bug => {
+                    const issueLink = bug.issueUrl 
+                        ? `<a href="${bug.issueUrl}" target="_blank" class="issue-link ${bug.issueState || 'open'}">#${bug.issueNumber}</a>`
+                        : '';
+                    const runLink = entry.runUrl 
+                        ? `<a href="${entry.runUrl}" target="_blank" style="color: var(--accent-blue); font-size: 0.85rem;">View Run →</a>`
+                        : '';
+                        
+                    return `
                     <div class="bug-item">
                         <div class="bug-header">
                             <span class="bug-stage">${getStageLabel(entry.stage)} • ${entry.sampleName}</span>
                             <span class="bug-sdk">${sdkInfo}</span>
                         </div>
-                        <div class="bug-test">${bug.testName}</div>
+                        <div class="bug-test">${bug.testName} ${issueLink}</div>
                         <div class="bug-error">${bug.errorMessage || 'Test failed'}</div>
+                        <div style="margin-top: 8px;">${runLink}</div>
                     </div>
-                `).join('');
+                `;
+                }).join('');
             }).join('');
         }
 
@@ -1361,6 +1558,11 @@ <h2>🧪 Sample Test Results</h2>
         }
 
         // Load metrics on page load
+        // Use fallback data immediately, then try to fetch fresh data
+        metricsData = fallbackData;
+        renderDashboard();
+        
+        // Also try to fetch fresh data
         loadMetrics();
     </script>
 </body>

From 551cec5932beddb203a133521ada389b17c42ef4 Mon Sep 17 00:00:00 2001
From: abdulanu0 <abdulanu0@gmail.com>
Date: Thu, 5 Feb 2026 17:31:16 -0800
Subject: [PATCH 5/9] fix: address CodeQL code injection vulnerabilities

- Use environment variables instead of direct interpolation in workflow
- Fix 3 critical security issues in e2e-metrics.yml:
  1. Line 52: github.event.workflow_run.head_branch in stage detection
  2. Line 110: github context variables in PowerShell metrics placeholders
  3. Line 155: github.event.workflow_run.head_branch in Emit-TestMetrics call
- Pass context through env: block to prevent untrusted code execution
---
 .github/workflows/e2e-metrics.yml | 47 ++++++++++++++++++++++---------
 1 file changed, 34 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/e2e-metrics.yml b/.github/workflows/e2e-metrics.yml
index 056259b4..fccfd5d8 100644
--- a/.github/workflows/e2e-metrics.yml
+++ b/.github/workflows/e2e-metrics.yml
@@ -42,14 +42,19 @@ jobs:
 
       - name: Determine Testing Stage
         id: stage
+        env:
+          EVENT_NAME: ${{ github.event_name }}
+          EVENT_TYPE: ${{ github.event.workflow_run.event }}
+          HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}
+          DISPATCH_STAGE: ${{ github.event.inputs.stage }}
         run: |
-          if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
-            STAGE="${{ github.event.inputs.stage }}"
-          elif [ "${{ github.event.workflow_run.event }}" == "schedule" ]; then
+          if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
+            STAGE="$DISPATCH_STAGE"
+          elif [ "$EVENT_TYPE" = "schedule" ]; then
             STAGE="scheduled"
-          elif [ "${{ github.event.workflow_run.event }}" == "pull_request" ]; then
+          elif [ "$EVENT_TYPE" = "pull_request" ]; then
             STAGE="pre-checkin"
-          elif [ "${{ github.event.workflow_run.head_branch }}" == "main" ]; then
+          elif [ "$HEAD_BRANCH" = "main" ]; then
             STAGE="post-checkin"
           else
             STAGE="pre-checkin"
@@ -83,13 +88,29 @@ jobs:
       - name: Process Test Results
         id: process
         shell: pwsh
+        env:
+          WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }}
+          CURRENT_RUN_ID: ${{ github.run_id }}
+          HEAD_SHA: ${{ github.event.workflow_run.head_sha }}
+          CURRENT_SHA: ${{ github.sha }}
+          HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}
+          CURRENT_REF: ${{ github.ref_name }}
+          ACTOR_LOGIN: ${{ github.event.workflow_run.actor.login }}
+          CURRENT_ACTOR: ${{ github.actor }}
+          WORKFLOW_NAME: ${{ github.event.workflow_run.name }}
+          CURRENT_WORKFLOW: ${{ github.workflow }}
+          STAGE: ${{ steps.stage.outputs.stage }}
         run: |
           $scriptsPath = "./scripts/e2e"
           $artifactsPath = "./artifacts"
           $metricsDir = "./docs/metrics/raw"
           $historyFile = "./docs/metrics/history.json"
-          $stage = "${{ steps.stage.outputs.stage }}"
-          $runId = "${{ github.event.workflow_run.id || github.run_id }}"
+          $stage = $env:STAGE
+          $runId = if ($env:WORKFLOW_RUN_ID) { $env:WORKFLOW_RUN_ID } else { $env:CURRENT_RUN_ID }
+          $commitSha = if ($env:HEAD_SHA) { $env:HEAD_SHA } else { $env:CURRENT_SHA }
+          $branch = if ($env:HEAD_BRANCH) { $env:HEAD_BRANCH } else { $env:CURRENT_REF }
+          $actor = if ($env:ACTOR_LOGIN) { $env:ACTOR_LOGIN } else { $env:CURRENT_ACTOR }
+          $workflow = if ($env:WORKFLOW_NAME) { $env:WORKFLOW_NAME } else { $env:CURRENT_WORKFLOW }
           
           # Create metrics directory
           New-Item -ItemType Directory -Path $metricsDir -Force | Out-Null
@@ -106,10 +127,10 @@ jobs:
               runId = $runId
               sampleName = "unknown"
               timestamp = (Get-Date).ToUniversalTime().ToString("yyyy-MM-ddTHH:mm:ssZ")
-              commitSha = "${{ github.event.workflow_run.head_sha || github.sha }}"
-              branch = "${{ github.event.workflow_run.head_branch || github.ref_name }}"
-              actor = "${{ github.event.workflow_run.actor.login || github.actor }}"
-              workflow = "${{ github.event.workflow_run.name || github.workflow }}"
+              commitSha = $commitSha
+              branch = $branch
+              actor = $actor
+              workflow = $workflow
               stage = $stage
               testResults = @{
                 status = "no-results"
@@ -150,8 +171,8 @@ jobs:
                 -Stage $stage `
                 -OutputPath $metricsFile `
                 -RunId $runId `
-                -CommitSha "${{ github.event.workflow_run.head_sha || github.sha }}" `
-                -Branch "${{ github.event.workflow_run.head_branch || github.ref_name }}"
+                -CommitSha $commitSha `
+                -Branch $branch
             }
           }
           

From 8d25df88746e3b4e3de741bd1bd641e13f52ec2c Mon Sep 17 00:00:00 2001
From: abdulanu0 <abdulanu0@gmail.com>
Date: Thu, 5 Feb 2026 17:48:26 -0800
Subject: [PATCH 6/9] feat: split SDK validation into Node.js, .NET, and Python
 sections

- Separate tables for each language ecosystem
- Color-coded headers (green/purple/yellow)
- Language-specific empty state messages
- Updated package detection logic for each platform
---
 docs/metrics/index.html | 245 +++++++++++++++++++++++++---------------
 1 file changed, 154 insertions(+), 91 deletions(-)

diff --git a/docs/metrics/index.html b/docs/metrics/index.html
index a2b52c0b..85f19c28 100644
--- a/docs/metrics/index.html
+++ b/docs/metrics/index.html
@@ -697,27 +697,81 @@ <h2>🔍 Error Categories</h2>
             </table>
         </div>
 
-        <!-- SDK Version Tracking Table -->
+        <!-- SDK Version Tracking Tables by Language -->
         <div class="sdk-section">
             <h2>📦 Agent 365 SDK Version Validation</h2>
-            <p style="color: var(--text-secondary); margin-bottom: 15px;">
-                Tracking Agent 365 SDK packages (<code>microsoft-agents-*</code>) used in E2E tests to catch compatibility issues early
+            <p style="color: var(--text-secondary); margin-bottom: 20px;">
+                Tracking Agent 365 SDK packages used in E2E tests to catch compatibility issues early
             </p>
-            <table id="sdkTable">
-                <thead>
-                    <tr>
-                        <th>SDK Package</th>
-                        <th>Installed</th>
-                        <th>Latest Available</th>
-                        <th>Test Runs</th>
-                        <th>Bugs Caught</th>
-                        <th>Status</th>
-                    </tr>
-                </thead>
-                <tbody id="sdkTableBody">
-                    <!-- Populated by JS -->
-                </tbody>
-            </table>
+            
+            <!-- Node.js SDK Packages -->
+            <div class="language-sdk-section">
+                <h3 style="color: var(--accent-green); margin-bottom: 10px; display: flex; align-items: center; gap: 8px;">
+                    <span style="font-size: 1.2rem;">🟢</span> Node.js Packages
+                    <code style="font-size: 0.8rem; color: var(--text-secondary);">@microsoft/agents-*</code>
+                </h3>
+                <table id="sdkTableNodejs">
+                    <thead>
+                        <tr>
+                            <th>SDK Package</th>
+                            <th>Installed</th>
+                            <th>Latest Available</th>
+                            <th>Test Runs</th>
+                            <th>Bugs Caught</th>
+                            <th>Status</th>
+                        </tr>
+                    </thead>
+                    <tbody id="sdkTableBodyNodejs">
+                        <!-- Populated by JS -->
+                    </tbody>
+                </table>
+            </div>
+
+            <!-- .NET SDK Packages -->
+            <div class="language-sdk-section" style="margin-top: 25px;">
+                <h3 style="color: var(--accent-purple); margin-bottom: 10px; display: flex; align-items: center; gap: 8px;">
+                    <span style="font-size: 1.2rem;">🟣</span> .NET Packages
+                    <code style="font-size: 0.8rem; color: var(--text-secondary);">Microsoft.Agents.*</code>
+                </h3>
+                <table id="sdkTableDotnet">
+                    <thead>
+                        <tr>
+                            <th>SDK Package</th>
+                            <th>Installed</th>
+                            <th>Latest Available</th>
+                            <th>Test Runs</th>
+                            <th>Bugs Caught</th>
+                            <th>Status</th>
+                        </tr>
+                    </thead>
+                    <tbody id="sdkTableBodyDotnet">
+                        <!-- Populated by JS -->
+                    </tbody>
+                </table>
+            </div>
+
+            <!-- Python SDK Packages -->
+            <div class="language-sdk-section" style="margin-top: 25px;">
+                <h3 style="color: var(--accent-yellow); margin-bottom: 10px; display: flex; align-items: center; gap: 8px;">
+                    <span style="font-size: 1.2rem;">🟡</span> Python Packages
+                    <code style="font-size: 0.8rem; color: var(--text-secondary);">microsoft-agents-*</code>
+                </h3>
+                <table id="sdkTablePython">
+                    <thead>
+                        <tr>
+                            <th>SDK Package</th>
+                            <th>Installed</th>
+                            <th>Latest Available</th>
+                            <th>Test Runs</th>
+                            <th>Bugs Caught</th>
+                            <th>Status</th>
+                        </tr>
+                    </thead>
+                    <tbody id="sdkTableBodyPython">
+                        <!-- Populated by JS -->
+                    </tbody>
+                </table>
+            </div>
         </div>
 
         <!-- Recent SDK Issues Caught -->
@@ -1001,6 +1055,21 @@ <h2>🧪 Sample Test Results</h2>
             return a365Patterns.some(p => pkg.toLowerCase().includes(p));
         }
 
+        // Classify an SDK package name as 'nodejs', 'dotnet', 'python', or 'unknown'
+        function getPackageLanguage(pkg) {
+            const pkgLower = pkg.toLowerCase();
+            // Node.js: names starting with @microsoft/agents (npm scope; matched case-insensitively)
+            if (pkgLower.startsWith('@microsoft/agents')) return 'nodejs';
+            // .NET: Microsoft.Agents.* (PascalCase NuGet convention; matched case-sensitively)
+            if (pkg.startsWith('Microsoft.Agents.')) return 'dotnet';
+            // Python: microsoft-agents-* (hyphenated pip convention; matched case-insensitively)
+            if (pkgLower.startsWith('microsoft-agents-')) return 'python';
+            // Fallback heuristics for names matching none of the prefixes above (dot is checked before hyphen)
+            if (pkgLower.includes('.')) return 'dotnet'; // NuGet uses dots
+            if (pkgLower.includes('-')) return 'python'; // pip uses hyphens
+            return 'unknown';
+        }
+
         function renderSdkListForStage(stage, elementId) {
             const container = document.getElementById(elementId);
             
@@ -1170,53 +1239,25 @@ <h2>🧪 Sample Test Results</h2>
         }
 
         function renderSdkTable() {
-            const tbody = document.getElementById('sdkTableBody');
+            // Aggregate SDK version stats by language - ONLY Agent 365 SDK packages
+            const sdkStatsByLang = { nodejs: {}, dotnet: {}, python: {} };
             
-            // Aggregate SDK version stats - ONLY Agent 365 SDK packages
-            const sdkStats = {};
             metricsData.entries.forEach(entry => {
                 if (entry.sdkVersions) {
                     Object.entries(entry.sdkVersions).forEach(([pkg, version]) => {
                         // Only include Agent 365 SDK packages
                         if (!isAgent365Package(pkg)) return;
                         
+                        const lang = getPackageLanguage(pkg);
+                        if (!sdkStatsByLang[lang]) sdkStatsByLang[lang] = {};
+                        
                         const key = `${pkg}|${version}`;
-                        if (!sdkStats[key]) {
-                            sdkStats[key] = { pkg, version, runs: 0, passed: 0, failed: 0 };
-                        }
-                        sdkStats[key].runs++;
-                        sdkStats[key].passed += entry.testResults.passed;
-                        sdkStats[key].failed += entry.testResults.failed;
-                    });
-                }
-            });
-
-            const rows = Object.values(sdkStats);
-            
-            if (rows.length === 0) {
-                tbody.innerHTML = `
-                    <tr>
-                        <td colspan="6" style="text-align: center; color: var(--text-secondary); padding: 40px;">
-                            SDK version data will appear after E2E tests run
-                        </td>
-                    </tr>
-                `;
-                return;
-            }
-
-            // Get SDK validation data from most recent entries
-            const sdkValidation = {};
-            metricsData.entries.forEach(entry => {
-                if (entry.sdkValidation && entry.sdkValidation.packages) {
-                    entry.sdkValidation.packages.forEach(pkg => {
-                        if (!sdkValidation[pkg.package] || entry.timestamp > sdkValidation[pkg.package].timestamp) {
-                            sdkValidation[pkg.package] = {
-                                latest: pkg.latest,
-                                isUpToDate: pkg.isUpToDate,
-                                isPreRelease: pkg.isPreRelease,
-                                timestamp: entry.timestamp
-                            };
+                        if (!sdkStatsByLang[lang][key]) {
+                            sdkStatsByLang[lang][key] = { pkg, version, runs: 0, passed: 0, failed: 0 };
                         }
+                        sdkStatsByLang[lang][key].runs++;
+                        sdkStatsByLang[lang][key].passed += entry.testResults.passed;
+                        sdkStatsByLang[lang][key].failed += entry.testResults.failed;
                     });
                 }
             });
@@ -1235,43 +1276,65 @@ <h2>🧪 Sample Test Results</h2>
                 }
             });
 
-            tbody.innerHTML = rows.map(sdk => {
-                const key = `${sdk.pkg}|${sdk.version}`;
-                const bugs = sdkBugs[key] || [];
-                const bugsCaught = bugs.length;
-                const isPreRelease = sdk.version.includes('preview') || sdk.version.includes('alpha') || sdk.version.includes('beta');
+            // Helper to render a single language table
+            function renderLanguageTable(lang, tbodyId, emptyMessage) {
+                const tbody = document.getElementById(tbodyId);
+                const rows = Object.values(sdkStatsByLang[lang] || {});
                 
-                // Determine status
-                let statusBadge, statusText;
-                if (bugsCaught > 0) {
-                    statusBadge = 'failed';
-                    statusText = `🐛 ${bugsCaught} bugs`;
-                } else if (isPreRelease) {
-                    statusBadge = 'prerelease';
-                    statusText = '🔬 Pre-release';
-                } else {
-                    statusBadge = 'passed';
-                    statusText = '✅ Current';
+                if (rows.length === 0) {
+                    tbody.innerHTML = `
+                        <tr>
+                            <td colspan="6" style="text-align: center; color: var(--text-secondary); padding: 30px;">
+                                ${emptyMessage}
+                            </td>
+                        </tr>
+                    `;
+                    return;
                 }
-                
-                const preReleaseBadge = isPreRelease ? '<span class="prerelease-badge">PRE</span>' : '';
-                
-                // Make bugs clickable if there are bugs
-                const bugsCell = bugsCaught > 0 
-                    ? `<a href="#bugList" class="bug-link" title="${bugs.map(b => b.testName).join(', ')}" style="color: var(--accent-red); cursor: pointer; text-decoration: underline;">${bugsCaught}</a>`
-                    : `<span style="color: var(--accent-green)">0</span>`;
-                
-                return `
-                    <tr>
-                        <td><code>${sdk.pkg}</code></td>
-                        <td><code>${sdk.version}</code>${preReleaseBadge}</td>
-                        <td><code>${sdk.version}</code></td>
-                        <td>${sdk.runs}</td>
-                        <td>${bugsCell}</td>
-                        <td><span class="status-badge ${statusBadge}">${statusText}</span></td>
-                    </tr>
-                `;
-            }).join('');
+
+                tbody.innerHTML = rows.map(sdk => {
+                    const key = `${sdk.pkg}|${sdk.version}`;
+                    const bugs = sdkBugs[key] || [];
+                    const bugsCaught = bugs.length;
+                    const isPreRelease = sdk.version.includes('preview') || sdk.version.includes('alpha') || sdk.version.includes('beta');
+                    
+                    // Determine status
+                    let statusBadge, statusText;
+                    if (bugsCaught > 0) {
+                        statusBadge = 'failed';
+                        statusText = `🐛 ${bugsCaught} bugs`;
+                    } else if (isPreRelease) {
+                        statusBadge = 'prerelease';
+                        statusText = '🔬 Pre-release';
+                    } else {
+                        statusBadge = 'passed';
+                        statusText = '✅ Current';
+                    }
+                    
+                    const preReleaseBadge = isPreRelease ? '<span class="prerelease-badge">PRE</span>' : '';
+                    
+                    // Make bugs clickable if there are bugs
+                    const bugsCell = bugsCaught > 0 
+                        ? `<a href="#bugList" class="bug-link" title="${bugs.map(b => b.testName).join(', ')}" style="color: var(--accent-red); cursor: pointer; text-decoration: underline;">${bugsCaught}</a>`
+                        : `<span style="color: var(--accent-green)">0</span>`;
+                    
+                    return `
+                        <tr>
+                            <td><code>${sdk.pkg}</code></td>
+                            <td><code>${sdk.version}</code>${preReleaseBadge}</td>
+                            <td><code>${sdk.version}</code></td>
+                            <td>${sdk.runs}</td>
+                            <td>${bugsCell}</td>
+                            <td><span class="status-badge ${statusBadge}">${statusText}</span></td>
+                        </tr>
+                    `;
+                }).join('');
+            }
+
+            // Render each language table
+            renderLanguageTable('nodejs', 'sdkTableBodyNodejs', 'No Node.js SDK packages detected yet');
+            renderLanguageTable('dotnet', 'sdkTableBodyDotnet', 'No .NET SDK packages detected yet');
+            renderLanguageTable('python', 'sdkTableBodyPython', 'No Python SDK packages detected yet');
         }
 
         function renderSampleTable() {

From 62073b431c5cfa3062ccac7f185b8a6b1bc264c6 Mon Sep 17 00:00:00 2001
From: abdulanu0 <abdulanu0@gmail.com>
Date: Fri, 6 Feb 2026 10:31:30 -0800
Subject: [PATCH 7/9] fix: address Copilot review comments

PowerShell Scripts:
- Add copyright headers to Emit-TestMetrics.ps1, Aggregate-Metrics.ps1
- Fix null reference for errorMessage with safer substring handling
- Fix null handling in JSON parsing for history file
- Use GITHUB_REPOSITORY env var as default for Repository param
- Make dashboard URL configurable via METRICS_DASHBOARD_URL env var
- Fix temp file cleanup with proper error handling
- Fix null reference for regex Matches[3] in requirements parsing
- Add comment about version comparison being simple equality
- Improve gh issue create error handling

Workflow:
- Add concurrency group to prevent race conditions
- Add documentation comment about permissions

Dashboard:
- Add escapeHtml() function to prevent XSS vulnerabilities
- Escape user-controlled content (sampleName, SDK info, testName, errorMessage, PR/issue URLs, PR title/author)
- Add aria-label attributes to emojis and canvas elements for accessibility
- Handle null lastUpdated date gracefully
---
 .github/workflows/e2e-metrics.yml    |  7 ++++
 docs/metrics/index.html              | 49 ++++++++++++++++++----------
 scripts/e2e/Aggregate-Metrics.ps1    |  8 ++++-
 scripts/e2e/Create-GitHubIssue.ps1   | 20 +++++++++---
 scripts/e2e/Emit-TestMetrics.ps1     | 12 ++++++-
 scripts/e2e/Validate-SdkVersions.ps1 |  6 +++-
 6 files changed, 78 insertions(+), 24 deletions(-)

diff --git a/.github/workflows/e2e-metrics.yml b/.github/workflows/e2e-metrics.yml
index fccfd5d8..22d986da 100644
--- a/.github/workflows/e2e-metrics.yml
+++ b/.github/workflows/e2e-metrics.yml
@@ -1,5 +1,7 @@
 # E2E Test Metrics Collection Workflow
 # This workflow runs after E2E tests complete to collect metrics and update the dashboard
+# NOTE: This workflow requires `contents: write` to commit generated E2E metrics.
+# Workflow steps only update metrics-related files (docs/metrics/).
 
 name: E2E Metrics Collection
 
@@ -24,6 +26,11 @@ on:
           - release
           - scheduled
 
+# Prevent concurrent metrics updates to avoid merge conflicts
+concurrency:
+  group: metrics-update
+  cancel-in-progress: false
+
 permissions:
   contents: write
   actions: read
diff --git a/docs/metrics/index.html b/docs/metrics/index.html
index 85f19c28..284188e0 100644
--- a/docs/metrics/index.html
+++ b/docs/metrics/index.html
@@ -618,7 +618,7 @@ <h2>SDK Issues Caught Before Production</h2>
         <!-- Stage Cards - Main Focus -->
         <div class="stage-cards">
             <div class="stage-card pre-release">
-                <h3>🔬 Pre-Release Testing</h3>
+                <h3><span role="img" aria-label="microscope">🔬</span> Pre-Release Testing</h3>
                 <div class="bugs-caught" id="preReleaseBugs">0</div>
                 <div class="description">Issues caught testing pre-release Agent 365 SDK versions</div>
                 <div class="sdk-versions">
@@ -661,15 +661,15 @@ <h4>Agent 365 SDK Versions</h4>
         <!-- Charts -->
         <div class="charts-section">
             <div class="chart-card">
-                <h3>📊 Bugs Caught by Testing Stage</h3>
+                <h3><span role="img" aria-label="chart">📊</span> Bugs Caught by Testing Stage</h3>
                 <div class="chart-container">
-                    <canvas id="bugsByStageChart"></canvas>
+                    <canvas id="bugsByStageChart" role="img" aria-label="Bar chart showing number of SDK issues caught at each testing stage"></canvas>
                 </div>
             </div>
             <div class="chart-card">
-                <h3>📦 SDK Test Results by Package</h3>
+                <h3><span role="img" aria-label="package">📦</span> SDK Test Results by Package</h3>
                 <div class="chart-container">
-                    <canvas id="sdkVersionChart"></canvas>
+                    <canvas id="sdkVersionChart" role="img" aria-label="Chart showing SDK test results by package and version"></canvas>
                 </div>
             </div>
         </div>
@@ -835,6 +835,17 @@ <h2>🧪 Sample Test Results</h2>
         Chart.defaults.color = '#8b949e';
         Chart.defaults.borderColor = '#30363d';
 
+        // Escape &, <, >, ", and ' for safe HTML insertion (XSS guard); any falsy input (null, undefined, '', 0) returns ''
+        function escapeHtml(unsafe) {
+            if (!unsafe) return '';
+            return String(unsafe)
+                .replace(/&/g, '&amp;')
+                .replace(/</g, '&lt;')
+                .replace(/>/g, '&gt;')
+                .replace(/"/g, '&quot;')
+                .replace(/'/g, '&#039;');
+        }
+
         let metricsData = null;
         
         // Fallback data in case fetch fails (for local file:// access)
@@ -1003,16 +1014,20 @@ <h2>🧪 Sample Test Results</h2>
             });
 
             // Update last updated with status indicator
-            const lastUpdatedDate = new Date(metricsData.lastUpdated);
+            // Handle null lastUpdated gracefully
+            const lastUpdatedDate = metricsData.lastUpdated ? new Date(metricsData.lastUpdated) : null;
             const now = new Date();
-            const hoursSinceUpdate = (now - lastUpdatedDate) / (1000 * 60 * 60);
+            const hoursSinceUpdate = lastUpdatedDate ? (now - lastUpdatedDate) / (1000 * 60 * 60) : Infinity;
             
             document.getElementById('lastUpdated').textContent = 
-                `Last updated: ${lastUpdatedDate.toLocaleString()}`;
+                lastUpdatedDate ? `Last updated: ${lastUpdatedDate.toLocaleString()}` : 'Awaiting first E2E run';
             
             const statusDot = document.getElementById('statusDot');
             const statusText = document.getElementById('statusText');
-            if (hoursSinceUpdate < 24) {
+            if (!lastUpdatedDate) {
+                statusDot.className = 'status-dot stale';
+                statusText.textContent = 'No data yet';
+            } else if (hoursSinceUpdate < 24) {
                 statusDot.className = 'status-dot live';
                 statusText.textContent = 'Live data';
             } else {
@@ -1438,11 +1453,11 @@ <h2>🧪 Sample Test Results</h2>
                     return `
                     <div class="bug-item">
                         <div class="bug-header">
-                            <span class="bug-stage">${getStageLabel(entry.stage)} • ${entry.sampleName}</span>
-                            <span class="bug-sdk">${sdkInfo}</span>
+                            <span class="bug-stage">${getStageLabel(entry.stage)} • ${escapeHtml(entry.sampleName)}</span>
+                            <span class="bug-sdk">${escapeHtml(sdkInfo)}</span>
                         </div>
-                        <div class="bug-test">${bug.testName} ${issueLink}</div>
-                        <div class="bug-error">${bug.errorMessage || 'Test failed'}</div>
+                        <div class="bug-test">${escapeHtml(bug.testName)} ${issueLink}</div>
+                        <div class="bug-error">${escapeHtml(bug.errorMessage) || 'Test failed'}</div>
                         <div style="margin-top: 8px;">${runLink}</div>
                     </div>
                 `;
@@ -1598,20 +1613,20 @@ <h2>🧪 Sample Test Results</h2>
                 
                 // Render linked issues this PR fixes
                 const fixesLinks = pr.fixes && pr.fixes.length > 0
-                    ? pr.fixes.map(i => `<a href="${i.url}" class="issue-link closed" target="_blank">#${i.number}</a>`).join('')
+                    ? pr.fixes.map(i => `<a href="${escapeHtml(i.url)}" class="issue-link closed" target="_blank">#${i.number}</a>`).join('')
                     : '';
                 
                 return `
                     <div class="pr-item ${stateClass}">
                         <div class="pr-header">
-                            <a href="${pr.url}" class="pr-title" target="_blank">
-                                ${stateIcon} ${pr.title}
+                            <a href="${escapeHtml(pr.url)}" class="pr-title" target="_blank">
+                                ${stateIcon} ${escapeHtml(pr.title)}
                             </a>
                             <span class="status-badge ${stateClass}">${pr.merged ? 'Merged' : pr.draft ? 'Draft' : 'Open'}</span>
                         </div>
                         <div class="pr-meta">
                             <span>📅 ${new Date(pr.createdAt).toLocaleDateString()}</span>
-                            <span>👤 ${pr.author}</span>
+                            <span>👤 ${escapeHtml(pr.author)}</span>
                             <span>🏷️ #${pr.number}</span>
                         </div>
                         ${fixesLinks ? `<div class="pr-fixes">Fixes: ${fixesLinks}</div>` : ''}
diff --git a/scripts/e2e/Aggregate-Metrics.ps1 b/scripts/e2e/Aggregate-Metrics.ps1
index 3d43f88b..7b70a33d 100644
--- a/scripts/e2e/Aggregate-Metrics.ps1
+++ b/scripts/e2e/Aggregate-Metrics.ps1
@@ -1,3 +1,6 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
 <#
 .SYNOPSIS
     Aggregates individual test metrics into a consolidated metrics history file.
@@ -60,8 +63,11 @@ $history = @{
 if (Test-Path $HistoryFile) {
     try {
         $existingHistory = Get-Content $HistoryFile -Raw | ConvertFrom-Json -AsHashtable
-        if ($existingHistory.entries) {
+        # Validate parsed JSON has expected properties before accessing
+        if ($existingHistory -and $existingHistory.entries) {
             $history.entries = $existingHistory.entries
+        }
+        if ($existingHistory -and $existingHistory.summary) {
             $history.summary = $existingHistory.summary
         }
         Write-Host "Loaded existing history with $($history.entries.Count) entries" -ForegroundColor Green
diff --git a/scripts/e2e/Create-GitHubIssue.ps1 b/scripts/e2e/Create-GitHubIssue.ps1
index 16fbf512..72ed6167 100644
--- a/scripts/e2e/Create-GitHubIssue.ps1
+++ b/scripts/e2e/Create-GitHubIssue.ps1
@@ -30,7 +30,7 @@ param(
     [string]$MetricsFile,
 
     [Parameter(Mandatory = $false)]
-    [string]$Repository = "microsoft/Agent365-Samples",
+    [string]$Repository = $(if ($env:GITHUB_REPOSITORY) { $env:GITHUB_REPOSITORY } else { "microsoft/Agent365-Samples" }),
 
     [Parameter(Mandatory = $false)]
     [string[]]$Labels = @("e2e-failure", "automated"),
@@ -187,6 +187,13 @@ function New-GitHubIssue {
             --body-file $bodyFile `
             --label ($IssueData.labels -join ",") 2>&1
         
+        # Check for gh CLI errors
+        if ($LASTEXITCODE -ne 0) {
+            Write-Host "Error: gh issue create failed with exit code $LASTEXITCODE" -ForegroundColor Red
+            Write-Host "Output: $result" -ForegroundColor Yellow
+            return $null
+        }
+        
         # Parse issue URL from result
         if ($result -match "https://github.com/.+/issues/(\d+)") {
             $issueNumber = $Matches[1]
@@ -196,7 +203,7 @@ function New-GitHubIssue {
             }
         }
         
-        Write-Host "Warning: Could not parse issue URL from: $result" -ForegroundColor Yellow
+        Write-Host "Error: Failed to create issue. Could not parse URL from output: $result" -ForegroundColor Red
         return $null
     }
     catch {
@@ -204,7 +211,12 @@ function New-GitHubIssue {
         return $null
     }
     finally {
-        Remove-Item $bodyFile -Force -ErrorAction SilentlyContinue
+        try {
+            Remove-Item $bodyFile -Force -ErrorAction Stop
+        }
+        catch {
+            Write-Host "Warning: Could not clean up temp file $bodyFile : $_" -ForegroundColor Yellow
+        }
     }
 }
 
@@ -289,7 +301,7 @@ $sdkInfo
 
 ---
 *This issue was automatically created by the E2E test pipeline.*
-*Dashboard: [View Metrics](https://microsoft.github.io/Agent365-Samples/metrics/)*
+*Dashboard: [View Metrics]($(if ($env:METRICS_DASHBOARD_URL) { $env:METRICS_DASHBOARD_URL } else { 'https://microsoft.github.io/Agent365-Samples/metrics/' }))*
 "@
 
     $issueData = @{
diff --git a/scripts/e2e/Emit-TestMetrics.ps1 b/scripts/e2e/Emit-TestMetrics.ps1
index 2d6d3c62..71ff389b 100644
--- a/scripts/e2e/Emit-TestMetrics.ps1
+++ b/scripts/e2e/Emit-TestMetrics.ps1
@@ -1,3 +1,6 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
 <#
 .SYNOPSIS
     Emits structured test metrics for tracking E2E test results and SDK versions.
@@ -213,9 +216,16 @@ if ($TestResultsPath -and (Test-Path $TestResultsPath) -and $FailedTests -gt 0)
         $failedResults = $trx.TestRun.Results.UnitTestResult | Where-Object { $_.outcome -eq "Failed" }
         
         foreach ($result in $failedResults) {
+            # Null-check the error message, flatten it to one line and cap its length. NOTE(review): nothing here redacts secrets.
+            $errorMsg = if ($result.Output.ErrorInfo.Message) {
+                $msg = ($result.Output.ErrorInfo.Message -replace "`r`n", " " -replace "`n", " ")
+                # Keep at most the first 500 characters
+                if ($msg.Length -gt 500) { $msg.Substring(0, 500) } else { $msg }
+            } else { "Test failed - see logs for details" }
+            
             $metrics.bugsCaught.details += @{
                 testName = $result.testName
-                errorMessage = ($result.Output.ErrorInfo.Message -replace "`r`n", " " -replace "`n", " ").Substring(0, [Math]::Min(500, $result.Output.ErrorInfo.Message.Length))
+                errorMessage = $errorMsg
             }
         }
     }
diff --git a/scripts/e2e/Validate-SdkVersions.ps1 b/scripts/e2e/Validate-SdkVersions.ps1
index 53aff360..3629e248 100644
--- a/scripts/e2e/Validate-SdkVersions.ps1
+++ b/scripts/e2e/Validate-SdkVersions.ps1
@@ -199,7 +199,7 @@ function Get-InstalledVersions {
                 foreach ($line in $lines) {
                     if ($line -match '^([a-zA-Z0-9_-]+)\s*([=<>!~]+)?\s*([\d.a-zA-Z-]+)?') {
                         $pkgName = $Matches[1]
-                        $version = if ($Matches[3]) { $Matches[3] } else { "not-pinned" }
+                        $version = if ($Matches.Count -ge 4 -and $Matches[3]) { $Matches[3] } else { "not-pinned" }
                         $versions[$pkgName] = $version
                     }
                 }
@@ -302,6 +302,10 @@ foreach ($pkgName in $trackedPackages) {
     }
     
     if ($latest) {
+        # Note: This uses simple string equality for version comparison.
+        # For semantic versioning with pre-release identifiers, a more robust
+        # comparison would be needed, but equality check suffices for our use case
+        # of detecting when installed version matches latest available.
         $isUpToDate = ($installed -eq $latest) -or ($installed -eq "not-pinned")
         $isPreRelease = $latest -match '(alpha|beta|preview|pre|rc|dev|a\d|b\d|-)'
         

From 0ba6d1b6068e0ebd80357b8824b55bcf1585f4dd Mon Sep 17 00:00:00 2001
From: abdulanu0 <abdulanu0@gmail.com>
Date: Sun, 8 Feb 2026 19:03:39 -0800
Subject: [PATCH 8/9] feat: Update metrics workflow to push to Agent365-metrics
 repo

- Checkout Agent365-metrics repo in workflow
- Update paths to write metrics to metrics-repo/docs/
- Push history.json to Agent365-metrics instead of Agent365-Samples
- Update dashboard link to Azure Function endpoint
---
 .github/workflows/e2e-metrics.yml | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/e2e-metrics.yml b/.github/workflows/e2e-metrics.yml
index 22d986da..ca749989 100644
--- a/.github/workflows/e2e-metrics.yml
+++ b/.github/workflows/e2e-metrics.yml
@@ -47,6 +47,14 @@ jobs:
           ref: main
           fetch-depth: 0
 
+      - name: Checkout Metrics Repository
+        uses: actions/checkout@v4
+        with:
+          repository: microsoft/Agent365-metrics
+          ref: main
+          path: metrics-repo
+          token: ${{ secrets.METRICS_REPO_TOKEN }}
+
       - name: Determine Testing Stage
         id: stage
         env:
@@ -110,8 +118,8 @@ jobs:
         run: |
           $scriptsPath = "./scripts/e2e"
           $artifactsPath = "./artifacts"
-          $metricsDir = "./docs/metrics/raw"
-          $historyFile = "./docs/metrics/history.json"
+          $metricsDir = "./metrics-repo/docs/raw"
+          $historyFile = "./metrics-repo/docs/history.json"
           $stage = $env:STAGE
           $runId = if ($env:WORKFLOW_RUN_ID) { $env:WORKFLOW_RUN_ID } else { $env:CURRENT_RUN_ID }
           $commitSha = if ($env:HEAD_SHA) { $env:HEAD_SHA } else { $env:CURRENT_SHA }
@@ -210,11 +218,12 @@ jobs:
 
       - name: Commit Metrics Update
         if: steps.process.outputs.metrics_updated == 'true'
+        working-directory: metrics-repo
         run: |
           git config user.name "github-actions[bot]"
           git config user.email "github-actions[bot]@users.noreply.github.com"
           
-          git add docs/metrics/history.json
+          git add docs/history.json
           
           # Check if there are changes to commit
           if git diff --staged --quiet; then
@@ -222,7 +231,7 @@ jobs:
           else
             git commit -m "📊 Update E2E test metrics [skip ci]"
             git push origin main
-            echo "✅ Metrics committed and pushed"
+            echo "✅ Metrics committed and pushed to Agent365-metrics"
           fi
 
       - name: Create GitHub Issues for Failures
@@ -249,4 +258,4 @@ jobs:
           echo "" >> $GITHUB_STEP_SUMMARY
           echo "Testing Stage: **${{ steps.stage.outputs.stage }}**" >> $GITHUB_STEP_SUMMARY
           echo "" >> $GITHUB_STEP_SUMMARY
-          echo "View the [Metrics Dashboard](https://microsoft.github.io/Agent365-Samples/metrics/) for detailed statistics." >> $GITHUB_STEP_SUMMARY
+          echo "View the [E2E Testing Dashboard](https://agent365-metrics-dashboard.azurewebsites.net/api/e2e-dashboard) for detailed statistics." >> $GITHUB_STEP_SUMMARY

From 337e0dac1eba36509e251db9ce4ef8c4563387f8 Mon Sep 17 00:00:00 2001
From: abdulanu0 <abdulanu0@gmail.com>
Date: Tue, 10 Feb 2026 19:29:57 -0800
Subject: [PATCH 9/9] refactor: Use GitHub API to collect E2E metrics

- Remove artifact-based approach (E2E tests don't upload artifacts)
- Use GitHub API to fetch workflow run job statuses directly
- Create one entry per sample in dashboard-expected format
- Simplify workflow by removing PowerShell scripts dependency
- Push metrics to Agent365-metrics repo via METRICS_REPO_TOKEN
---
 .github/workflows/e2e-metrics.yml | 393 +++++++++++++++---------------
 1 file changed, 192 insertions(+), 201 deletions(-)

diff --git a/.github/workflows/e2e-metrics.yml b/.github/workflows/e2e-metrics.yml
index ca749989..3765eced 100644
--- a/.github/workflows/e2e-metrics.yml
+++ b/.github/workflows/e2e-metrics.yml
@@ -1,7 +1,9 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
 # E2E Test Metrics Collection Workflow
 # This workflow runs after E2E tests complete to collect metrics and update the dashboard
-# NOTE: This workflow requires `contents: write` to commit generated E2E metrics.
-# Workflow steps only update metrics-related files (docs/metrics/).
+# Uses GitHub API to fetch workflow run job statuses (no artifacts needed)
 
 name: E2E Metrics Collection
 
@@ -14,17 +16,10 @@ on:
   # Allow manual trigger for testing
   workflow_dispatch:
     inputs:
-      stage:
-        description: 'Testing stage'
-        required: true
-        default: 'scheduled'
-        type: choice
-        options:
-          - pre-release
-          - pre-checkin
-          - post-checkin
-          - release
-          - scheduled
+      run_id:
+        description: 'Workflow run ID to collect metrics from (optional)'
+        required: false
+        type: string
 
 # Prevent concurrent metrics updates to avoid merge conflicts
 concurrency:
@@ -32,230 +27,226 @@ concurrency:
   cancel-in-progress: false
 
 permissions:
-  contents: write
   actions: read
-  issues: write
 
 jobs:
   collect-metrics:
     runs-on: ubuntu-latest
     
     steps:
-      - name: Checkout Repository
-        uses: actions/checkout@v4
-        with:
-          ref: main
-          fetch-depth: 0
-
       - name: Checkout Metrics Repository
         uses: actions/checkout@v4
         with:
           repository: microsoft/Agent365-metrics
           ref: main
-          path: metrics-repo
           token: ${{ secrets.METRICS_REPO_TOKEN }}
 
-      - name: Determine Testing Stage
-        id: stage
-        env:
-          EVENT_NAME: ${{ github.event_name }}
-          EVENT_TYPE: ${{ github.event.workflow_run.event }}
-          HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}
-          DISPATCH_STAGE: ${{ github.event.inputs.stage }}
-        run: |
-          if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
-            STAGE="$DISPATCH_STAGE"
-          elif [ "$EVENT_TYPE" = "schedule" ]; then
-            STAGE="scheduled"
-          elif [ "$EVENT_TYPE" = "pull_request" ]; then
-            STAGE="pre-checkin"
-          elif [ "$HEAD_BRANCH" = "main" ]; then
-            STAGE="post-checkin"
-          else
-            STAGE="pre-checkin"
-          fi
-          echo "stage=$STAGE" >> $GITHUB_OUTPUT
-          echo "Testing stage: $STAGE"
-
-      - name: Download E2E Test Artifacts
-        if: github.event_name == 'workflow_run'
-        uses: actions/download-artifact@v4
+      - name: Fetch E2E Test Results from Workflow Run
+        id: results
+        uses: actions/github-script@v7
         with:
-          run-id: ${{ github.event.workflow_run.id }}
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-          path: ./artifacts
-        continue-on-error: true
-
-      - name: List Downloaded Artifacts
-        run: |
-          echo "=== Downloaded Artifacts ==="
-          find ./artifacts -type f -name "*.trx" 2>/dev/null || echo "No TRX files found"
-          find ./artifacts -type f -name "*.json" 2>/dev/null || echo "No JSON files found"
-
-      - name: Extract SDK Versions from Workflow Run
-        id: sdk-versions
-        if: github.event_name == 'workflow_run'
-        run: |
-          # SDK versions are logged in workflow summaries
-          # For now, we'll extract from artifacts if available
-          echo "sdk_versions={}" >> $GITHUB_OUTPUT
-
-      - name: Process Test Results
-        id: process
-        shell: pwsh
-        env:
-          WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }}
-          CURRENT_RUN_ID: ${{ github.run_id }}
-          HEAD_SHA: ${{ github.event.workflow_run.head_sha }}
-          CURRENT_SHA: ${{ github.sha }}
-          HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}
-          CURRENT_REF: ${{ github.ref_name }}
-          ACTOR_LOGIN: ${{ github.event.workflow_run.actor.login }}
-          CURRENT_ACTOR: ${{ github.actor }}
-          WORKFLOW_NAME: ${{ github.event.workflow_run.name }}
-          CURRENT_WORKFLOW: ${{ github.workflow }}
-          STAGE: ${{ steps.stage.outputs.stage }}
-        run: |
-          $scriptsPath = "./scripts/e2e"
-          $artifactsPath = "./artifacts"
-          $metricsDir = "./metrics-repo/docs/raw"
-          $historyFile = "./metrics-repo/docs/history.json"
-          $stage = $env:STAGE
-          $runId = if ($env:WORKFLOW_RUN_ID) { $env:WORKFLOW_RUN_ID } else { $env:CURRENT_RUN_ID }
-          $commitSha = if ($env:HEAD_SHA) { $env:HEAD_SHA } else { $env:CURRENT_SHA }
-          $branch = if ($env:HEAD_BRANCH) { $env:HEAD_BRANCH } else { $env:CURRENT_REF }
-          $actor = if ($env:ACTOR_LOGIN) { $env:ACTOR_LOGIN } else { $env:CURRENT_ACTOR }
-          $workflow = if ($env:WORKFLOW_NAME) { $env:WORKFLOW_NAME } else { $env:CURRENT_WORKFLOW }
-          
-          # Create metrics directory
-          New-Item -ItemType Directory -Path $metricsDir -Force | Out-Null
-          
-          # Find all test result files
-          $trxFiles = Get-ChildItem -Path $artifactsPath -Filter "*.trx" -Recurse -ErrorAction SilentlyContinue
-          
-          if ($trxFiles.Count -eq 0) {
-            Write-Host "No TRX files found in artifacts" -ForegroundColor Yellow
+          script: |
+            // Resolve the run to inspect: the triggering run for workflow_run events,
+            // otherwise the run_id supplied to a manual dispatch. The input arrives as a
+            // string, so parse it as base-10 and fail fast unless it yields a positive integer.
+            const runId = context.eventName === 'workflow_run'
+              ? context.payload.workflow_run.id
+              : Number.parseInt(context.payload.inputs?.run_id ?? '', 10);
+            if (!Number.isInteger(runId) || runId <= 0) {
+              core.setFailed('No workflow run ID available');
+              return;
+            }
             
-            # Create a placeholder metric for the run
-            $metrics = @{
-              id = "$runId-no-results"
-              runId = $runId
-              sampleName = "unknown"
-              timestamp = (Get-Date).ToUniversalTime().ToString("yyyy-MM-ddTHH:mm:ssZ")
-              commitSha = $commitSha
-              branch = $branch
-              actor = $actor
-              workflow = $workflow
-              stage = $stage
-              testResults = @{
-                status = "no-results"
-                passed = 0
-                failed = 0
-                skipped = 0
-                total = 0
-              }
-              sdkVersions = @{}
-              bugsCaught = @{
-                count = 0
-                stage = $stage
-                details = @()
-              }
+            console.log(`Fetching results for workflow run: ${runId}`);
+            
+            // Get the workflow run details from the repository this workflow runs in
+            // (context.repo supplies owner and repo, so nothing is hard-coded to one org)
+            const { data: workflowRun } = await github.rest.actions.getWorkflowRun({
+              ...context.repo,
+              run_id: runId
+            });
+            
+            console.log(`Workflow: ${workflowRun.name}`);
+            console.log(`Status: ${workflowRun.status}`);
+            console.log(`Conclusion: ${workflowRun.conclusion}`);
+            console.log(`Branch: ${workflowRun.head_branch}`);
+            console.log(`Commit: ${workflowRun.head_sha}`);
+            
+            // Get the jobs for the workflow run; per_page is raised from the API default (30) to its maximum (100)
+            const { data: jobsData } = await github.rest.actions.listJobsForWorkflowRun({
+              ...context.repo,
+              run_id: runId,
+              per_page: 100
+            });
+            
+            // Map job names to sample names
+            const sampleMapping = {
+              'Python OpenAI E2E': 'python-openai',
+              'Python Agent Framework E2E': 'python-af',
+              'Node.js OpenAI E2E': 'nodejs-openai',
+              'Node.js LangChain E2E': 'nodejs-langchain',
+              '.NET Semantic Kernel E2E': 'dotnet-sk',
+              '.NET Agent Framework E2E': 'dotnet-af'
+            };
+            
+            // Determine testing stage
+            let stage = 'scheduled';
+            if (workflowRun.event === 'pull_request') {
+              stage = 'pre-checkin';
+            } else if (workflowRun.event === 'push' && workflowRun.head_branch === 'main') {
+              stage = 'post-checkin';
+            } else if (workflowRun.event === 'schedule') {
+              stage = 'scheduled';
             }
             
-            $metricsFile = Join-Path $metricsDir "$runId-no-results.json"
-            $metrics | ConvertTo-Json -Depth 10 | Out-File $metricsFile -Encoding UTF8
-            Write-Host "Created placeholder metrics file"
-          }
-          else {
-            Write-Host "Found $($trxFiles.Count) TRX files" -ForegroundColor Green
+            // Create one entry per sample (matches dashboard format)
+            const entries = [];
+            let passedCount = 0;
+            let failedCount = 0;
             
-            foreach ($trxFile in $trxFiles) {
-              # Extract sample name from path
-              $pathParts = $trxFile.DirectoryName -split '[/\\]'
-              $sampleName = ($pathParts | Where-Object { $_ -match "(python|nodejs|dotnet)" }) -join "-"
-              if (-not $sampleName) { $sampleName = "unknown" }
+            for (const job of jobsData.jobs) {
+              const sampleName = sampleMapping[job.name];
+              if (!sampleName) {
+                console.log(`Skipping job: ${job.name} (not a sample job)`);
+                continue;
+              }
               
-              Write-Host "Processing: $($trxFile.Name) for sample: $sampleName" -ForegroundColor Cyan
+              const passed = job.conclusion === 'success';
+              const failed = job.conclusion === 'failure';
               
-              # Emit metrics for this test result
-              $metricsFile = Join-Path $metricsDir "$runId-$sampleName.json"
+              if (passed) passedCount++;
+              if (failed) failedCount++;
               
-              & "$scriptsPath/Emit-TestMetrics.ps1" `
-                -SampleName $sampleName `
-                -TestResultsPath $trxFile.FullName `
-                -Stage $stage `
-                -OutputPath $metricsFile `
-                -RunId $runId `
-                -CommitSha $commitSha `
-                -Branch $branch
+              // Create entry in dashboard-expected format
+              const entry = {
+                id: `run-${runId}-${sampleName}`,
+                timestamp: workflowRun.created_at,
+                stage: stage,
+                sampleName: sampleName,
+                sdkVersions: {}, // SDK versions not available from API
+                testResults: {
+                  status: passed ? 'passed' : (failed ? 'failed' : 'skipped'),
+                  total: 1,
+                  passed: passed ? 1 : 0,
+                  failed: failed ? 1 : 0,
+                  skipped: (!passed && !failed) ? 1 : 0
+                },
+                bugsCaught: {
+                  count: 0,
+                  details: []
+                },
+                runUrl: workflowRun.html_url
+              };
+              
+              entries.push(entry);
+              console.log(`${sampleName}: ${job.conclusion}`);
             }
-          }
+            
+            // Write entries to file
+            const fs = require('fs');
+            fs.writeFileSync('new-entries.json', JSON.stringify(entries, null, 2));
+            
+            core.setOutput('run_id', runId);
+            core.setOutput('conclusion', workflowRun.conclusion);
+            core.setOutput('passed', passedCount);
+            core.setOutput('failed', failedCount);
+            core.setOutput('stage', stage);
+            core.setOutput('branch', workflowRun.head_branch);
+            core.setOutput('entry_count', entries.length);
+            
+            console.log(`\n=== Created ${entries.length} entries ===`);
+
+      - name: Update History File
+        id: update
+        run: |
+          HISTORY_FILE="docs/history.json"
+          NEW_ENTRIES_FILE="new-entries.json"
           
-          # Aggregate all metrics
-          Write-Host "Aggregating metrics..." -ForegroundColor Cyan
-          & "$scriptsPath/Aggregate-Metrics.ps1" `
-            -MetricsDir $metricsDir `
-            -HistoryFile $historyFile
+          echo "=== Updating history file ==="
           
-          # Output metrics updated flag
-          echo "metrics_updated=true" >> $env:GITHUB_OUTPUT
+          # Read the new entries
+          NEW_ENTRIES=$(cat "$NEW_ENTRIES_FILE")
+          ENTRY_COUNT=$(echo "$NEW_ENTRIES" | jq 'length')
+          echo "New entries to add: $ENTRY_COUNT"
           
-          # Check for failures that need GitHub issues
-          $rawFiles = Get-ChildItem -Path $metricsDir -Filter "*.json" -ErrorAction SilentlyContinue
-          $hasFailures = $false
-          foreach ($file in $rawFiles) {
-            $data = Get-Content $file.FullName | ConvertFrom-Json
-            if ($data.testResults.failed -gt 0) {
-              $hasFailures = $true
-              echo "has_failures=true" >> $env:GITHUB_OUTPUT
-              echo "failed_metrics_file=$($file.FullName)" >> $env:GITHUB_OUTPUT
-              break
-            }
-          }
+          # Read existing history or create new one
+          if [ -f "$HISTORY_FILE" ]; then
+            HISTORY=$(cat "$HISTORY_FILE")
+          else
+            HISTORY='{"lastUpdated":null,"totalRuns":0,"entries":[],"summary":{},"pullRequests":[]}'
+          fi
           
-          # Clean up raw metrics (they're now in history)
-          Remove-Item -Path $metricsDir -Recurse -Force -ErrorAction SilentlyContinue
-
-      - name: Commit Metrics Update
-        if: steps.process.outputs.metrics_updated == 'true'
-        working-directory: metrics-repo
-        run: |
-          git config user.name "github-actions[bot]"
-          git config user.email "github-actions[bot]@users.noreply.github.com"
+          # Run ID exported by the "results" step; used to find history entries already recorded for this run
+          RUN_ID="${{ steps.results.outputs.run_id }}"
           
-          git add docs/history.json
+          # Remove any existing entries from this run (by matching run-XXXX- prefix in id)
+          HISTORY=$(echo "$HISTORY" | jq --arg runId "run-${RUN_ID}-" '
+            .entries = [.entries[] | select(.id | startswith($runId) | not)]
+          ')
+          
+          # Add new entries at the beginning and cap at 200 here, so totalRuns and the pass rate describe the entries actually written
+          HISTORY=$(echo "$HISTORY" | jq --argjson newEntries "$NEW_ENTRIES" '
+            .entries = ($newEntries + .entries)[:200]
+          ')
           
-          # Check if there are changes to commit
-          if git diff --staged --quiet; then
-            echo "No changes to commit"
+          # Update metadata
+          TOTAL_RUNS=$(echo "$HISTORY" | jq '.entries | length')
+          LAST_UPDATED=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
+          
+          # Calculate summary stats
+          PASSED=$(echo "$HISTORY" | jq '[.entries[].testResults.passed] | add // 0')
+          FAILED=$(echo "$HISTORY" | jq '[.entries[].testResults.failed] | add // 0')
+          TOTAL=$((PASSED + FAILED))
+          if [ "$TOTAL" -gt 0 ]; then
+            PASS_RATE=$((PASSED * 100 / TOTAL))
           else
-            git commit -m "📊 Update E2E test metrics [skip ci]"
-            git push origin main
-            echo "✅ Metrics committed and pushed to Agent365-metrics"
+            PASS_RATE=0
           fi
+          
+          HISTORY=$(echo "$HISTORY" | jq --arg updated "$LAST_UPDATED" --argjson total "$TOTAL_RUNS" --argjson passRate "$PASS_RATE" '
+            .lastUpdated = $updated |
+            .totalRuns = $total |
+            .summary.passRate = $passRate
+          ')
+          
+          # Keep only last 200 entries to prevent file from growing too large
+          HISTORY=$(echo "$HISTORY" | jq '.entries = .entries[:200]')
+          
+          # Write updated history
+          echo "$HISTORY" | jq '.' > "$HISTORY_FILE"
+          
+          echo "✅ History file updated"
+          echo "Total entries: $TOTAL_RUNS"
+          echo "Pass rate: $PASS_RATE%"
+          
+          # Check if there are changes; git status (unlike git diff) also reports a newly created, still-untracked history file
+          [ -z "$(git status --porcelain -- "$HISTORY_FILE")" ] && echo "has_changes=false" >> $GITHUB_OUTPUT || echo "has_changes=true" >> $GITHUB_OUTPUT
 
-      - name: Create GitHub Issues for Failures
-        if: steps.process.outputs.has_failures == 'true'
-        shell: pwsh
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Commit and Push Metrics
+        if: steps.update.outputs.has_changes == 'true'
         run: |
-          $scriptsPath = "./scripts/e2e"
-          $metricsFile = "${{ steps.process.outputs.failed_metrics_file }}"
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
           
-          if (Test-Path $metricsFile) {
-            Write-Host "Creating GitHub issues for failures..." -ForegroundColor Cyan
-            & "$scriptsPath/Create-GitHubIssue.ps1" `
-              -MetricsFile $metricsFile `
-              -Repository "${{ github.repository }}"
-          } else {
-            Write-Host "No metrics file found for issue creation" -ForegroundColor Yellow
-          }
+          git add docs/history.json
+          git commit -m "📊 Update E2E metrics for run #${{ steps.results.outputs.run_id }} [skip ci]"
+          git push origin main
+          
+          echo "✅ Metrics committed and pushed to Agent365-metrics"
 
-      - name: Generate Metrics Summary
+      - name: Generate Summary
+        env:
+          BRANCH: ${{ steps.results.outputs.branch }}  # untrusted (fork branch names): pass via env, never expand inside the script
         run: |
-          echo "## 📊 E2E Test Metrics Updated" >> $GITHUB_STEP_SUMMARY
+          printf '## 📊 E2E Test Metrics Collected\n\n' >> $GITHUB_STEP_SUMMARY
+          echo "**Run ID:** ${{ steps.results.outputs.run_id }}" >> $GITHUB_STEP_SUMMARY
+          echo "**Branch:** $BRANCH" >> $GITHUB_STEP_SUMMARY
+          echo "**Stage:** ${{ steps.results.outputs.stage }}" >> $GITHUB_STEP_SUMMARY
+          echo "**Result:** ${{ steps.results.outputs.conclusion }}" >> $GITHUB_STEP_SUMMARY
           echo "" >> $GITHUB_STEP_SUMMARY
-          echo "Testing Stage: **${{ steps.stage.outputs.stage }}**" >> $GITHUB_STEP_SUMMARY
+          printf '| Metric | Value |\n|--------|-------|\n' >> $GITHUB_STEP_SUMMARY
+          echo "| ✅ Passed | ${{ steps.results.outputs.passed }} |" >> $GITHUB_STEP_SUMMARY
+          echo "| ❌ Failed | ${{ steps.results.outputs.failed }} |" >> $GITHUB_STEP_SUMMARY
+          echo "| 📝 Entries | ${{ steps.results.outputs.entry_count }} |" >> $GITHUB_STEP_SUMMARY
           echo "" >> $GITHUB_STEP_SUMMARY
           echo "View the [E2E Testing Dashboard](https://agent365-metrics-dashboard.azurewebsites.net/api/e2e-dashboard) for detailed statistics." >> $GITHUB_STEP_SUMMARY