-
Notifications
You must be signed in to change notification settings - Fork 695
test: Proposal A v3 - Unit Tests for Feedback Scoring (Phase 4) #506
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -316,3 +316,35 @@ export function extractReflectionSliceItems(reflectionText: string): ReflectionS | |
/**
 * Builds reflection slice items from the injectable slices of a reflection text.
 *
 * @param reflectionText - Raw reflection text passed to `extractInjectableReflectionSlices`.
 * @returns The items produced by `buildReflectionSliceItemsFromSlices` for those slices.
 */
export function extractInjectableReflectionSliceItems(reflectionText: string): ReflectionSliceItem[] {
  const injectableSlices = extractInjectableReflectionSlices(reflectionText);
  return buildReflectionSliceItemsFromSlices(injectableSlices);
}
|
|
||
/**
 * Decides whether a response actually used the injected memory IDs or summaries.
 *
 * Rules, evaluated in order:
 * - A response of 24 characters or fewer is too short to count as usage.
 * - If both `injectedIds` and `injectedSummaries` are empty, nothing can be used.
 * - The response must contain a usage marker (e.g. "教練我記得", "記得", "memory").
 * - With a marker present, usage is confirmed by either a non-empty injected ID
 *   appearing in the response, or a verbatim match of an injected summary that
 *   is at least 10 characters long.
 *
 * @param response - The generated response text to inspect.
 * @param injectedIds - IDs of the memories that were injected.
 * @param injectedSummaries - Summaries of the memories that were injected.
 * @returns `true` only when a usage marker and a matching ID or summary are both present.
 */
export function isRecallUsed(
  response: string,
  injectedIds: readonly string[],
  injectedSummaries: readonly string[]
): boolean {
  if (response.length <= 24) return false;
  if (injectedIds.length === 0 && injectedSummaries.length === 0) return false;

  // `id-` / `id:` is deliberately NOT a marker: IDs commonly look like
  // "id-abc123", so treating that prefix as a marker would let a bare ID
  // mention satisfy the "ID + marker" gate on its own.
  // "教練" already covers "教練我" and "教練我記得", so one alternative suffices.
  const hasUsageMarker = /教練|記得|memory/i.test(response);
  if (!hasUsageMarker) return false;

  // An empty ID is skipped because `includes("")` is true for every string.
  const mentionsInjectedId = injectedIds.some(
    id => id.length > 0 && response.includes(id)
  );
  if (mentionsInjectedId) return true;

  // Short summaries are ignored: they match by coincidence too easily.
  return injectedSummaries.some(
    summary => summary.length >= 10 && response.includes(summary)
  );
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,33 @@ | ||
| // test/bad-recall-count.test.mjs | ||
| // 測試 bad_recall_count 遞增邏輯(純邏輯測試,mock store 行為) | ||
| import { describe, it } from 'node:test'; | ||
| import assert from 'node:assert'; | ||
|
|
||
| // 測試 bad_recall_count 的遞增邏輯(純邏輯測試,mock store) | ||
| describe("bad_recall_count logic", () => { | ||
/**
 * Computes the next bad-recall count from the current one.
 *
 * - A confirmation resets the count to 0.
 * - A miss increments the count, unless `recallCount` is below `minPenaltyThreshold`.
 * - Anything else leaves the count unchanged.
 */
function computeNextBadCount(current, isMiss, isConfirm, recallCount, minPenaltyThreshold) {
  if (isConfirm) {
    return 0;
  }
  // Too few recalls so far: a miss is not penalized yet.
  const belowThreshold = recallCount < minPenaltyThreshold;
  return isMiss && !belowThreshold ? current + 1 : current;
}
|
|
||
it("confirm resets count to 0", () => {
  assert.strictEqual(computeNextBadCount(5, false, true, 3, 2), 0);
});

it("miss with insufficient recall count does not increment", () => {
  assert.strictEqual(computeNextBadCount(0, true, false, 1, 2), 0);
});

it("miss with sufficient recall count increments", () => {
  assert.strictEqual(computeNextBadCount(1, true, false, 2, 2), 2);
});

it("non-miss non-confirm keeps current value", () => {
  assert.strictEqual(computeNextBadCount(3, false, false, 3, 2), 3);
});

it("reaches penalty threshold at badCount=2", () => {
  // A penalty is triggered once badCount >= 2. Drive the helper through two
  // qualifying misses from a clean state instead of asserting on a constant.
  const afterFirstMiss = computeNextBadCount(0, true, false, 2, 2);
  assert.strictEqual(afterFirstMiss, 1);
  assert.strictEqual(afterFirstMiss >= 2, false);

  const afterSecondMiss = computeNextBadCount(afterFirstMiss, true, false, 3, 2);
  assert.strictEqual(afterSecondMiss, 2);
  assert.strictEqual(afterSecondMiss >= 2, true);
});
| }); |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,100 @@ | ||
| // test/feedback-config.test.mjs | ||
| // 測試 FeedbackConfigManager(Phase 3 回饋信號反饋配置管理器) | ||
| import { describe, it } from 'node:test'; | ||
| import assert from 'node:assert'; | ||
|
|
||
// Local stand-in for FeedbackConfigManager (mirrors the Phase 3 implementation).
class FeedbackConfigManager {
  constructor(config) {
    this.config = config;
  }

  /**
   * Returns the importance adjustment for a feedback event.
   *
   * - 'use':     boost, or 0 when recallCount is below minRecallCountForBoost.
   * - 'confirm': confirm boost.
   * - 'miss':    negated miss penalty, or 0 when recallCount is below
   *              minRecallCountForPenalty.
   * - 'error':   negated error penalty.
   * - any other event: 0.
   *
   * `badRecallCount` is accepted but not read here.
   */
  computeImportanceDelta(event, recallCount = 1, badRecallCount = 0) {
    const settings = this.config;
    switch (event) {
      case 'use':
        return recallCount < settings.minRecallCountForBoost ? 0 : settings.importanceBoostOnUse;
      case 'confirm':
        return settings.importanceBoostOnConfirm;
      case 'miss':
        return recallCount < settings.minRecallCountForPenalty ? 0 : -settings.importancePenaltyOnMiss;
      case 'error':
        return -settings.importancePenaltyOnError;
      default:
        return 0;
    }
  }

  /** True when `text` contains any configured confirm keyword (case-insensitive substring). */
  isConfirmKeyword(text) {
    for (const keyword of this.config.confirmKeywords) {
      if (text.toLowerCase().includes(keyword.toLowerCase())) {
        return true;
      }
    }
    return false;
  }

  /** True when `text` contains any configured error keyword (case-insensitive substring). */
  isErrorKeyword(text) {
    for (const keyword of this.config.errorKeywords) {
      if (text.toLowerCase().includes(keyword.toLowerCase())) {
        return true;
      }
    }
    return false;
  }

  /** Returns a fresh object holding the default thresholds, deltas and keyword lists. */
  static defaultConfig() {
    const defaults = {
      // Importance deltas (penalties are stored as positive magnitudes).
      importanceBoostOnUse: 0.05,
      importanceBoostOnConfirm: 0.15,
      importancePenaltyOnMiss: 0.03,
      importancePenaltyOnError: 0.10,
      // Minimum recall counts before a penalty / boost applies.
      minRecallCountForPenalty: 2,
      minRecallCountForBoost: 1,
      // Substrings matched case-insensitively against feedback text.
      confirmKeywords: ["是對的", "確認", "正確", "right"],
      errorKeywords: ["錯誤", "不對", "wrong", "not right"],
    };
    return defaults;
  }
}
|
|
||
describe("FeedbackConfigManager", () => {
  // Every case runs against a fresh manager built from the default configuration.
  const createManager = () =>
    new FeedbackConfigManager(FeedbackConfigManager.defaultConfig());

  describe("computeImportanceDelta", () => {
    it("use event with recallCount >= minRecallCountForBoost returns boostOnUse", () => {
      assert.strictEqual(createManager().computeImportanceDelta('use', 1, 0), 0.05);
    });

    it("confirm event returns boostOnConfirm", () => {
      assert.strictEqual(createManager().computeImportanceDelta('confirm', 1, 0), 0.15);
    });

    it("miss event with recallCount < minRecallCountForPenalty returns 0", () => {
      // recallCount = 1 is below the default threshold of 2.
      assert.strictEqual(createManager().computeImportanceDelta('miss', 1, 0), 0);
    });

    it("miss event with recallCount >= minRecallCountForPenalty returns penalty", () => {
      // recallCount = 2 meets the default threshold of 2.
      assert.strictEqual(createManager().computeImportanceDelta('miss', 2, 0), -0.03);
    });

    it("error event returns error penalty", () => {
      assert.strictEqual(createManager().computeImportanceDelta('error', 1, 0), -0.10);
    });
  });

  describe("isConfirmKeyword", () => {
    it("detects 是對的", () => {
      assert.strictEqual(createManager().isConfirmKeyword("教練我覺得是對的"), true);
    });

    it("detects right", () => {
      assert.strictEqual(createManager().isConfirmKeyword("that's right"), true);
    });

    it("rejects unrelated text", () => {
      assert.strictEqual(createManager().isConfirmKeyword("今天天氣很好"), false);
    });
  });

  describe("isErrorKeyword", () => {
    it("detects 錯誤", () => {
      assert.strictEqual(createManager().isErrorKeyword("教練這是錯誤的"), true);
    });

    it("rejects unrelated text", () => {
      assert.strictEqual(createManager().isErrorKeyword("今天天氣很好"), false);
    });
  });
});
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,42 @@ | ||
| // test/isRecallUsed.test.mjs | ||
| // 測試 isRecallUsed() 函式 - 判斷回應是否實際使用了注入的記憶 ID 或摘要 | ||
| import { describe, it } from 'node:test'; | ||
| import assert from 'node:assert'; | ||
| import { isRecallUsed } from '../src/reflection-slices.ts'; | ||
|
|
||
describe("isRecallUsed", () => {
  // isRecallUsed rejects any response of 24 characters or fewer before looking
  // at anything else. Cases that target a later rule must therefore use a
  // longer response, otherwise they pass through the length guard and never
  // exercise the rule they are named after.
  const assertPastLengthGuard = (response) => {
    assert.ok(response.length > 24, `response must exceed 24 chars, got ${response.length}`);
  };

  it("returns false for short response (<=24 chars)", () => {
    assert.strictEqual(isRecallUsed("hi", ["id1"], []), false);
  });

  it("returns false when both injectedIds and injectedSummaries are empty", () => {
    const response = "教練我記得這件事,這是一個長度超過二十四個字元的回應內容";
    assertPastLengthGuard(response);
    assert.strictEqual(isRecallUsed(response, [], []), false);
  });

  it("returns true when injected ID is present AND usage marker is present", () => {
    const response = "教練我記得這件事 memory id-abc123";
    const injectedIds = ["id-abc123"];
    assertPastLengthGuard(response);
    assert.strictEqual(isRecallUsed(response, injectedIds, []), true);
  });

  it("returns false when only ID is present but no usage marker", () => {
    // The response and the ID contain no marker-like text, so the only thing
    // that could make this true is the ID match on its own.
    const response = "我在這個很長很長的回應裡面順便提到了abc123這個項目名稱";
    const injectedIds = ["abc123"];
    assertPastLengthGuard(response);
    assert.strictEqual(isRecallUsed(response, injectedIds, []), false);
  });

  it("returns false when only usage marker is present but no ID", () => {
    const response = "教練我記得這件事,但是這段很長的回應完全沒有提到任何識別碼";
    const injectedIds = ["id-abc123"];
    assertPastLengthGuard(response);
    assert.strictEqual(isRecallUsed(response, injectedIds, []), false);
  });

  it("returns true for verbatim summary match (>=10 chars)", () => {
    // The response is longer than 24 characters and contains the injected
    // summary verbatim; the summary itself is at least 10 characters long.
    const response = "教練xx這是關於Python的import機制的詳細說明";
    const injectedIds = [];
    const injectedSummaries = ["這是關於Python的import機制的詳細說明"];
    assertPastLengthGuard(response);
    assert.strictEqual(isRecallUsed(response, injectedIds, injectedSummaries), true);
  });

  it("returns false for short summary (<10 chars)", () => {
    const response = "教練提到了test這個詞,而且這段回應的長度已經超過二十四個字元";
    const injectedIds = [];
    const injectedSummaries = ["test"];
    assertPastLengthGuard(response);
    assert.strictEqual(isRecallUsed(response, injectedIds, injectedSummaries), false);
  });
});
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
id-pattern from usage-marker detection: The usage-marker regex currently includes
`id[-:]`, so any long response that merely repeats an injected ID (for example `... id-abc123 ...`) sets `hasUsageMarker=true`. That makes the `hasMatchingId && hasUsageMarker` check effectively equivalent to `hasMatchingId` for common ID formats, which contradicts the intended "ID + marker" gate and will overcount recall usage in feedback scoring paths. Useful? React with 👍 / 👎.