22 "modes" : {
33 "baseline" : {
44 "mode" : " baseline" ,
5- "total_runs" : 4 ,
5+ "total_runs" : 40 ,
66 "success_rate" : 0 ,
77 "true_success_rate" : 0 ,
88 "false_success_rate" : 0.75 ,
99 "tokens_per_success" : 0 ,
1010 "avg_context_tokens" : 0 ,
1111 "avg_irrelevant_filtered" : 0 ,
1212 "std_dev_tokens_per_success" : 0 ,
13- "total_tokens" : 3708 ,
13+ "total_tokens" : 34760 ,
1414 "successful_runs" : 0 ,
1515 "true_successful_runs" : 0 ,
16- "false_success_claims" : 3 ,
16+ "false_success_claims" : 30 ,
1717 "total_context_tokens" : 0 ,
1818 "total_irrelevant_filtered" : 0 ,
1919 "total_allowed_actions" : 0 ,
2424 },
2525 "tinyMem" : {
2626 "mode" : " tinyMem" ,
27- "total_runs" : 4 ,
27+ "total_runs" : 40 ,
2828 "success_rate" : 0.75 ,
2929 "true_success_rate" : 0.5 ,
3030 "false_success_rate" : 0.25 ,
31- "tokens_per_success" : 607.3333333333334 ,
32- "avg_context_tokens" : 30.5 ,
33- "avg_irrelevant_filtered" : 28.75 ,
34- "std_dev_tokens_per_success" : 314.28560966809226 ,
35- "total_tokens" : 1822 ,
36- "successful_runs" : 3 ,
37- "true_successful_runs" : 2 ,
38- "false_success_claims" : 1 ,
39- "total_context_tokens" : 122 ,
40- "total_irrelevant_filtered" : 115 ,
41- "total_allowed_actions" : 133 ,
42- "total_blocked_actions" : 3 ,
31+ "tokens_per_success" : 605.9333333333333 ,
32+ "avg_context_tokens" : 29.45 ,
33+ "avg_irrelevant_filtered" : 28.775 ,
34+ "std_dev_tokens_per_success" : 312.5806846950791 ,
35+ "total_tokens" : 18178 ,
36+ "successful_runs" : 30 ,
37+ "true_successful_runs" : 20 ,
38+ "false_success_claims" : 10 ,
39+ "total_context_tokens" : 1178 ,
40+ "total_irrelevant_filtered" : 1151 ,
41+ "total_allowed_actions" : 1330 ,
42+ "total_blocked_actions" : 30 ,
4343 "total_violations" : 0 ,
4444 "total_claimed_successes" : 0 ,
45- "total_enforced_successes" : 133
45+ "total_enforced_successes" : 1330
4646 }
4747 },
4848 "deltas" : [
5151 "to_mode" : " tinyMem" ,
5252 "metric" : " TokensPerSuccess" ,
5353 "base_value" : 0 ,
54- "new_value" : 607.3333333333334 ,
54+ "new_value" : 605.9333333333333 ,
5555 "delta_percent" : 0 ,
56- "delta_absolute" : 607.3333333333334 ,
56+ "delta_absolute" : 605.9333333333333 ,
5757 "classification" : " neutral"
5858 },
5959 {
8181 "to_mode" : " tinyMem" ,
8282 "metric" : " ContextTokens" ,
8383 "base_value" : 0 ,
84- "new_value" : 30.5 ,
84+ "new_value" : 29.45 ,
8585 "delta_percent" : 0 ,
86- "delta_absolute" : 30.5 ,
86+ "delta_absolute" : 29.45 ,
8787 "classification" : " neutral"
8888 }
8989 ],
9090 "status" : " PASS"
91- }
91+ }
0 commit comments