Skip to content

Commit f2d6c02

Browse files
code_intel: omc_code_diff + omc_code_metrics + 14 tests
omc_code_diff(a, b) — structural diff between two programs after canonicalization. Returns {added, removed, modified, unchanged} as function-name arrays. Alpha-renames don't show up as modifications because hashes are computed on the canonical form. The LLM's "what did my edit actually change?" check. omc_code_metrics(code) — one-shot bulk metrics: {complexity, ast_size, ast_depth, source_bytes, token_count, compression_ratio}. Saves N round-trips through MCP for the common case where the LLM wants all stats at once. 14 new tests cover: - diff: identical / added / removed / modified / alpha-rename - metrics: all fields present, complexity ordering, token_count positive, compression_ratio positive - composition: diff-then-metrics workflow - summary classes/imports/stmt_count completeness - canonical idempotence (canonicalize(canonical) == canonical) Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent 7e160d6 commit f2d6c02

5 files changed

Lines changed: 258 additions & 3 deletions

File tree

OMC_REFERENCE.md

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22

33
Auto-generated from `omnimcode-core/src/docs.rs`. Run `omc --gen-docs > OMC_REFERENCE.md` to regenerate.
44

5-
**Total documented builtins**: 538
5+
**Total documented builtins**: 540
66

7-
**OMC-unique**: 60 (no direct Python/NumPy equivalent — these are why you reach for OMC over numpy)
7+
**OMC-unique**: 61 (no direct Python/NumPy equivalent — these are why you reach for OMC over numpy)
88

99
---
1010

@@ -25,7 +25,7 @@ Auto-generated from `omnimcode-core/src/docs.rs`. Run `omc --gen-docs > OMC_REFE
2525
- [exceptions](#exceptions) (2 builtins)
2626
- [introspection](#introspection) (22 builtins)
2727
- [tokenizer](#tokenizer) (16 builtins)
28-
- [code_intel](#code_intel) (14 builtins)
28+
- [code_intel](#code_intel) (16 builtins)
2929
- [math](#math) (58 builtins)
3030
- [dicts](#dicts) (26 builtins)
3131
- [test_runner](#test_runner) (8 builtins)
@@ -4555,6 +4555,26 @@ Hash blended with substrate-resonance of the hash itself — OMC-only dual-band
45554555
omc_hbit_hash("h x = 1;") // substrate-weighted int
45564556
```
45574557

4558+
### `omc_code_diff` 🔱 *OMC-unique*
4559+
4560+
**Signature**: `(a: string, b: string) -> dict`
4561+
4562+
Structural diff between two programs (after canonicalization). {added, removed, modified, unchanged} as function-name arrays.
4563+
4564+
```omc
4565+
omc_code_diff(old, new) // {modified: ["loss"], ...}
4566+
```
4567+
4568+
### `omc_code_metrics`
4569+
4570+
**Signature**: `(code: string) -> dict`
4571+
4572+
Bulk metrics: {complexity, ast_size, ast_depth, source_bytes, token_count, compression_ratio}. One call instead of N.
4573+
4574+
```omc
4575+
omc_code_metrics(src) // all stats at once
4576+
```
4577+
45584578
---
45594579

45604580
## math
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
# Additional code_intel coverage including diff + metrics.
2+
3+
fn assert_eq(actual, expected, msg) {
4+
if actual != expected {
5+
test_record_failure(msg + ": expected " + to_string(expected) + " got " + to_string(actual));
6+
}
7+
}
8+
9+
fn assert_true(cond, msg) { if !cond { test_record_failure(msg); } }
10+
11+
# omc_code_diff
12+
fn test_diff_identical() {
13+
h d = omc_code_diff(
14+
"fn f(x) { return x; }",
15+
"fn f(x) { return x; }"
16+
);
17+
assert_eq(arr_len(dict_get(d, "added")), 0, "no additions");
18+
assert_eq(arr_len(dict_get(d, "removed")), 0, "no removals");
19+
assert_eq(arr_len(dict_get(d, "modified")), 0, "no mods");
20+
assert_eq(arr_len(dict_get(d, "unchanged")), 1, "one unchanged");
21+
}
22+
23+
fn test_diff_added() {
24+
h d = omc_code_diff(
25+
"fn f(x) { return x; }",
26+
"fn f(x) { return x; } fn g(x) { return x; }"
27+
);
28+
assert_eq(arr_len(dict_get(d, "added")), 1, "g added");
29+
}
30+
31+
fn test_diff_removed() {
32+
h d = omc_code_diff(
33+
"fn f(x) { return x; } fn g(x) { return x; }",
34+
"fn f(x) { return x; }"
35+
);
36+
assert_eq(arr_len(dict_get(d, "removed")), 1, "g removed");
37+
}
38+
39+
fn test_diff_modified() {
40+
h d = omc_code_diff(
41+
"fn f(x) { return x; }",
42+
"fn f(x) { return x + 1; }"
43+
);
44+
assert_eq(arr_len(dict_get(d, "modified")), 1, "f modified");
45+
}
46+
47+
fn test_diff_alpha_rename_unchanged() {
48+
h d = omc_code_diff(
49+
"fn f(x) { return x; }",
50+
"fn f(a) { return a; }"
51+
);
52+
assert_eq(arr_len(dict_get(d, "unchanged")), 1, "alpha-rename is unchanged");
53+
assert_eq(arr_len(dict_get(d, "modified")), 0, "no actual mods");
54+
}
55+
56+
# omc_code_metrics
57+
fn test_metrics_has_all_fields() {
58+
h m = omc_code_metrics("fn f(x) { return x; }");
59+
assert_true(dict_has(m, "complexity"), "has complexity");
60+
assert_true(dict_has(m, "ast_size"), "has ast_size");
61+
assert_true(dict_has(m, "ast_depth"), "has ast_depth");
62+
assert_true(dict_has(m, "source_bytes"), "has source_bytes");
63+
assert_true(dict_has(m, "token_count"), "has token_count");
64+
assert_true(dict_has(m, "compression_ratio"), "has compression_ratio");
65+
}
66+
67+
fn test_metrics_complexity_grows() {
68+
h simple = omc_code_metrics("fn f(x) { return x; }");
69+
h branchy = omc_code_metrics("fn f(x) { if x > 0 { if x > 1 { return 1; } return 0; } return 0; }");
70+
assert_true(dict_get(branchy, "complexity") > dict_get(simple, "complexity"),
71+
"complexity orders");
72+
}
73+
74+
fn test_metrics_token_count_positive() {
75+
h m = omc_code_metrics("fn f(x) { return x; }");
76+
assert_true(dict_get(m, "token_count") > 0, "non-zero tokens");
77+
}
78+
79+
fn test_metrics_compression_ratio_positive() {
80+
h m = omc_code_metrics("fn f(x) { return x; }");
81+
assert_true(dict_get(m, "compression_ratio") > 0.0, "non-zero ratio");
82+
}
83+
84+
# Composition tests
85+
fn test_diff_then_metrics() {
86+
# The basic LLM workflow: diff to see what changed, then metrics
87+
# on the new version.
88+
h old = "fn f(x) { return x; }";
89+
h new = "fn f(x) { return x + 1; } fn g(x) { return x * 2; }";
90+
h d = omc_code_diff(old, new);
91+
assert_eq(arr_len(dict_get(d, "added")), 1, "added g");
92+
h m = omc_code_metrics(new);
93+
assert_true(dict_get(m, "complexity") >= 1.0, "metrics on new");
94+
}
95+
96+
# omc_code_summary field coverage: classes, imports, stmt_count
97+
fn test_summary_has_classes() {
98+
h s = omc_code_summary("class Foo { x; y; } fn f() {}");
99+
assert_eq(arr_len(dict_get(s, "classes")), 1, "one class");
100+
assert_eq(arr_get(dict_get(s, "classes"), 0), "Foo", "class name");
101+
}
102+
103+
fn test_summary_imports() {
104+
h s = omc_code_summary("import \"foo\"; fn main() {}");
105+
assert_eq(arr_len(dict_get(s, "imports")), 1, "one import");
106+
}
107+
108+
fn test_summary_stmt_count() {
109+
h s = omc_code_summary("fn a() {} fn b() {} fn c() {}");
110+
assert_eq(dict_get(s, "stmt_count"), 3, "three stmts");
111+
}
112+
113+
# Canonicalization is idempotent: canonicalizing already-canonical output returns it unchanged
114+
fn test_canonical_idempotent() {
115+
h c1 = omc_code_canonical("fn f(x) { return x; }");
116+
h c2 = omc_code_canonical(c1);
117+
assert_eq(c1, c2, "canonical is idempotent");
118+
}

omnimcode-core/src/code_intel.rs

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,67 @@ pub fn substrate_fingerprint(source: &str) -> Result<i64, String> {
391391
tokenizer::crt_pack(&streams, &moduli)
392392
}
393393

394+
/// Function-level structural diff between two programs, A and B.
///
/// Every list holds function names. Functions are compared after
/// canonicalization, so renames do not show up as differences.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct CodeDiff {
    /// Functions that appear only in B.
    pub added: Vec<String>,
    /// Functions that appear only in A.
    pub removed: Vec<String>,
    /// Functions that appear in both programs but with different bodies.
    pub modified: Vec<String>,
    /// Functions that appear in both programs with the same body.
    pub unchanged: Vec<String>,
}
405+
406+
pub fn diff(a: &str, b: &str) -> Result<CodeDiff, String> {
407+
let sa = summarise(a)?;
408+
let sb = summarise(b)?;
409+
use std::collections::HashMap;
410+
let a_map: HashMap<&str, i64> = sa.functions.iter()
411+
.map(|f| (f.name.as_str(), f.canonical_hash))
412+
.collect();
413+
let b_map: HashMap<&str, i64> = sb.functions.iter()
414+
.map(|f| (f.name.as_str(), f.canonical_hash))
415+
.collect();
416+
let mut diff = CodeDiff::default();
417+
for f in &sa.functions {
418+
match b_map.get(f.name.as_str()) {
419+
None => diff.removed.push(f.name.clone()),
420+
Some(&bh) if bh == f.canonical_hash => diff.unchanged.push(f.name.clone()),
421+
Some(_) => diff.modified.push(f.name.clone()),
422+
}
423+
}
424+
for f in &sb.functions {
425+
if !a_map.contains_key(f.name.as_str()) {
426+
diff.added.push(f.name.clone());
427+
}
428+
}
429+
diff.added.sort();
430+
diff.removed.sort();
431+
diff.modified.sort();
432+
diff.unchanged.sort();
433+
Ok(diff)
434+
}
435+
436+
/// Quick metrics: substrate score + complexity + size all in one shot.
437+
/// Computed in one parse-and-canonicalize pass each.
438+
pub fn quick_metrics(source: &str) -> Result<std::collections::BTreeMap<String, f64>, String> {
439+
let mut out = std::collections::BTreeMap::new();
440+
let cpx = complexity(source)? as f64;
441+
let size = ast_size(source)? as f64;
442+
let depth = ast_depth(source)? as f64;
443+
out.insert("complexity".to_string(), cpx);
444+
out.insert("ast_size".to_string(), size);
445+
out.insert("ast_depth".to_string(), depth);
446+
out.insert("source_bytes".to_string(), source.len() as f64);
447+
let ids = crate::tokenizer::encode(source).len() as f64;
448+
out.insert("token_count".to_string(), ids);
449+
if source.len() > 0 {
450+
out.insert("compression_ratio".to_string(), source.len() as f64 / ids.max(1.0));
451+
}
452+
Ok(out)
453+
}
454+
394455
#[cfg(test)]
395456
mod tests {
396457
use super::*;

omnimcode-core/src/docs.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1059,6 +1059,20 @@ pub const BUILTINS: &[BuiltinDoc] = &[
10591059
example: "omc_search_builtins(\"softmax\") // [\"arr_softmax\"]",
10601060
unique_to_omc: false,
10611061
},
1062+
BuiltinDoc {
1063+
name: "omc_code_diff", category: "code_intel",
1064+
signature: "(a: string, b: string) -> dict",
1065+
description: "Structural diff between two programs (after canonicalization). {added, removed, modified, unchanged} as function-name arrays.",
1066+
example: "omc_code_diff(old, new) // {modified: [\"loss\"], ...}",
1067+
unique_to_omc: true,
1068+
},
1069+
BuiltinDoc {
1070+
name: "omc_code_metrics", category: "code_intel",
1071+
signature: "(code: string) -> dict",
1072+
description: "Bulk metrics: {complexity, ast_size, ast_depth, source_bytes, token_count, compression_ratio}. One call instead of N.",
1073+
example: "omc_code_metrics(src) // all stats at once",
1074+
unique_to_omc: false,
1075+
},
10621076
// ---- Auto-generated docs for previously-undocumented builtins ----
10631077
// Each entry covers one runtime builtin that lacked introspection.
10641078
// Stubs are conservative — refine as you learn the actual signatures.

omnimcode-core/src/interpreter.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7705,6 +7705,48 @@ impl Interpreter {
77057705
.collect();
77067706
Ok(Value::Array(HArray::from_vec(out)))
77077707
}
7708+
"omc_code_diff" => {
7709+
// Structural diff: returns {added, removed, modified, unchanged}.
7710+
// Compared after canonicalization so renames don't show.
7711+
if args.len() < 2 {
7712+
return Err("omc_code_diff requires (a, b)".to_string());
7713+
}
7714+
let a = self.eval_expr(&args[0])?.to_display_string();
7715+
let b = self.eval_expr(&args[1])?.to_display_string();
7716+
let d = crate::code_intel::diff(&a, &b)
7717+
.map_err(|e| format!("omc_code_diff: {}", e))?;
7718+
let mut map = std::collections::BTreeMap::new();
7719+
map.insert("added".to_string(), Value::Array(HArray::from_vec(
7720+
d.added.iter().map(|s| Value::String(s.clone())).collect()
7721+
)));
7722+
map.insert("removed".to_string(), Value::Array(HArray::from_vec(
7723+
d.removed.iter().map(|s| Value::String(s.clone())).collect()
7724+
)));
7725+
map.insert("modified".to_string(), Value::Array(HArray::from_vec(
7726+
d.modified.iter().map(|s| Value::String(s.clone())).collect()
7727+
)));
7728+
map.insert("unchanged".to_string(), Value::Array(HArray::from_vec(
7729+
d.unchanged.iter().map(|s| Value::String(s.clone())).collect()
7730+
)));
7731+
Ok(Value::dict_from(map))
7732+
}
7733+
"omc_code_metrics" => {
7734+
// Bulk metrics in one call: complexity + ast_size +
7735+
// ast_depth + source_bytes + token_count +
7736+
// compression_ratio. Avoids N separate round-trips
7737+
// through the MCP server.
7738+
if args.is_empty() {
7739+
return Err("omc_code_metrics requires (code)".to_string());
7740+
}
7741+
let code = self.eval_expr(&args[0])?.to_display_string();
7742+
let m = crate::code_intel::quick_metrics(&code)
7743+
.map_err(|e| format!("omc_code_metrics: {}", e))?;
7744+
let mut map = std::collections::BTreeMap::new();
7745+
for (k, v) in m {
7746+
map.insert(k, Value::HFloat(v));
7747+
}
7748+
Ok(Value::dict_from(map))
7749+
}
77087750
"omc_search_builtins" => {
77097751
// Substring search across name + description. Returns
77107752
// matching names. Useful when you don't know what

0 commit comments

Comments
 (0)