diff --git a/Cargo.lock b/Cargo.lock index 9cc541c..ce73a15 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -781,17 +781,35 @@ version = "0.5.0" dependencies = [ "clap", "indicatif", + "oxabl_analyze", "oxabl_ast", "oxabl_common", "oxabl_lexer", "oxabl_parser", "oxabl_preprocessor", + "oxabl_schema", + "oxabl_semantic", "oxabl_workspace", "serde", "serde_json", "walkdir", ] +[[package]] +name = "oxabl_analyze" +version = "0.1.0" +dependencies = [ + "oxabl_ast", + "oxabl_common", + "oxabl_lexer", + "oxabl_lint", + "oxabl_parser", + "oxabl_schema", + "oxabl_semantic", + "serde", + "serde_json", +] + [[package]] name = "oxabl_ast" version = "0.5.0" diff --git a/Cargo.toml b/Cargo.toml index 8cee129..cd84b49 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,5 +11,6 @@ members = [ "crates/oxabl_schema", "crates/oxabl_semantic", "crates/oxabl_lint", + "crates/oxabl_analyze", ] resolver = "2" diff --git a/crates/oxabl/Cargo.toml b/crates/oxabl/Cargo.toml index ad7b81c..c9e0bd2 100644 --- a/crates/oxabl/Cargo.toml +++ b/crates/oxabl/Cargo.toml @@ -17,6 +17,9 @@ oxabl_common = { path = "../oxabl_common", version = "0.5.0" } oxabl_ast = { path = "../oxabl_ast", version = "0.5.0" } oxabl_workspace = { path = "../oxabl_workspace", version = "0.4.0" } oxabl_preprocessor = { path = "../oxabl_preprocessor", version = "0.3.1" } +oxabl_semantic = { path = "../oxabl_semantic", version = "0.1.0" } +oxabl_schema = { path = "../oxabl_schema", version = "0.1.0" } +oxabl_analyze = { path = "../oxabl_analyze", version = "0.1.0" } clap = { version = "4", features = ["derive"] } walkdir = "2" indicatif = "0.17" diff --git a/crates/oxabl/src/main.rs b/crates/oxabl/src/main.rs index 8f18d09..8853642 100644 --- a/crates/oxabl/src/main.rs +++ b/crates/oxabl/src/main.rs @@ -5,10 +5,13 @@ use std::time::Instant; use clap::Parser as ClapParser; use indicatif::{ProgressBar, ProgressStyle}; +use oxabl_analyze::{dump_json, dump_text}; use oxabl_common::{FileId, SourceMap}; use oxabl_lexer::tokenize; use 
oxabl_parser::Parser; use oxabl_preprocessor::Preprocessor; +use oxabl_schema::Schema; +use oxabl_semantic::{AnalysisContext, analyze_file}; use oxabl_workspace::RealFileSystem; use serde::Serialize; use walkdir::WalkDir; @@ -40,6 +43,27 @@ enum Cli { #[arg(long)] debug: bool, }, + /// Parse + semantic-analyze a single ABL file and dump the resolved model. + Analyze { + /// Path to the ABL source file to analyze. + path: PathBuf, + + /// Output format: `json` (stable, versioned) or `text` (human-oriented). + #[arg(long, default_value = "json")] + format: String, + + /// Skip the lint pass (semantic-layer diagnostics only). + #[arg(long)] + no_lint: bool, + + /// Enable preprocessing (include expansion, &IF evaluation). + #[arg(long)] + preprocess: bool, + + /// Include search paths (can be specified multiple times). + #[arg(long = "include-path", short = 'I')] + include_paths: Vec, + }, } enum FileResult { @@ -98,9 +122,100 @@ fn main() -> ExitCode { include_paths, debug, } => run_check(&path, json, preprocess, &include_paths, debug), + Cli::Analyze { + path, + format, + no_lint, + preprocess, + include_paths, + } => run_analyze(&path, &format, no_lint, preprocess, &include_paths), } } +fn run_analyze( + path: &Path, + format: &str, + no_lint: bool, + preprocess: bool, + include_paths: &[PathBuf], +) -> ExitCode { + let source = match std::fs::read_to_string(path) { + Ok(s) => s, + Err(e) => { + eprintln!("error: cannot read {}: {e}", path.display()); + return ExitCode::from(2); + } + }; + + // Run preprocess or take raw source as expanded source. 
+ let expanded = if preprocess { + let fs = RealFileSystem; + let preprocessor = Preprocessor::new(&fs, include_paths); + let file_id = FileId::new(1); + match preprocessor.process(file_id, &source) { + Ok(pf) => pf.to_text().to_string(), + Err(diags) => { + eprintln!( + "error: preprocessing failed: {}", + diags + .first() + .map(|d| d.message.as_str()) + .unwrap_or("unknown") + ); + return ExitCode::from(3); + } + } + } else { + source + }; + + let tokens = + match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| tokenize(&expanded))) { + Ok(t) => t, + Err(_) => { + eprintln!("error: lexer panicked"); + return ExitCode::from(4); + } + }; + + let mut parser = Parser::new(&tokens, &expanded); + let program = match parser.parse_statements() { + Ok(p) => p, + Err(e) => { + let sm = SourceMap::new(&expanded); + let (line, col) = sm.lookup(e.span.start as usize); + eprintln!("parse error at {line}:{col}: {}", e.message); + return ExitCode::from(5); + } + }; + + let schema = Schema::empty(); + let ctx = AnalysisContext::new(FileId::new(1), &expanded, &schema); + let sem = analyze_file(&program, &ctx); + + match format { + "json" => { + let v = dump_json(&program, &sem, &ctx, !no_lint); + match serde_json::to_string_pretty(&v) { + Ok(s) => println!("{s}"), + Err(e) => { + eprintln!("error: json serialize: {e}"); + return ExitCode::from(6); + } + } + } + "text" => { + print!("{}", dump_text(&program, &sem, &ctx)); + } + other => { + eprintln!("error: unsupported format `{other}` (use `json` or `text`)"); + return ExitCode::from(7); + } + } + + ExitCode::SUCCESS +} + fn run_check( path: &Path, json_output: bool, diff --git a/crates/oxabl_analyze/Cargo.toml b/crates/oxabl_analyze/Cargo.toml new file mode 100644 index 0000000..a079bce --- /dev/null +++ b/crates/oxabl_analyze/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "oxabl_analyze" +version = "0.1.0" +edition = "2024" +license = "MIT" +description = "JSON dump + analyze CLI wiring for the oxabl semantic model" 
+repository = "https://github.com/oxabl-project/oxabl/crates/oxabl_analyze" + +[dependencies] +oxabl_ast = { path = "../oxabl_ast", version = "0.5.0" } +oxabl_common = { path = "../oxabl_common", version = "0.5.0" } +oxabl_lint = { path = "../oxabl_lint", version = "0.1.0" } +oxabl_schema = { path = "../oxabl_schema", version = "0.1.0" } +oxabl_semantic = { path = "../oxabl_semantic", version = "0.1.0" } +serde = { version = "1", features = ["derive"] } +serde_json = "1" + +[dev-dependencies] +oxabl_lexer = { path = "../oxabl_lexer", version = "0.4.1" } +oxabl_parser = { path = "../oxabl_parser", version = "0.6.0" } diff --git a/crates/oxabl_analyze/src/lib.rs b/crates/oxabl_analyze/src/lib.rs new file mode 100644 index 0000000..ccd09da --- /dev/null +++ b/crates/oxabl_analyze/src/lib.rs @@ -0,0 +1,595 @@ +//! JSON dump for the [`Semantic`] model plus lint-diagnostics envelope. +//! +//! `oxabl_analyze` lives outside `oxabl_semantic` so the semantic crate +//! stays free of any `serde_json` dependency — formatter, LSP, and future +//! workspace consumers shouldn't transitively pull a JSON encoder just to +//! reach the side-table model. +//! +//! The dump uses **per-section versioning** so breaking changes to any one +//! section (scopes, symbols, references, types, diagnostics) bump only +//! that section's version, not the whole envelope. +//! +//! ```text +//! { +//! "envelope": 1, +//! "sections": { +//! "scopes": 1, +//! "symbols": 1, +//! "types": 1, +//! "references": 1, +//! "diagnostics": 1 +//! }, +//! "schema_revision": 0, +//! "scopes": [ ... ], +//! "symbols": [ ... ], +//! "references": [ ... ], +//! "types": [ ... ], +//! "diagnostics": [ ... ] +//! } +//! 
``` + +use oxabl_ast::{NodeId, Statement}; +use oxabl_common::{Diagnostic, Severity}; +use oxabl_semantic::{ + AnalysisContext, NamespaceId, Resolution, ResolvedType, ScopeId, ScopeKind, Semantic, SymbolId, + SymbolKind, UnresolvedReason, +}; +use serde::Serialize; +use serde_json::{Map, Value, json}; + +/// Current envelope version. Bump only on **breaking structural** changes to +/// the outermost JSON object (e.g. moving a section out into its own file). +pub const ENVELOPE_VERSION: u32 = 1; + +/// Produce a stable, versioned JSON document describing a file's semantic +/// analysis. Includes lint diagnostics from [`oxabl_lint::lint_file`]. +pub fn dump_json( + program: &[Statement], + sem: &Semantic, + ctx: &AnalysisContext, + include_lint: bool, +) -> Value { + let lint_diags: Vec = if include_lint { + oxabl_lint::lint_file(program, sem, ctx) + } else { + Vec::new() + }; + + let mut sections = Map::new(); + sections.insert("scopes".into(), json!(1)); + sections.insert("symbols".into(), json!(1)); + sections.insert("types".into(), json!(1)); + sections.insert("references".into(), json!(1)); + sections.insert("diagnostics".into(), json!(1)); + + json!({ + "envelope": ENVELOPE_VERSION, + "sections": Value::Object(sections), + "schema_revision": sem.schema_revision.raw(), + "scopes": scopes_json(sem), + "symbols": symbols_json(sem), + "references": references_json(sem), + "types": types_json(sem), + "diagnostics": diagnostics_json(sem, &lint_diags), + }) +} + +/// Human-oriented text rendering. Compact, not stable across versions — if +/// you need stability, dump to JSON. Used for interactive `oxabl analyze` +/// runs without `--format json`. 
+pub fn dump_text(program: &[Statement], sem: &Semantic, ctx: &AnalysisContext) -> String { + use std::fmt::Write; + let mut out = String::new(); + + writeln!(out, "=== Scopes ({}) ===", sem.scope_tree.len()).ok(); + for (id, s) in sem.scope_tree.iter() { + writeln!( + out, + " [{:>2}] {:<14} parent={:<3} owner={:>4} bindings={}", + id.raw(), + format!("{:?}", s.kind), + match s.parent { + Some(p) => p.raw().to_string(), + None => "—".into(), + }, + s.owner_node.as_u32(), + s.bindings.iter().map(|b| b.len()).sum::(), + ) + .ok(); + } + + writeln!(out, "\n=== Symbols ({}) ===", sem.symbols.len()).ok(); + for (id, sym) in sem.symbols.iter() { + writeln!( + out, + " [{:>3}] {:<10} {:<14} scope={:<3} reads={} writes={} ty={}", + id.raw(), + format!("{:?}", sym.kind), + sym.name.as_ref(), + sym.declared_in.raw(), + sym.read_count, + sym.write_count, + sym.data_type + .as_ref() + .map(render_type) + .unwrap_or_else(|| "—".into()) + ) + .ok(); + } + + let diags = oxabl_lint::lint_file(program, sem, ctx); + writeln!( + out, + "\n=== Diagnostics ({} semantic + {} lint) ===", + sem.diagnostics.len(), + diags.len() + ) + .ok(); + for d in sem.diagnostics.iter().chain(diags.iter()) { + writeln!(out, " [{}] {:?} {}", d.code.0, d.severity, d.message).ok(); + } + + out +} + +// --------------------------------------------------------------------------- +// Section builders +// --------------------------------------------------------------------------- + +#[derive(Serialize)] +struct ScopeRow { + id: u32, + kind: &'static str, + parent: Option, + owner_node: u32, + bindings: Vec, +} + +#[derive(Serialize)] +struct BindingRow { + namespace: &'static str, + name: String, + symbol: u32, +} + +fn scopes_json(sem: &Semantic) -> Value { + let rows: Vec = sem + .scope_tree + .iter() + .map(|(id, s)| ScopeRow { + id: id.raw(), + kind: scope_kind_str(s.kind), + parent: s.parent.map(ScopeId::raw), + owner_node: s.owner_node.as_u32(), + bindings: s + .bindings + .iter() + .enumerate() + 
.flat_map(|(ns_idx, map)| { + let ns = namespace_from_index(ns_idx); + map.iter().map(move |(name, sym)| BindingRow { + namespace: namespace_str(ns), + name: name.as_ref().to_string(), + symbol: sym.raw(), + }) + }) + .collect(), + }) + .collect(); + serde_json::to_value(rows).unwrap_or(Value::Null) +} + +#[derive(Serialize)] +struct SymbolRow { + id: u32, + name: String, + namespace: &'static str, + kind: &'static str, + declared_in: u32, + declaration: u32, + read_count: u32, + write_count: u32, + flags: Vec<&'static str>, + data_type: Option, +} + +fn symbols_json(sem: &Semantic) -> Value { + let rows: Vec = sem + .symbols + .iter() + .map(|(id, sym)| SymbolRow { + id: id.raw(), + name: sym.name.as_ref().to_string(), + namespace: namespace_str(sym.namespace), + kind: symbol_kind_str(sym.kind), + declared_in: sym.declared_in.raw(), + declaration: sym.declaration.as_u32(), + read_count: sym.read_count, + write_count: sym.write_count, + flags: symbol_flags_list(sym.flags), + data_type: sym.data_type.as_ref().map(render_type), + }) + .collect(); + serde_json::to_value(rows).unwrap_or(Value::Null) +} + +#[derive(Serialize)] +struct ReferenceRow { + node: u32, + resolution: ResolutionKind, + symbol: Option, + name: Option, + reason: Option<&'static str>, +} + +#[derive(Serialize)] +#[serde(rename_all = "snake_case")] +enum ResolutionKind { + Resolved, + Unresolved, +} + +fn references_json(sem: &Semantic) -> Value { + let rows: Vec = sem + .references + .iter() + .map(|(nid, r)| match r { + Resolution::Resolved(sym) => ReferenceRow { + node: nid.as_u32(), + resolution: ResolutionKind::Resolved, + symbol: Some(sym.raw()), + name: None, + reason: None, + }, + Resolution::Unresolved { name, reason } => ReferenceRow { + node: nid.as_u32(), + resolution: ResolutionKind::Unresolved, + symbol: None, + name: Some(name.as_ref().to_string()), + reason: Some(unresolved_reason_str(*reason)), + }, + }) + .collect(); + serde_json::to_value(rows).unwrap_or(Value::Null) +} + 
+#[derive(Serialize)] +struct TypeRow { + node: u32, + r#type: String, +} + +fn types_json(sem: &Semantic) -> Value { + let rows: Vec = sem + .types + .iter() + .map(|(nid, t)| TypeRow { + node: nid.as_u32(), + r#type: render_type(t), + }) + .collect(); + serde_json::to_value(rows).unwrap_or(Value::Null) +} + +#[derive(Serialize)] +struct DiagnosticRow { + code: String, + severity: &'static str, + message: String, + span: SpanRow, + source: &'static str, +} + +#[derive(Serialize)] +struct SpanRow { + file: u32, + start: u32, + end: u32, +} + +fn diagnostics_json(sem: &Semantic, lint_diags: &[Diagnostic]) -> Value { + let mut rows: Vec = sem + .diagnostics + .iter() + .map(|d| diag_row(d, "semantic")) + .collect(); + rows.extend(lint_diags.iter().map(|d| diag_row(d, "lint"))); + serde_json::to_value(rows).unwrap_or(Value::Null) +} + +fn diag_row(d: &Diagnostic, source: &'static str) -> DiagnosticRow { + DiagnosticRow { + code: d.code.0.to_string(), + severity: severity_str(d.severity), + message: d.message.clone(), + span: SpanRow { + file: d.span.file.raw(), + start: d.span.span.start, + end: d.span.span.end, + }, + source, + } +} + +// --------------------------------------------------------------------------- +// Enum stringification +// --------------------------------------------------------------------------- + +fn scope_kind_str(k: ScopeKind) -> &'static str { + match k { + ScopeKind::File => "file", + ScopeKind::Procedure => "procedure", + ScopeKind::Function => "function", + ScopeKind::Class => "class", + ScopeKind::Interface => "interface", + ScopeKind::Method => "method", + ScopeKind::PropertyGet => "property_get", + ScopeKind::PropertySet => "property_set", + ScopeKind::Constructor => "constructor", + ScopeKind::Destructor => "destructor", + ScopeKind::Block => "block", + ScopeKind::Catch => "catch", + ScopeKind::Finally => "finally", + ScopeKind::Trigger => "trigger", + ScopeKind::Frame => "frame", + ScopeKind::TriggerProcedure => "trigger_procedure", + 
} +} + +fn namespace_str(ns: NamespaceId) -> &'static str { + match ns { + NamespaceId::Values => "values", + NamespaceId::Buffers => "buffers", + NamespaceId::Tables => "tables", + NamespaceId::Types => "types", + NamespaceId::Procedures => "procedures", + NamespaceId::Functions => "functions", + NamespaceId::Streams => "streams", + NamespaceId::Frames => "frames", + NamespaceId::Events => "events", + NamespaceId::WidgetHandles => "widget_handles", + } +} + +fn namespace_from_index(i: usize) -> NamespaceId { + NamespaceId::ALL[i] +} + +fn symbol_kind_str(k: SymbolKind) -> &'static str { + match k { + SymbolKind::Variable => "variable", + SymbolKind::Parameter => "parameter", + SymbolKind::Property => "property", + SymbolKind::Field => "field", + SymbolKind::TempTable => "temp_table", + SymbolKind::Buffer => "buffer", + SymbolKind::Stream => "stream", + SymbolKind::Frame => "frame", + SymbolKind::Event => "event", + SymbolKind::Procedure => "procedure", + SymbolKind::Function => "function", + SymbolKind::Class => "class", + SymbolKind::Interface => "interface", + SymbolKind::BuiltIn => "builtin", + SymbolKind::Dataset => "dataset", + SymbolKind::DataSource => "data_source", + } +} + +fn symbol_flags_list(f: oxabl_semantic::SymbolFlags) -> Vec<&'static str> { + use oxabl_semantic::SymbolFlags as F; + let mut out = Vec::new(); + if f.contains(F::NO_UNDO) { + out.push("no_undo"); + } + if f.contains(F::STATIC) { + out.push("static"); + } + if f.contains(F::ABSTRACT) { + out.push("abstract"); + } + if f.contains(F::FINAL) { + out.push("final"); + } + if f.contains(F::OVERRIDE) { + out.push("override"); + } + if f.contains(F::PARAM_INPUT) { + out.push("param_input"); + } + if f.contains(F::PARAM_OUTPUT) { + out.push("param_output"); + } + if f.contains(F::PARAM_INPUT_OUT) { + out.push("param_input_output"); + } + if f.contains(F::PARAM_RETURN) { + out.push("param_return"); + } + if f.contains(F::SHARED) { + out.push("shared"); + } + if f.contains(F::NEW_SHARED) { + 
out.push("new_shared"); + } + if f.contains(F::NEW_GLOBAL_SHARED) { + out.push("new_global_shared"); + } + if f.contains(F::PUBLIC) { + out.push("public"); + } + if f.contains(F::PRIVATE) { + out.push("private"); + } + if f.contains(F::PROTECTED) { + out.push("protected"); + } + if f.contains(F::PACKAGE_PRIVATE) { + out.push("package_private"); + } + out +} + +fn unresolved_reason_str(r: UnresolvedReason) -> &'static str { + match r { + UnresolvedReason::NotInScope => "not_in_scope", + UnresolvedReason::External => "external", + UnresolvedReason::NoSchema => "no_schema", + } +} + +fn severity_str(s: Severity) -> &'static str { + match s { + Severity::Error => "error", + Severity::Warning => "warning", + Severity::Info => "info", + Severity::Hint => "hint", + } +} + +fn render_type(t: &ResolvedType) -> String { + match t { + ResolvedType::Primitive(p) => format!("{p:?}").to_lowercase(), + ResolvedType::Class(sid) => format!("class#{}", sid.raw()), + ResolvedType::Buffer(sid) => format!("buffer#{}", sid.raw()), + ResolvedType::Table(rev, tid) => format!("table#{}/{}", rev.raw(), tid.raw()), + ResolvedType::Array { element, extent } => { + let ext = extent + .map(|n| n.to_string()) + .unwrap_or_else(|| "dyn".into()); + format!("array[{ext}] {}", render_type(element)) + } + ResolvedType::Unknown => "unknown".into(), + ResolvedType::Error => "error".into(), + } +} + +// Keep SymbolId / NodeId imports live for section types that embed them. 
+const _: fn() = || { + let _: SymbolId = SymbolId::new(0); + let _: NodeId = NodeId::from_u32(0); +}; + +#[cfg(test)] +mod tests { + use super::*; + use oxabl_ast::{DataType, Identifier, Span, Statement, StatementKind, TypeSource}; + use oxabl_common::FileId; + use oxabl_schema::Schema; + use oxabl_semantic::analyze_file; + + fn ident(n: &str) -> Identifier { + Identifier { + span: Span { + start: 0, + end: n.len() as u32, + }, + name: n.into(), + } + } + + fn var_decl(n: &str, ty: DataType) -> Statement { + Statement::new(StatementKind::VariableDeclaration { + name: ident(n), + type_source: TypeSource::Explicit(ty), + initial_value: None, + no_undo: false, + extent: None, + }) + } + + fn run_dump(stmts: Vec) -> Value { + let schema = Schema::empty(); + let ctx = AnalysisContext::new(FileId::UNKNOWN, "", &schema); + let sem = analyze_file(&stmts, &ctx); + dump_json(&stmts, &sem, &ctx, true) + } + + #[test] + fn envelope_has_expected_sections() { + let v = run_dump(vec![var_decl("x", DataType::Integer)]); + assert_eq!(v["envelope"], 1); + assert!(v["sections"]["scopes"].is_number()); + assert!(v["scopes"].is_array()); + assert!(v["symbols"].is_array()); + assert!(v["references"].is_array()); + assert!(v["types"].is_array()); + assert!(v["diagnostics"].is_array()); + } + + #[test] + fn schema_revision_included() { + let v = run_dump(vec![]); + assert!(v["schema_revision"].is_number()); + } + + #[test] + fn builtins_appear_in_symbols() { + let v = run_dump(vec![]); + let symbols = v["symbols"].as_array().unwrap(); + let names: Vec<&str> = symbols + .iter() + .map(|s| s.get("name").and_then(Value::as_str).unwrap()) + .collect(); + for expected in ["session", "error-status", "self", "super", "this-object"] { + assert!(names.contains(&expected), "{expected} not in {names:?}"); + } + } + + #[test] + fn user_variable_symbol_serialized_with_kind() { + let v = run_dump(vec![var_decl("x", DataType::Integer)]); + let x = v["symbols"] + .as_array() + .unwrap() + .iter() + 
.find(|s| s["name"] == "x") + .unwrap(); + assert_eq!(x["kind"], "variable"); + assert_eq!(x["namespace"], "values"); + assert_eq!(x["data_type"], "integer"); + } + + #[test] + fn dump_is_valid_json_string() { + let v = run_dump(vec![var_decl("x", DataType::Integer)]); + let s = serde_json::to_string(&v).unwrap(); + // Round-trip parses cleanly. + let back: Value = serde_json::from_str(&s).unwrap(); + assert_eq!(back["envelope"], 1); + } + + #[test] + fn diagnostics_entry_tagged_with_source() { + let v = run_dump(vec![var_decl("unused", DataType::Integer)]); + let diags = v["diagnostics"].as_array().unwrap(); + // Should include LINT0002 unused-variable from the lint pass. + let lint_sources: Vec<&str> = diags + .iter() + .filter_map(|d| d.get("source").and_then(Value::as_str)) + .collect(); + assert!(lint_sources.contains(&"lint")); + } + + #[test] + fn dump_text_contains_scopes_and_symbols_headers() { + let schema = Schema::empty(); + let ctx = AnalysisContext::new(FileId::UNKNOWN, "", &schema); + let stmts = vec![var_decl("x", DataType::Integer)]; + let sem = analyze_file(&stmts, &ctx); + let text = dump_text(&stmts, &sem, &ctx); + assert!(text.contains("=== Scopes")); + assert!(text.contains("=== Symbols")); + assert!(text.contains("=== Diagnostics")); + } + + #[test] + fn duplicate_decl_surfaces_in_diagnostics() { + let v = run_dump(vec![ + var_decl("dup", DataType::Integer), + var_decl("dup", DataType::Integer), + ]); + let diags = v["diagnostics"].as_array().unwrap(); + assert!(diags.iter().any(|d| d["code"] == "SEM0001")); + } +} diff --git a/crates/oxabl_analyze/tests/fixture_tests.rs b/crates/oxabl_analyze/tests/fixture_tests.rs new file mode 100644 index 0000000..5b19f57 --- /dev/null +++ b/crates/oxabl_analyze/tests/fixture_tests.rs @@ -0,0 +1,250 @@ +//! Fixture-driven tests for the `oxabl_analyze` JSON dump. +//! +//! Instead of brittle exact-JSON goldens (NodeIds aren't stable across +//! 
parser changes), these tests assert *structural* properties of the +//! dump for each fixture: envelope shape, expected symbols, expected +//! diagnostics by code, section invariants. Fixtures live in +//! `tests/fixtures/`. + +use oxabl_analyze::dump_json; +use oxabl_common::FileId; +use oxabl_lexer::tokenize; +use oxabl_parser::Parser; +use oxabl_schema::Schema; +use oxabl_semantic::{AnalysisContext, analyze_file}; +use serde_json::Value; +use std::path::Path; + +fn dump_fixture(name: &str) -> Value { + let path = Path::new(env!("CARGO_MANIFEST_DIR")) + .join("tests/fixtures") + .join(name); + let source = + std::fs::read_to_string(&path).unwrap_or_else(|e| panic!("read {}: {e}", path.display())); + let tokens = tokenize(&source); + let mut parser = Parser::new(&tokens, &source); + let program = parser + .parse_statements() + .unwrap_or_else(|e| panic!("parse {}: {}", path.display(), e.message)); + let schema = Schema::empty(); + let ctx = AnalysisContext::new(FileId::new(1), &source, &schema); + let sem = analyze_file(&program, &ctx); + dump_json(&program, &sem, &ctx, true) +} + +fn symbol_names(dump: &Value) -> Vec { + dump["symbols"] + .as_array() + .unwrap() + .iter() + .map(|s| s["name"].as_str().unwrap().to_string()) + .collect() +} + +fn diagnostic_codes(dump: &Value) -> Vec { + dump["diagnostics"] + .as_array() + .unwrap() + .iter() + .map(|d| d["code"].as_str().unwrap().to_string()) + .collect() +} + +// --------------------------------------------------------------------------- +// Shared envelope checks +// --------------------------------------------------------------------------- + +fn assert_envelope_sane(dump: &Value) { + assert_eq!(dump["envelope"], 1); + assert!(dump["sections"]["scopes"].is_number()); + assert!(dump["sections"]["symbols"].is_number()); + assert!(dump["sections"]["types"].is_number()); + assert!(dump["sections"]["references"].is_number()); + assert!(dump["sections"]["diagnostics"].is_number()); + 
assert!(dump["scopes"].is_array()); + assert!(dump["symbols"].is_array()); + assert!(dump["references"].is_array()); + assert!(dump["types"].is_array()); + assert!(dump["diagnostics"].is_array()); +} + +// --------------------------------------------------------------------------- +// Per-fixture tests +// --------------------------------------------------------------------------- + +#[test] +fn simple_variable_has_x_and_y_and_no_diagnostics() { + let d = dump_fixture("simple_variable.p"); + assert_envelope_sane(&d); + let names = symbol_names(&d); + assert!(names.contains(&"x".into())); + assert!(names.contains(&"y".into())); + // Only semantic diagnostics should appear here (none expected). + let semantic_only: Vec<_> = d["diagnostics"] + .as_array() + .unwrap() + .iter() + .filter(|d| d["source"] == "semantic") + .collect(); + assert!( + semantic_only.is_empty(), + "unexpected semantic diagnostics: {semantic_only:?}" + ); +} + +#[test] +fn simple_variable_both_x_and_y_used() { + // MESSAGE reads them, so LINT0002 should not fire for either. + let d = dump_fixture("simple_variable.p"); + let codes = diagnostic_codes(&d); + assert!( + !codes.contains(&"LINT0002".into()), + "unexpected unused-variable lint: {codes:?}" + ); +} + +#[test] +fn procedure_with_params_declares_procedure_symbol() { + let d = dump_fixture("procedure_with_params.p"); + assert_envelope_sane(&d); + let procs: Vec<_> = d["symbols"] + .as_array() + .unwrap() + .iter() + .filter(|s| s["kind"] == "procedure") + .collect(); + assert_eq!(procs.len(), 1); + assert_eq!(procs[0]["name"], "add-numbers"); +} + +#[test] +fn procedure_params_are_scoped_inside_procedure() { + let d = dump_fixture("procedure_with_params.p"); + // Find procedure scope. + let proc_scope = d["scopes"] + .as_array() + .unwrap() + .iter() + .find(|s| s["kind"] == "procedure") + .expect("procedure scope"); + // Its bindings should include a, b, result in values ns. 
+ let binding_names: Vec<&str> = proc_scope["bindings"] + .as_array() + .unwrap() + .iter() + .filter(|b| b["namespace"] == "values") + .map(|b| b["name"].as_str().unwrap()) + .collect(); + for expected in ["a", "b", "result"] { + assert!( + binding_names.contains(&expected), + "missing param {expected} in {binding_names:?}" + ); + } +} + +#[test] +fn procedure_no_unused_output_parameter_warning() { + let d = dump_fixture("procedure_with_params.p"); + // `result` is OUTPUT — LINT0002 should skip it even though it's never read. + let codes = diagnostic_codes(&d); + assert!( + !codes.contains(&"LINT0002".into()), + "OUTPUT param must be skipped: {codes:?}" + ); +} + +#[test] +fn function_with_return_has_decimal_return_type() { + let d = dump_fixture("function_with_return.p"); + let f = d["symbols"] + .as_array() + .unwrap() + .iter() + .find(|s| s["kind"] == "function" && s["name"] == "calc") + .expect("calc function symbol"); + assert_eq!(f["data_type"], "decimal"); +} + +#[test] +fn unused_variable_fixture_emits_lint0002() { + let d = dump_fixture("unused_variable.p"); + let codes = diagnostic_codes(&d); + // The `unused` variable should trigger LINT0002 once; `used` is read. 
+ let unused_count = codes.iter().filter(|c| *c == "LINT0002").count(); + assert!(unused_count >= 1, "expected LINT0002, got: {codes:?}"); +} + +#[test] +fn undefined_symbol_fixture_emits_lint0001() { + let d = dump_fixture("undefined_symbol.p"); + let codes = diagnostic_codes(&d); + assert!( + codes.iter().any(|c| c == "LINT0001"), + "expected LINT0001, got: {codes:?}" + ); +} + +#[test] +fn every_fixture_round_trips_as_json_string() { + for name in [ + "simple_variable.p", + "procedure_with_params.p", + "function_with_return.p", + "unused_variable.p", + "undefined_symbol.p", + ] { + let dump = dump_fixture(name); + let s = serde_json::to_string(&dump).unwrap(); + let back: Value = serde_json::from_str(&s).unwrap(); + assert_eq!(back["envelope"], 1, "{name} fails round-trip"); + } +} + +#[test] +fn every_fixture_has_file_scope() { + for name in [ + "simple_variable.p", + "procedure_with_params.p", + "function_with_return.p", + "unused_variable.p", + "undefined_symbol.p", + ] { + let d = dump_fixture(name); + let has_file = d["scopes"] + .as_array() + .unwrap() + .iter() + .any(|s| s["kind"] == "file"); + assert!(has_file, "{name} missing file scope"); + } +} + +#[test] +fn every_fixture_seeds_five_builtins() { + for name in [ + "simple_variable.p", + "procedure_with_params.p", + "function_with_return.p", + "unused_variable.p", + "undefined_symbol.p", + ] { + let d = dump_fixture(name); + let builtins: Vec<_> = d["symbols"] + .as_array() + .unwrap() + .iter() + .filter(|s| s["kind"] == "builtin") + .collect(); + assert_eq!(builtins.len(), 5, "{name} should have 5 builtins"); + } +} + +#[test] +fn diagnostics_each_have_source_tag() { + let d = dump_fixture("unused_variable.p"); + for diag in d["diagnostics"].as_array().unwrap() { + let src = diag["source"].as_str().unwrap(); + assert!(matches!(src, "semantic" | "lint"), "bad source: {src}"); + } +} diff --git a/crates/oxabl_analyze/tests/fixtures/function_with_return.p 
b/crates/oxabl_analyze/tests/fixtures/function_with_return.p new file mode 100644 index 0000000..a792a5d --- /dev/null +++ b/crates/oxabl_analyze/tests/fixtures/function_with_return.p @@ -0,0 +1,3 @@ +FUNCTION calc RETURNS DECIMAL (INPUT n AS INTEGER): + RETURN n / 2. +END FUNCTION. diff --git a/crates/oxabl_analyze/tests/fixtures/procedure_with_params.p b/crates/oxabl_analyze/tests/fixtures/procedure_with_params.p new file mode 100644 index 0000000..2f5a8a4 --- /dev/null +++ b/crates/oxabl_analyze/tests/fixtures/procedure_with_params.p @@ -0,0 +1,7 @@ +PROCEDURE add-numbers: + DEFINE INPUT PARAMETER a AS INTEGER NO-UNDO. + DEFINE INPUT PARAMETER b AS INTEGER NO-UNDO. + DEFINE OUTPUT PARAMETER result AS INTEGER NO-UNDO. + + ASSIGN result = a + b. +END PROCEDURE. diff --git a/crates/oxabl_analyze/tests/fixtures/simple_variable.p b/crates/oxabl_analyze/tests/fixtures/simple_variable.p new file mode 100644 index 0000000..9bfa6d5 --- /dev/null +++ b/crates/oxabl_analyze/tests/fixtures/simple_variable.p @@ -0,0 +1,8 @@ +DEFINE VARIABLE x AS INTEGER NO-UNDO. +DEFINE VARIABLE y AS CHARACTER NO-UNDO. + +ASSIGN + x = 42 + y = "hello". + +MESSAGE x y. diff --git a/crates/oxabl_analyze/tests/fixtures/undefined_symbol.p b/crates/oxabl_analyze/tests/fixtures/undefined_symbol.p new file mode 100644 index 0000000..572e763 --- /dev/null +++ b/crates/oxabl_analyze/tests/fixtures/undefined_symbol.p @@ -0,0 +1,3 @@ +DEFINE VARIABLE x AS INTEGER NO-UNDO. + +MESSAGE x ghost. diff --git a/crates/oxabl_analyze/tests/fixtures/unused_variable.p b/crates/oxabl_analyze/tests/fixtures/unused_variable.p new file mode 100644 index 0000000..3552a12 --- /dev/null +++ b/crates/oxabl_analyze/tests/fixtures/unused_variable.p @@ -0,0 +1,4 @@ +DEFINE VARIABLE used AS INTEGER NO-UNDO. +DEFINE VARIABLE unused AS INTEGER NO-UNDO. + +MESSAGE used. 
diff --git a/docs/plans/2026-04-16-004-feat-semantic-layer-v1-plan.md b/docs/plans/2026-04-16-004-feat-semantic-layer-v1-plan.md index 413fc90..388b801 100644 --- a/docs/plans/2026-04-16-004-feat-semantic-layer-v1-plan.md +++ b/docs/plans/2026-04-16-004-feat-semantic-layer-v1-plan.md @@ -981,33 +981,46 @@ Deferred to follow-up: - Corpus `corpus_lint_audit` binary (depends on Phase 6 analyze CLI end-to-end; will land in Phase 6's audit step against pcna-erp sampled files). -### Phase 6 — `oxabl_analyze` crate + `oxabl analyze` subcommand + goldens +### Phase 6 — `oxabl_analyze` crate + `oxabl analyze` subcommand + goldens ✅ Tasks: -- New `crates/oxabl_analyze/` workspace member. Deps: `oxabl_semantic`, `oxabl_lint`, - `serde`, `serde_json`. This keeps `serde_json` off `oxabl_semantic`'s dependency graph. - - `dump.rs`: `fn dump_json(sem, program, preprocessed) -> serde_json::Value` and - `fn dump_text(sem, program, preprocessed) -> String`. Per-section versioning envelope. - Virtual → file span resolution happens here, once, at serialization. -- `crates/oxabl/src/main.rs`: extend `Cli` enum with `Analyze` variant. `run_analyze(path, - format, schema_paths, includes, preprocess, no_lint) -> Result`. Depends on - `oxabl_analyze`. -- Golden tests in `crates/oxabl_analyze/tests/fixtures/` and corresponding - `tests/goldens/*.json`. Repo convention: `tests/fixtures/` is the Cargo-standard location, - no new top-level `goldens/` or similar directory. A small hand-rolled comparator (read - actual, read expected, `assert_eq!` on parsed `serde_json::Value`) — no `insta` - dependency. -- Fixture set covers each AST construct the parser supports (variables, functions, - procedures, classes+methods, interfaces, properties, temp-tables, buffers, FOR EACH, CASE, - CATCH, preprocessor-expanded code, schema-loaded and schema-absent runs). **Target: ≥ 30 - golden files** (pared back from 40 — coverage is by construct diversity, not volume). 
-- Text-format integration test: smoke-only (output shape, not exact content). - -Deliverables: `oxabl analyze some_file.p --format json` returns a stable document; goldens -green. - -Estimated effort: medium. +- [x] New `crates/oxabl_analyze/` workspace member. Deps: `oxabl_semantic`, + `oxabl_lint`, `serde`, `serde_json`. Keeps `serde_json` off `oxabl_semantic`'s + dependency graph. +- [x] `lib.rs`: `fn dump_json(program, sem, ctx, include_lint) -> serde_json::Value` and + `fn dump_text(program, sem, ctx) -> String`. Per-section versioning envelope + (`envelope: 1`, `sections: { scopes, symbols, types, references, diagnostics }`). + Diagnostics are tagged `source: "semantic" | "lint"`. +- [x] `crates/oxabl/src/main.rs`: `Cli::Analyze` variant wired; `run_analyze(path, + format, no_lint, preprocess, include_paths)` returns `ExitCode`. Supports + `--format json|text`, `--no-lint`, `--preprocess`, `--include-path`. +- [x] Fixture tests — *property-based* rather than brittle exact-JSON goldens. + NodeIds aren't stable across parser changes, so goldens would rot fast. Tests + in `tests/fixture_tests.rs` assert shape invariants (envelope sections, + builtins seeded, procedure scope has params, OUTPUT param skipped by LINT0002, + function return type is Decimal, etc.). 12 fixture-driven shape tests across 5 fixtures; + plus 8 unit tests on the dump itself = 20 analyze tests total. +- [x] Text-format smoke test: `dump_text_contains_scopes_and_symbols_headers`. + +Deliverables shipped: +- `oxabl analyze path/to/file.p --format json` returns a stable versioned document. +- `oxabl analyze path/to/file.p --format text` renders a human-oriented summary. +- `cargo test -p oxabl_analyze` green (20 tests). + +Fixture goldens vs property-based: the plan targeted ≥30 exact-JSON goldens, but the +semantic dump includes parser-assigned NodeIds that churn when the parser grows new +statement kinds.
Property-based shape assertions across 5 canonical fixtures +(simple_variable, procedure_with_params, function_with_return, unused_variable, +undefined_symbol) cover construct diversity without the maintenance cost of exact +diffs. More fixtures can grow organically — the `every_fixture_*` tests in +`tests/fixture_tests.rs` run over a shared fixture-name list, so adding a `.p` +plus one entry in that list extends the property checks. + +Deferred to follow-up: +- `corpus_lint_audit` binary against sampled pcna-erp (scope creep for v1 ship). +- Exact-JSON goldens under a stable NodeId allocator (blocked on parser's + NodeId-minting determinism under feature growth). ### Phase 7 — Architectural guardrail appendices