diff --git a/.gitignore b/.gitignore index 84b8c03..6ebcb4b 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,11 @@ *.swo *~ .DS_Store + +# Lazy-cloned external corpus (see tests/conformance/external/apis-guru-sync.sh) +/tests/conformance/external/apis-guru/ + +# Conformance reports are regenerated; keep the .toml/.yaml inputs tracked +/tests/conformance/coverage-report.md +/tests/conformance/json-schema-2020-12-report.md +/tests/conformance/apis-guru-report.md diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..58d3f1c --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "tests/conformance/external/json-schema-test-suite"] + path = tests/conformance/external/json-schema-test-suite + url = https://github.com/json-schema-org/JSON-Schema-Test-Suite.git diff --git a/src/analysis.rs b/src/analysis.rs index 2210cb7..e3b3789 100644 --- a/src/analysis.rs +++ b/src/analysis.rs @@ -97,7 +97,7 @@ pub struct DetectedPatterns { } /// Information about an OpenAPI operation -#[derive(Debug, Clone, serde::Serialize)] +#[derive(Debug, Clone, Default, serde::Serialize)] pub struct OperationInfo { /// Operation ID pub operation_id: String, @@ -111,6 +111,9 @@ pub struct OperationInfo { pub description: Option, /// Request body content type and schema (if any) pub request_body: Option, + /// Whether `requestBody.required` was true. Drives whether the generated + /// method takes a `Body` argument or `Option` (T11). + pub request_body_required: bool, /// Response schemas by status code pub response_schemas: BTreeMap, /// Parameters (path, query, header) @@ -639,19 +642,18 @@ impl SchemaAnalyzer { } fn extract_schemas(spec: &OpenApiSpec) -> Result> { - let schemas = spec - .components - .as_ref() - .and_then(|c| c.schemas.as_ref()) - .ok_or_else(|| { - GeneratorError::InvalidSchema("No schemas found in OpenAPI spec".to_string()) - })?; - - // Convert BTreeMap to BTreeMap for deterministic iteration order + // OAS 3.1+ requires only one of `paths`, `webhooks`, or `components`. + // A document may legitimately have no `components.schemas` (e.g. a + // webhooks-only or paths-only spec). Return an empty map in that case + // and let downstream codegen handle "no types to emit" gracefully. + let schemas = spec.components.as_ref().and_then(|c| c.schemas.as_ref()); Ok(schemas - .iter() - .map(|(k, v)| (k.clone(), v.clone())) - .collect()) + .map(|m| { + m.iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect::>() + }) + .unwrap_or_default()) } pub fn analyze(&mut self) -> Result { @@ -1041,7 +1043,8 @@ impl SchemaAnalyzer { ) -> Result { let details = schema.details(); let description = details.description.clone(); - let nullable = details.is_nullable(); + // Combine 3.0-style `nullable: true` with 3.1's `type: ["X", "null"]`. + let nullable = details.is_nullable() || schema.type_array_contains_null(); let mut dependencies = HashSet::new(); let schema_type = match schema { @@ -1053,16 +1056,22 @@ impl SchemaAnalyzer { dependencies.insert(target.clone()); SchemaType::Reference { target } } - Schema::RecursiveRef { recursive_ref, .. } => { - // Handle recursive references + Schema::RecursiveRef { recursive_ref, .. } + | Schema::DynamicRef { + dynamic_ref: recursive_ref, + .. + } => { + // Handle recursive / dynamic references. J1: full $dynamicRef + // resolution against $dynamicAnchor scopes is a follow-up; for + // now we treat them like recursive refs (self-reference when + // it's a fragment to the same schema, otherwise resolve via + // schema name). if recursive_ref == "#" { - // Self-reference to the current schema dependencies.insert(schema_name.to_string()); SchemaType::Reference { target: schema_name.to_string(), } } else { - // Handle other recursive reference patterns let target = self .extract_schema_name(recursive_ref) .unwrap_or(schema_name) @@ -1071,8 +1080,12 @@ impl SchemaAnalyzer { SchemaType::Reference { target } } } - Schema::Typed { schema_type, .. } => { - match schema_type { + Schema::Typed { .. } | Schema::TypedMulti { .. } => { + let primary = schema + .schema_type() + .cloned() + .unwrap_or(OpenApiSchemaType::Object); + match primary { OpenApiSchemaType::String => { if let Some(values) = details.string_enum_values() { SchemaType::StringEnum { values } @@ -3181,7 +3194,8 @@ impl SchemaAnalyzer { Some(&Discriminator { property_name: disc_field, mapping: None, - extra: BTreeMap::new(), + default_mapping: None, + extensions: crate::extensions::Extensions::default(), }), context_name, dependencies, @@ -3451,25 +3465,22 @@ impl SchemaAnalyzer { .unwrap_or(true); if no_properties { - // Check for constraints that would make this a structured type - let has_structural_constraints = - // Has required fields (other than just 'type') - details.required.as_ref() - .map(|req| req.iter().any(|r| r != "type")) - .unwrap_or(false) - // Has pattern-based property definitions - || details.extra.contains_key("patternProperties") - // Has property name schema - || details.extra.contains_key("propertyNames") - // Has min/max property constraints - || details.extra.contains_key("minProperties") - || details.extra.contains_key("maxProperties") - // Has specific property dependencies - || details.extra.contains_key("dependencies") - // Has conditional schemas - || details.extra.contains_key("if") - || details.extra.contains_key("then") - || details.extra.contains_key("else"); + // Check for constraints that would make this a structured type. + // After J5–J8, these are typed fields rather than `extra` lookups. + let has_structural_constraints = details + .required + .as_ref() + .map(|req| req.iter().any(|r| r != "type")) + .unwrap_or(false) + || details.pattern_properties.is_some() + || details.property_names.is_some() + || details.min_properties.is_some() + || details.max_properties.is_some() + || details.dependent_required.is_some() + || details.dependent_schemas.is_some() + || details.if_schema.is_some() + || details.then_schema.is_some() + || details.else_schema.is_some(); return !has_structural_constraints; } @@ -3496,28 +3507,92 @@ impl SchemaAnalyzer { if let Some(paths) = &spec.paths { for (path, path_item) in paths { - for (method, operation) in path_item.operations() { - // Generate operation ID if missing - let operation_id = operation - .operation_id - .clone() - .unwrap_or_else(|| Self::generate_operation_id(method, path)); - - let op_info = self.analyze_single_operation( - &operation_id, - method, - path, - operation, - path_item.parameters.as_ref(), - analysis, - )?; - analysis.operations.insert(operation_id, op_info); - } + // H11: Path Item may be a $ref to components/pathItems. Resolve here. + let resolved = self.resolve_path_item(path_item, &spec)?; + let pi: &crate::openapi::PathItem = resolved.as_ref().unwrap_or(path_item); + self.ingest_path_item_operations(path, pi, analysis)?; + } + } + // T4: walk webhooks the same way as paths. Per OAS 3.1+, webhooks are + // server→consumer callbacks: their request bodies describe payloads + // the *server* sends *to* the consumer. We currently emit them as + // ordinary operations so their request/response types land in the + // generated client; a future bead may add a typed Webhook enum and + // dispatcher. + if let Some(webhooks) = &spec.webhooks { + for (name, path_item) in webhooks { + let synthetic_path = format!("__webhook__/{name}"); + self.ingest_path_item_operations(&synthetic_path, path_item, analysis)?; } } Ok(()) } + /// H11: Resolve a Path Item's `$ref` (3.1+ allows them) against + /// `components/pathItems`. Returns Some(resolved) when a ref was followed, + /// or None when the input is already inline. + fn resolve_path_item( + &self, + path_item: &crate::openapi::PathItem, + spec: &crate::openapi::OpenApiSpec, + ) -> Result> { + let Some(reference) = &path_item.reference else { + return Ok(None); + }; + let target_name = reference + .strip_prefix("#/components/pathItems/") + .ok_or_else(|| { + GeneratorError::UnresolvedReference(format!( + "Path Item $ref must point at #/components/pathItems/{{name}}, got {reference}" + )) + })?; + let pi = spec + .components + .as_ref() + .and_then(|c| c.path_items.as_ref()) + .and_then(|map| map.get(target_name)) + .ok_or_else(|| { + GeneratorError::UnresolvedReference(format!( + "Path Item ref {reference} not found in components/pathItems" + )) + })?; + Ok(Some(pi.clone())) + } + + fn ingest_path_item_operations( + &mut self, + path: &str, + path_item: &crate::openapi::PathItem, + analysis: &mut SchemaAnalysis, + ) -> Result<()> { + for (method, operation) in path_item.operations() { + // Generate operation ID if missing + let operation_id = operation + .operation_id + .clone() + .unwrap_or_else(|| Self::generate_operation_id(method, path)); + + let op_info = self.analyze_single_operation( + &operation_id, + method, + path, + operation, + path_item.parameters.as_ref(), + analysis, + )?; + // T6: detect operationId collisions instead of silently overwriting. + if let Some(existing) = analysis.operations.get(&operation_id) { + return Err(GeneratorError::InvalidSchema(format!( + "duplicate operationId `{}` — first at `{} {}`, then at `{} {}`. \ + OpenAPI requires operationId to be unique across the document.", + operation_id, existing.method, existing.path, method, path + ))); + } + analysis.operations.insert(operation_id, op_info); + } + Ok(()) + } + /// Generate an operation ID from method and path when not provided /// Converts paths like "/v0/servers/{serverId}" + "get" to "getV0ServersServerId" fn generate_operation_id(method: &str, path: &str) -> String { @@ -3574,6 +3649,12 @@ impl SchemaAnalyzer { summary: operation.summary.clone(), description: operation.description.clone(), request_body: None, + // Per OAS 3.x §"Request Body Object", `required` defaults to false. + request_body_required: operation + .request_body + .as_ref() + .and_then(|rb| rb.required) + .unwrap_or(false), response_schemas: BTreeMap::new(), parameters: Vec::new(), supports_streaming: false, // Will be determined by StreamingConfig, not spec @@ -3612,6 +3693,17 @@ impl SchemaAnalyzer { // Extract response schemas if let Some(responses) = &operation.responses { for (status_code, response) in responses { + // T15: SSE auto-detection. If any response declares + // `text/event-stream`, mark the operation as streaming. The + // user can still override via config; here we lift the spec + // signal so a `stream: true` parameter and an event-stream + // content type produce a streaming variant by default. + if let Some(content) = response.content.as_ref() { + if content.keys().any(|ct| ct.starts_with("text/event-stream")) { + op_info.supports_streaming = true; + } + } + if let Some(schema) = response.json_schema() { if let Some(schema_ref) = schema.reference() { // Named schema reference @@ -3637,6 +3729,21 @@ impl SchemaAnalyzer { } } + // T15: detect a `stream` boolean parameter on the operation; pair it + // with the SSE response signal above to populate stream_parameter. + if op_info.supports_streaming + && let Some(parameters) = &operation.parameters + { + for param in parameters { + if let Some(name) = param.name.as_deref() { + if name.eq_ignore_ascii_case("stream") { + op_info.stream_parameter = Some(name.to_string()); + break; + } + } + } + } + // Extract parameters (operation-level first, then merge path-item-level) if let Some(parameters) = &operation.parameters { for param in parameters { @@ -3737,7 +3844,7 @@ impl SchemaAnalyzer { &'a self, param: &'a crate::openapi::Parameter, ) -> std::borrow::Cow<'a, crate::openapi::Parameter> { - if let Some(ref_str) = param.extra.get("$ref").and_then(|v| v.as_str()) { + if let Some(ref_str) = param.reference.as_deref() { if let Some(param_name) = ref_str.strip_prefix("#/components/parameters/") { if let Some(resolved) = self.component_parameters.get(param_name) { return std::borrow::Cow::Borrowed(resolved); diff --git a/src/bin/catalog-gen.rs b/src/bin/catalog-gen.rs new file mode 100644 index 0000000..5ff7d30 --- /dev/null +++ b/src/bin/catalog-gen.rs @@ -0,0 +1,479 @@ +//! Parses the OpenAPI 3.2.0 spec markdown into a structured coverage catalog. +//! +//! Run with: cargo run --bin catalog-gen +//! Reads: tests/conformance/specs/openapi-3.2.0.md +//! Writes: tests/conformance/catalog.yaml + +use std::collections::BTreeMap; +use std::fs; +use std::path::PathBuf; + +#[derive(Debug, Default, serde::Serialize)] +struct Catalog { + spec_version: String, + spec_source: String, + objects: BTreeMap, + json_schema_2020_12_keywords: Vec, + parameter_style_matrix: Vec, + appendices: Vec, + totals: Totals, +} + +#[derive(Debug, Default, serde::Serialize)] +struct ObjectEntry { + section_anchor: String, + fields: Vec, + patterned_fields: Vec, + notes: Vec, +} + +#[derive(Debug, Default, serde::Serialize, Clone)] +struct FieldEntry { + name: String, + anchor: String, + type_str: String, + required: bool, + description_excerpt: String, +} + +#[derive(Debug, serde::Serialize)] +struct StyleEntry { + style: &'static str, + parameter_in: &'static [&'static str], + types: &'static [&'static str], + rfc6570: &'static str, +} + +#[derive(Debug, serde::Serialize)] +struct AppendixEntry { + id: &'static str, + title: &'static str, +} + +#[derive(Debug, Default, serde::Serialize)] +struct Totals { + object_count: usize, + field_count: usize, + patterned_field_count: usize, + json_schema_keyword_count: usize, + parameter_style_combo_count: usize, +} + +fn main() -> Result<(), Box> { + let workspace = workspace_root(); + let spec_path = workspace.join("tests/conformance/specs/openapi-3.2.0.md"); + let out_path = workspace.join("tests/conformance/catalog.yaml"); + + let md = fs::read_to_string(&spec_path)?; + let mut catalog = parse(&md); + + catalog.spec_source = spec_path + .strip_prefix(&workspace) + .map(|p| p.display().to_string()) + .unwrap_or_else(|_| spec_path.display().to_string()); + + catalog.json_schema_2020_12_keywords = json_schema_2020_12_keywords(); + catalog.parameter_style_matrix = parameter_style_matrix(); + catalog.appendices = appendices(); + + catalog.totals.object_count = catalog.objects.len(); + catalog.totals.field_count = catalog.objects.values().map(|o| o.fields.len()).sum(); + catalog.totals.patterned_field_count = catalog + .objects + .values() + .map(|o| o.patterned_fields.len()) + .sum(); + catalog.totals.json_schema_keyword_count = catalog.json_schema_2020_12_keywords.len(); + catalog.totals.parameter_style_combo_count = catalog.parameter_style_matrix.len(); + + let yaml = serde_yaml::to_string(&catalog)?; + let header = format!( + "# Generated by `cargo run --bin catalog-gen`. Do not edit by hand.\n\ + # Source: {}\n", + catalog.spec_source + ); + fs::write(&out_path, format!("{header}{yaml}"))?; + + println!( + "wrote {} ({} objects, {} fields, {} patterned, {} JSON Schema keywords, {} style combos)", + out_path.display(), + catalog.totals.object_count, + catalog.totals.field_count, + catalog.totals.patterned_field_count, + catalog.totals.json_schema_keyword_count, + catalog.totals.parameter_style_combo_count, + ); + Ok(()) +} + +fn workspace_root() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) +} + +fn parse(md: &str) -> Catalog { + let mut catalog = Catalog::default(); + if let Some(v) = scan_version(md) { + catalog.spec_version = v; + } + + enum State { + Outside, + InObject { + name: String, + }, + AwaitingTable { + object: String, + kind: TableKind, + }, + InTable { + object: String, + kind: TableKind, + saw_separator: bool, + }, + } + + enum TableKind { + Fixed, + Patterned, + } + + let mut state = State::Outside; + + for line in md.lines() { + match &state { + State::Outside | State::InObject { .. } | State::AwaitingTable { .. } => { + if let Some((name, anchor)) = scan_object_heading(line) { + catalog + .objects + .entry(name.clone()) + .or_insert_with(|| ObjectEntry { + section_anchor: anchor.clone(), + ..ObjectEntry::default() + }); + state = State::InObject { name }; + continue; + } + } + _ => {} + } + + match &state { + State::InObject { name, .. } | State::AwaitingTable { object: name, .. } => { + if line.trim_start().starts_with("#### Fixed Fields") { + state = State::AwaitingTable { + object: name.clone(), + kind: TableKind::Fixed, + }; + continue; + } + if line.trim_start().starts_with("#### Patterned Fields") { + state = State::AwaitingTable { + object: name.clone(), + kind: TableKind::Patterned, + }; + continue; + } + } + _ => {} + } + + if let State::AwaitingTable { object, kind } = &state { + if line.starts_with("| Field Name") { + state = State::InTable { + object: object.clone(), + kind: match kind { + TableKind::Fixed => TableKind::Fixed, + TableKind::Patterned => TableKind::Patterned, + }, + saw_separator: false, + }; + continue; + } + if line.trim_start().starts_with("####") || line.starts_with("### ") { + state = State::Outside; + } + } + + if let State::InTable { + object, + kind, + saw_separator, + } = &mut state + { + if !*saw_separator { + if line.starts_with("| --") || line.starts_with("|--") { + *saw_separator = true; + } + continue; + } + if line.trim().is_empty() || !line.starts_with('|') { + state = State::Outside; + continue; + } + if let Some(field) = parse_field_row(line) { + if let Some(entry) = catalog.objects.get_mut(object) { + match kind { + TableKind::Fixed => entry.fields.push(field), + TableKind::Patterned => entry.patterned_fields.push(field), + } + } + } + } + } + + catalog +} + +fn scan_version(md: &str) -> Option { + md.lines() + .find(|l| l.starts_with("## Version ")) + .map(|l| l.trim_start_matches("## Version ").trim().to_string()) +} + +fn scan_object_heading(line: &str) -> Option<(String, String)> { + let rest = line.strip_prefix("### ")?; + let name = rest.trim_end().trim_end_matches(" Object").to_string(); + if !rest.ends_with(" Object") { + return None; + } + let anchor = name.to_lowercase().replace(' ', "-") + "-object"; + Some((name, anchor)) +} + +fn parse_field_row(line: &str) -> Option { + let cells: Vec<&str> = line + .trim() + .trim_start_matches('|') + .trim_end_matches('|') + .split('|') + .map(|c| c.trim()) + .collect(); + if cells.len() < 3 { + return None; + } + let name_cell = cells[0]; + let type_cell = cells[1]; + let desc_cell = cells[2]; + let (name, anchor) = extract_name_and_anchor(name_cell); + if name.is_empty() { + return None; + } + let required = desc_cell.contains("**REQUIRED**"); + let description_excerpt = strip_markdown(desc_cell) + .chars() + .take(160) + .collect::(); + Some(FieldEntry { + name, + anchor, + type_str: strip_markdown(type_cell), + required, + description_excerpt, + }) +} + +fn extract_name_and_anchor(cell: &str) -> (String, String) { + // Format: fieldName + let mut anchor = String::new(); + let mut rest = cell; + if let Some(start) = cell.find("name=\"") { + let after = &cell[start + 6..]; + if let Some(end) = after.find('"') { + anchor = after[..end].to_string(); + } + } + if let Some(end_tag) = cell.find("") { + rest = &cell[end_tag + 4..]; + } + let name = rest.trim().trim_start_matches('`').trim_end_matches('`'); + let name = name.split_whitespace().next().unwrap_or("").to_string(); + (name, anchor) +} + +fn strip_markdown(s: &str) -> String { + let mut out = String::with_capacity(s.len()); + let mut in_link_text = false; + let mut chars = s.chars().peekable(); + while let Some(c) = chars.next() { + match c { + '`' => {} // strip code fences inline + '[' => { + in_link_text = true; + } + ']' => { + in_link_text = false; + if let Some(&'(') = chars.peek() { + chars.next(); + for inner in chars.by_ref() { + if inner == ')' { + break; + } + } + } + } + '*' => {} // strip emphasis + _ if !in_link_text && c == '<' => { + for inner in chars.by_ref() { + if inner == '>' { + break; + } + } + } + _ => out.push(c), + } + } + out.split_whitespace().collect::>().join(" ") +} + +fn json_schema_2020_12_keywords() -> Vec { + [ + // Core + "$schema", + "$vocabulary", + "$id", + "$ref", + "$anchor", + "$dynamicRef", + "$dynamicAnchor", + "$defs", + "$comment", + // Applicators + "allOf", + "anyOf", + "oneOf", + "not", + "if", + "then", + "else", + "dependentSchemas", + "prefixItems", + "items", + "contains", + "properties", + "patternProperties", + "additionalProperties", + "propertyNames", + // Validation + "type", + "enum", + "const", + "multipleOf", + "maximum", + "exclusiveMaximum", + "minimum", + "exclusiveMinimum", + "maxLength", + "minLength", + "pattern", + "maxItems", + "minItems", + "uniqueItems", + "maxContains", + "minContains", + "maxProperties", + "minProperties", + "required", + "dependentRequired", + // Meta-data + "title", + "description", + "default", + "deprecated", + "readOnly", + "writeOnly", + "examples", + // Format + "format", + // Content + "contentEncoding", + "contentMediaType", + "contentSchema", + // Unevaluated + "unevaluatedItems", + "unevaluatedProperties", + ] + .iter() + .map(|s| s.to_string()) + .collect() +} + +fn parameter_style_matrix() -> Vec { + // From OAS 3.2 §"Parameter Object" style tables and Appendix C. + vec![ + StyleEntry { + style: "matrix", + parameter_in: &["path"], + types: &["primitive", "array", "object"], + rfc6570: "{;var}", + }, + StyleEntry { + style: "label", + parameter_in: &["path"], + types: &["primitive", "array", "object"], + rfc6570: "{.var}", + }, + StyleEntry { + style: "simple", + parameter_in: &["path", "header"], + types: &["primitive", "array", "object"], + rfc6570: "{var}", + }, + StyleEntry { + style: "form", + parameter_in: &["query", "cookie"], + types: &["primitive", "array", "object"], + rfc6570: "{?var}/{&var}", + }, + StyleEntry { + style: "spaceDelimited", + parameter_in: &["query"], + types: &["array", "object"], + rfc6570: "n/a (custom)", + }, + StyleEntry { + style: "pipeDelimited", + parameter_in: &["query"], + types: &["array", "object"], + rfc6570: "n/a (custom)", + }, + StyleEntry { + style: "deepObject", + parameter_in: &["query"], + types: &["object"], + rfc6570: "n/a (custom)", + }, + ] +} + +fn appendices() -> Vec { + vec![ + AppendixEntry { + id: "A", + title: "Revision History", + }, + AppendixEntry { + id: "B", + title: "Data Type Conversion", + }, + AppendixEntry { + id: "C", + title: "Using RFC6570-Based Serialization", + }, + AppendixEntry { + id: "D", + title: "Serializing Headers and Cookies", + }, + AppendixEntry { + id: "E", + title: "Percent-Encoding and Form Media Types", + }, + AppendixEntry { + id: "F", + title: "Base URI Determination and Reference Resolution", + }, + AppendixEntry { + id: "G", + title: "Parsing and Resolution Guidance", + }, + ] +} diff --git a/src/bin/file-beads.rs b/src/bin/file-beads.rs new file mode 100644 index 0000000..7fc451f --- /dev/null +++ b/src/bin/file-beads.rs @@ -0,0 +1,538 @@ +//! Reads tests/conformance/beads.yaml and creates labels + issues in the +//! configured GitHub repo via `gh`. Idempotent: existing labels/issues with +//! the same name/title are skipped. +//! +//! Default: dry-run. Pass `--apply` to actually create. +//! +//! cargo run --bin file-beads # dry-run, prints plan +//! cargo run --bin file-beads -- --apply # creates labels + issues +//! cargo run --bin file-beads -- --apply --beads F1,T1 # subset + +use std::collections::{BTreeMap, BTreeSet}; +use std::fs; +use std::path::PathBuf; +use std::process::Command; + +#[derive(Debug, serde::Deserialize)] +struct Beads { + repo: String, + labels: Vec