diff --git a/src/parser/xml.rs b/src/parser/xml.rs
index 877881f..32b798a 100644
--- a/src/parser/xml.rs
+++ b/src/parser/xml.rs
@@ -422,11 +422,14 @@ impl<'a> XmlParser<'a> {
let _ = found_dup; // suppress unused warning
}
- // --- Apply #FIXED default attributes from DTD ATTLIST declarations ---
- // Per XML 1.0 §3.3.2, if an attribute declared with #FIXED is not
+ // --- Apply DTD ATTLIST default attributes (#FIXED and #DEFAULT) ---
+ // Per XML 1.0 §3.3.2, when an attribute declared in an ATTLIST is not
// present on the element, the parser must add it with the declared
- // default value. #DEFAULT attributes are tracked for amplification
- // factor checking but not inserted into the tree (matching libxml2).
+ // default value. This applies to both `#FIXED "v"` and bare `"v"`
+ // (so-called #DEFAULT) declarations. libxml2 applies both during
+ // normal parsing — verifiable via `xmllint --c14n` on a document
+ // with an ATTLIST default, which emits the default attribute in the
+ // canonical form.
// Namespace declarations (xmlns, xmlns:prefix) are inserted before
// other attributes to match libxml2's attribute ordering.
if let Some(defaults) = if self.attr_defaults.is_empty() {
@@ -453,27 +456,29 @@ impl<'a> XmlParser<'a> {
.any(|a| a.name == decl_local && a.prefix.as_deref() == decl_pfx);
if !already_present {
// Track expansion for amplification factor check
- // (both #FIXED and #DEFAULT contribute to expansion)
+ // (both #FIXED and #DEFAULT contribute to expansion).
self.expansion_size += attr_name.len() + value.len();
- // Only insert #FIXED attributes into the tree
- if is_fixed {
- let (decl_prefix, decl_local) = split_name(attr_name);
- let attr = Attribute {
- name: decl_local.to_string(),
- value,
- prefix: decl_prefix.map(String::from),
- namespace: None,
- raw_value: None,
- };
- let is_ns_decl =
- attr_name == "xmlns" || attr_name.starts_with("xmlns:");
- if is_ns_decl {
- attributes.insert(insert_pos, attr);
- insert_pos += 1;
- } else {
- attributes.push(attr);
- }
+ // Insert both #FIXED and #DEFAULT defaults into the
+ // tree. The `is_fixed` flag is no longer used to gate
+ // insertion; it would only matter if we additionally
+ // validated source attributes against #FIXED values,
+ // which is a separate validation step.
+ let _ = is_fixed;
+ let (decl_prefix, decl_local) = split_name(attr_name);
+ let attr = Attribute {
+ name: decl_local.to_string(),
+ value,
+ prefix: decl_prefix.map(String::from),
+ namespace: None,
+ raw_value: None,
+ };
+ let is_ns_decl = attr_name == "xmlns" || attr_name.starts_with("xmlns:");
+ if is_ns_decl {
+ attributes.insert(insert_pos, attr);
+ insert_pos += 1;
+ } else {
+ attributes.push(attr);
}
}
}
diff --git a/src/serial/c14n.rs b/src/serial/c14n.rs
index 576112c..e87ce09 100644
--- a/src/serial/c14n.rs
+++ b/src/serial/c14n.rs
@@ -329,10 +329,26 @@ impl<'a> C14nContext<'a> {
let mut ns_to_output: Vec<(String, String)> = Vec::new();
for (ns_prefix, ns_uri) in &ns_decls {
- if current_rendered.get(ns_prefix) != Some(ns_uri) {
- ns_to_output.push((ns_prefix.clone(), ns_uri.clone()));
- current_rendered.insert(ns_prefix.clone(), ns_uri.clone());
+ if current_rendered.get(ns_prefix) == Some(ns_uri) {
+ continue;
}
+
+ // Special case for `xmlns=""`: per Canonical XML 1.0 §3.7 and
+ // c14n11 §3.1, the empty default-namespace declaration is only
+ // emitted to undeclare a *non-empty* inherited default. If no
+ // non-empty default is currently in scope (parent has no default,
+ // or the inherited default is itself empty), the `xmlns=""` from
+ // the source must not appear in the canonical form.
+ if ns_prefix.is_empty() && ns_uri.is_empty() {
+ let has_nonempty_inherited_default =
+ current_rendered.get("").is_some_and(|s| !s.is_empty());
+ if !has_nonempty_inherited_default {
+ continue;
+ }
+ }
+
+ ns_to_output.push((ns_prefix.clone(), ns_uri.clone()));
+ current_rendered.insert(ns_prefix.clone(), ns_uri.clone());
}
if !self.options.exclusive {
diff --git a/tests/c14n_conformance.rs b/tests/c14n_conformance.rs
new file mode 100644
index 0000000..190c268
--- /dev/null
+++ b/tests/c14n_conformance.rs
@@ -0,0 +1,366 @@
+//! W3C Canonical XML conformance test harness.
+//!
+//! Drives `canonicalize` / `canonicalize_subtree` against the worked examples
+//! published in the C14N 1.0 and Exclusive C14N 1.0 specifications. Each
+//! input is loaded from `tests/data/c14n_w3c/`, canonicalized in the
+//! requested mode, and compared byte-for-byte to the expected output.
+//!
+//! Sources:
+//! - C14N 1.0: §3 ("Examples")
+//! - Exclusive C14N 1.0: §2
+//!
+//! Some published examples require infrastructure outside the c14n module
+//! (XPath-defined node-sets, DTD-driven attribute normalization, external
+//! entity resolution). Those subsections are not represented here.
+//!
+//! On byte mismatch the harness reports the first differing byte offset plus
+//! a window of up to 160 bytes (80 before, 80 after) from each side. A single
+//! trailing newline is stripped from both sides because file-save tooling
+//! routinely adds one; the spec's canonical forms have no trailing newline.
+
+use xmloxide::serial::c14n::{canonicalize, canonicalize_subtree, C14nOptions};
+use xmloxide::tree::{Document, NodeId};
+
+/// Selects which part of a parsed fixture is handed to the canonicalizer.
+#[derive(Debug, Clone, Copy)]
+enum Apex {
+ /// Whole-document canonicalization.
+ Document,
+ /// Subtree rooted at the first element with the given local name.
+ SubtreeAt(&'static str),
+}
+
+/// One table-driven canonicalization case: an input fixture, the expected
+/// canonical bytes, and the options used to produce them.
+#[derive(Debug)]
+struct C14nCase {
+ /// Short identifier used to tag this case in failure messages.
+ name: &'static str,
+ /// File name of the input document, resolved via `fixture_path`.
+ input: &'static str,
+ /// File name of the expected canonical output, resolved via `fixture_path`.
+ expected: &'static str,
+ /// Which part of the parsed input is canonicalized.
+ apex: Apex,
+ /// Copied into `C14nOptions::with_comments`.
+ with_comments: bool,
+ /// Copied into `C14nOptions::exclusive`.
+ exclusive: bool,
+ /// Converted to owned strings for `C14nOptions::inclusive_prefixes`.
+ inclusive_prefixes: &'static [&'static str],
+ /// Provenance for failure reports.
+ source: &'static str,
+}
+
+/// Every case run by both tests in this file. Fixture file names are relative
+/// to `tests/data/c14n_w3c/`.
+const CASES: &[C14nCase] = &[
+ // -------- W3C Canonical XML 1.0 --------
+ C14nCase {
+ name: "c14n10_3_1_no_comments",
+ input: "c14n10_3_1_input.xml",
+ expected: "c14n10_3_1_no_comments.xml",
+ apex: Apex::Document,
+ with_comments: false,
+ exclusive: false,
+ inclusive_prefixes: &[],
+ source: "C14N 1.0 §3.1 (without comments)",
+ },
+ C14nCase {
+ name: "c14n10_3_1_with_comments",
+ input: "c14n10_3_1_input.xml",
+ expected: "c14n10_3_1_with_comments.xml",
+ apex: Apex::Document,
+ with_comments: true,
+ exclusive: false,
+ inclusive_prefixes: &[],
+ source: "C14N 1.0 §3.1 (with comments)",
+ },
+ C14nCase {
+ name: "c14n10_3_2_whitespace",
+ input: "c14n10_3_2_input.xml",
+ expected: "c14n10_3_2_expected.xml",
+ apex: Apex::Document,
+ with_comments: false,
+ exclusive: false,
+ inclusive_prefixes: &[],
+ source: "C14N 1.0 §3.2 (whitespace in document content)",
+ },
+ C14nCase {
+ // §3.3 exercises start/end tags, attribute sorting, and namespace
+ // handling. Its input also declares `<!ATTLIST e9 attr CDATA "default">`
+ // in the internal DTD subset, which adds an attribute via DTD
+ // defaulting. If the parser does not apply that default, this test
+ // surfaces it as a missing `attr="default"`.
+ name: "c14n10_3_3_tags",
+ input: "c14n10_3_3_input.xml",
+ expected: "c14n10_3_3_expected.xml",
+ apex: Apex::Document,
+ with_comments: false,
+ exclusive: false,
+ inclusive_prefixes: &[],
+ source: "C14N 1.0 §3.3 (start/end tags)",
+ },
+ C14nCase {
+ name: "c14n10_3_6_utf8",
+ input: "c14n10_3_6_input.xml",
+ expected: "c14n10_3_6_expected.xml",
+ apex: Apex::Document,
+ with_comments: false,
+ exclusive: false,
+ inclusive_prefixes: &[],
+ source: "C14N 1.0 §3.6 (UTF-8 encoding)",
+ },
+ // -------- W3C Exclusive XML Canonicalization 1.0 --------
+ C14nCase {
+ name: "excc14n_2_1_standalone",
+ input: "excc14n_2_1_standalone_input.xml",
+ expected: "excc14n_2_1_standalone_expected.xml",
+ apex: Apex::Document,
+ with_comments: false,
+ exclusive: true,
+ inclusive_prefixes: &[],
+ source: "Exc-C14N 1.0 §2.1 (standalone)",
+ },
+ C14nCase {
+ // §2.1 enveloped: extracting elem1 from a pdu apex must yield the
+ // same canonical form as the standalone case. Subtree extraction.
+ name: "excc14n_2_1_enveloped",
+ input: "excc14n_2_1_enveloped_input.xml",
+ expected: "excc14n_2_1_enveloped_expected.xml",
+ apex: Apex::SubtreeAt("elem1"),
+ with_comments: false,
+ exclusive: true,
+ inclusive_prefixes: &[],
+ source: "Exc-C14N 1.0 §2.1 (enveloped, subtree at elem1)",
+ },
+ C14nCase {
+ // §2.2 input1: elem2 subtree under different enveloping context.
+ name: "excc14n_2_2_input1",
+ input: "excc14n_2_2_input1.xml",
+ expected: "excc14n_2_2_expected.xml",
+ apex: Apex::SubtreeAt("elem2"),
+ with_comments: false,
+ exclusive: true,
+ inclusive_prefixes: &[],
+ source: "Exc-C14N 1.0 §2.2 (input1, subtree at elem2)",
+ },
+ C14nCase {
+ // §2.2 input2: same elem2 subtree under a different enveloping
+ // context. Spec property: byte-equal output to input1's elem2.
+ name: "excc14n_2_2_input2",
+ input: "excc14n_2_2_input2.xml",
+ expected: "excc14n_2_2_expected.xml",
+ apex: Apex::SubtreeAt("elem2"),
+ with_comments: false,
+ exclusive: true,
+ inclusive_prefixes: &[],
+ source: "Exc-C14N 1.0 §2.2 (input2, subtree at elem2)",
+ },
+];
+
+/// Builds the absolute path of the fixture file `name` under
+/// `tests/data/c14n_w3c/`, anchored at the crate root (`CARGO_MANIFEST_DIR`).
+fn fixture_path(name: &str) -> String {
+    // The directory prefix is fixed at compile time; only `name` varies.
+    let mut path = String::from(concat!(env!("CARGO_MANIFEST_DIR"), "/tests/data/c14n_w3c/"));
+    path.push_str(name);
+    path
+}
+
+/// Depth-first, pre-order search for the first node in the subtree rooted at
+/// `root` (inclusive) whose `node_name` equals `local`.
+///
+/// Returns `None` when no node in the subtree matches.
+fn find_first_local(doc: &Document, root: NodeId, local: &str) -> Option<NodeId> {
+    if doc.node_name(root) == Some(local) {
+        return Some(root);
+    }
+    // Recurse into children in document order; the first hit wins.
+    doc.children(root)
+        .into_iter()
+        .find_map(|child| find_first_local(doc, child, local))
+}
+
+/// Drops one trailing `\n` from `s` if there is one; any other input is
+/// returned untouched. Canonical output carries no trailing newline, whereas
+/// editors tend to append one when saving fixtures, so both sides of a
+/// comparison are passed through this before being compared.
+fn trim_trailing_newline(s: &str) -> &str {
+    match s.strip_suffix('\n') {
+        Some(without_newline) => without_newline,
+        None => s,
+    }
+}
+
+/// Renders a short report locating the first byte at which `expected` and
+/// `actual` differ, with a lossily-decoded excerpt of each that starts up to
+/// 80 bytes before that offset and ends up to 80 bytes after it.
+fn first_diff_window(expected: &str, actual: &str) -> String {
+    let (want, got) = (expected.as_bytes(), actual.as_bytes());
+    // Length of the common prefix: the offset of the first mismatch, or the
+    // length of the shorter input when one is a prefix of the other.
+    let at = want.iter().zip(got).take_while(|(w, g)| w == g).count();
+    let start = at.saturating_sub(80);
+    let window = |bytes: &[u8]| {
+        let end = bytes.len().min(at + 80);
+        String::from_utf8_lossy(&bytes[start..end]).into_owned()
+    };
+    format!(
+        " first diff at byte {at}\n expected window: {:?}\n actual window: {:?}",
+        window(want),
+        window(got),
+    )
+}
+
+/// Runs every entry of `CASES` through the canonicalizer and compares the
+/// result with the expected fixture byte for byte (after stripping one
+/// trailing newline from each side).
+///
+/// Failures are collected rather than raised immediately so that a single run
+/// reports every broken case; the test fails at the end if any were recorded.
+#[test]
+fn w3c_c14n_conformance() {
+    let mut failures: Vec<String> = Vec::new();
+
+    for case in CASES {
+        let input_path = fixture_path(case.input);
+        let expected_path = fixture_path(case.expected);
+
+        let input = match std::fs::read_to_string(&input_path) {
+            Ok(s) => s,
+            Err(e) => {
+                failures.push(format!("[{}] read {input_path}: {e}", case.name));
+                continue;
+            }
+        };
+        let expected = match std::fs::read_to_string(&expected_path) {
+            Ok(s) => s,
+            Err(e) => {
+                failures.push(format!("[{}] read {expected_path}: {e}", case.name));
+                continue;
+            }
+        };
+
+        let doc = match Document::parse_str(&input) {
+            Ok(d) => d,
+            Err(e) => {
+                failures.push(format!(
+                    "[{}] parse failed: {e:?}\n source: {}",
+                    case.name, case.source
+                ));
+                continue;
+            }
+        };
+
+        let opts = C14nOptions {
+            with_comments: case.with_comments,
+            exclusive: case.exclusive,
+            inclusive_prefixes: case
+                .inclusive_prefixes
+                .iter()
+                .map(|s| (*s).to_string())
+                .collect(),
+        };
+
+        let actual = match case.apex {
+            Apex::Document => canonicalize(&doc, &opts),
+            Apex::SubtreeAt(local) => {
+                let Some(root) = doc.root_element() else {
+                    failures.push(format!("[{}] no root element", case.name));
+                    continue;
+                };
+                let Some(apex) = find_first_local(&doc, root, local) else {
+                    failures.push(format!(
+                        "[{}] subtree apex <{local}> not found in input",
+                        case.name
+                    ));
+                    continue;
+                };
+                canonicalize_subtree(&doc, apex, &opts)
+            }
+        };
+
+        // Compare modulo a single trailing newline on either side.
+        let exp_trim = trim_trailing_newline(&expected);
+        let act_trim = trim_trailing_newline(&actual);
+
+        if exp_trim != act_trim {
+            failures.push(format!(
+                "[{}] BYTE MISMATCH ({} bytes expected, {} bytes actual)\n source: {}\n{}",
+                case.name,
+                exp_trim.len(),
+                act_trim.len(),
+                case.source,
+                first_diff_window(exp_trim, act_trim),
+            ));
+        }
+    }
+
+    assert!(
+        failures.is_empty(),
+        "W3C C14N conformance: {} / {} cases failed\n\n{}\n",
+        failures.len(),
+        CASES.len(),
+        failures.join("\n\n"),
+    );
+}
+
+/// Idempotency roundtrip: feeding canonical output back through the parser
+/// and canonicalizer must produce byte-identical output. This is the formal
+/// statement of "the canonical form is canonical" — a c14n implementation
+/// that doesn't satisfy it is by definition non-canonical.
+///
+/// Runs against every case in `CASES`. If a fixture's expected output
+/// re-canonicalizes to anything other than itself, the test fails with
+/// the same byte-window diff used by the conformance test above.
+#[test]
+fn w3c_c14n_idempotency() {
+    let mut failures: Vec<String> = Vec::new();
+
+    for case in CASES {
+        // The "expected" file already represents canonical output for this
+        // case's mode; feeding it back through must reproduce it exactly.
+        // An unreadable fixture is skipped here because the conformance
+        // test already reports that read error.
+        let Ok(canonical_input) = std::fs::read_to_string(fixture_path(case.expected)) else {
+            continue;
+        };
+        let canonical_input = trim_trailing_newline(&canonical_input).to_string();
+
+        let doc = match Document::parse_str(&canonical_input) {
+            Ok(d) => d,
+            Err(e) => {
+                failures.push(format!(
+                    "[{}] re-parse of canonical output failed: {e:?}",
+                    case.name,
+                ));
+                continue;
+            }
+        };
+
+        let opts = C14nOptions {
+            with_comments: case.with_comments,
+            exclusive: case.exclusive,
+            inclusive_prefixes: case
+                .inclusive_prefixes
+                .iter()
+                .map(|s| (*s).to_string())
+                .collect(),
+        };
+
+        // Re-canonicalize using the same apex selection as the original
+        // case so subtree-apex tests stay well-defined.
+        let actual = match case.apex {
+            Apex::Document => canonicalize(&doc, &opts),
+            Apex::SubtreeAt(local) => {
+                // Nothing else checks the re-parsed canonical document, so a
+                // missing root is recorded instead of being silently skipped.
+                let Some(root) = doc.root_element() else {
+                    failures.push(format!(
+                        "[{}] re-parsed canonical output has no root element",
+                        case.name,
+                    ));
+                    continue;
+                };
+                let Some(apex) = find_first_local(&doc, root, local) else {
+                    // The canonical-output file may have stripped enveloping
+                    // ancestors, so the original apex element is the new
+                    // root. Fall back to canonicalizing the whole doc.
+                    let actual = canonicalize(&doc, &opts);
+                    let act_trim = trim_trailing_newline(&actual);
+                    if act_trim != canonical_input {
+                        failures.push(format!(
+                            "[{}] subtree-apex idempotency failed\n source: {}\n{}",
+                            case.name,
+                            case.source,
+                            first_diff_window(&canonical_input, act_trim),
+                        ));
+                    }
+                    continue;
+                };
+                canonicalize_subtree(&doc, apex, &opts)
+            }
+        };
+
+        let act_trim = trim_trailing_newline(&actual);
+        if act_trim != canonical_input {
+            failures.push(format!(
+                "[{}] re-canonicalize differs from input\n source: {}\n{}",
+                case.name,
+                case.source,
+                first_diff_window(&canonical_input, act_trim),
+            ));
+        }
+    }
+
+    assert!(
+        failures.is_empty(),
+        "C14N idempotency: {} / {} cases failed\n\n{}\n",
+        failures.len(),
+        CASES.len(),
+        failures.join("\n\n"),
+    );
+}
diff --git a/tests/data/c14n_w3c/c14n10_3_1_input.xml b/tests/data/c14n_w3c/c14n10_3_1_input.xml
new file mode 100644
index 0000000..ed450c7
--- /dev/null
+++ b/tests/data/c14n_w3c/c14n10_3_1_input.xml
@@ -0,0 +1,14 @@
+
+
+
+
+
+
+Hello, world!
+
+
+
+
+
+
diff --git a/tests/data/c14n_w3c/c14n10_3_1_no_comments.xml b/tests/data/c14n_w3c/c14n10_3_1_no_comments.xml
new file mode 100644
index 0000000..a5703c1
--- /dev/null
+++ b/tests/data/c14n_w3c/c14n10_3_1_no_comments.xml
@@ -0,0 +1,4 @@
+
+Hello, world!
+
diff --git a/tests/data/c14n_w3c/c14n10_3_1_with_comments.xml b/tests/data/c14n_w3c/c14n10_3_1_with_comments.xml
new file mode 100644
index 0000000..5193ede
--- /dev/null
+++ b/tests/data/c14n_w3c/c14n10_3_1_with_comments.xml
@@ -0,0 +1,6 @@
+
+Hello, world!
+
+
+
diff --git a/tests/data/c14n_w3c/c14n10_3_2_expected.xml b/tests/data/c14n_w3c/c14n10_3_2_expected.xml
new file mode 100644
index 0000000..74eeea1
--- /dev/null
+++ b/tests/data/c14n_w3c/c14n10_3_2_expected.xml
@@ -0,0 +1,11 @@
+
+
+ A B
+
+ A
+
+ B
+ A B
+ C
+
+
diff --git a/tests/data/c14n_w3c/c14n10_3_2_input.xml b/tests/data/c14n_w3c/c14n10_3_2_input.xml
new file mode 100644
index 0000000..74eeea1
--- /dev/null
+++ b/tests/data/c14n_w3c/c14n10_3_2_input.xml
@@ -0,0 +1,11 @@
+
+
+ A B
+
+ A
+
+ B
+ A B
+ C
+
+
diff --git a/tests/data/c14n_w3c/c14n10_3_3_expected.xml b/tests/data/c14n_w3c/c14n10_3_3_expected.xml
new file mode 100644
index 0000000..c2b8631
--- /dev/null
+++ b/tests/data/c14n_w3c/c14n10_3_3_expected.xml
@@ -0,0 +1,14 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tests/data/c14n_w3c/c14n10_3_3_input.xml b/tests/data/c14n_w3c/c14n10_3_3_input.xml
new file mode 100644
index 0000000..09567cc
--- /dev/null
+++ b/tests/data/c14n_w3c/c14n10_3_3_input.xml
@@ -0,0 +1,18 @@
+]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tests/data/c14n_w3c/c14n10_3_6_expected.xml b/tests/data/c14n_w3c/c14n10_3_6_expected.xml
new file mode 100644
index 0000000..a2addeb
--- /dev/null
+++ b/tests/data/c14n_w3c/c14n10_3_6_expected.xml
@@ -0,0 +1 @@
+©
diff --git a/tests/data/c14n_w3c/c14n10_3_6_input.xml b/tests/data/c14n_w3c/c14n10_3_6_input.xml
new file mode 100644
index 0000000..31e2071
--- /dev/null
+++ b/tests/data/c14n_w3c/c14n10_3_6_input.xml
@@ -0,0 +1,2 @@
+
+©
diff --git a/tests/data/c14n_w3c/excc14n_2_1_enveloped_expected.xml b/tests/data/c14n_w3c/excc14n_2_1_enveloped_expected.xml
new file mode 100644
index 0000000..e016aa5
--- /dev/null
+++ b/tests/data/c14n_w3c/excc14n_2_1_enveloped_expected.xml
@@ -0,0 +1,3 @@
+
+ content
+
diff --git a/tests/data/c14n_w3c/excc14n_2_1_enveloped_input.xml b/tests/data/c14n_w3c/excc14n_2_1_enveloped_input.xml
new file mode 100644
index 0000000..db8aa7a
--- /dev/null
+++ b/tests/data/c14n_w3c/excc14n_2_1_enveloped_input.xml
@@ -0,0 +1,5 @@
+
+
+ content
+
+
diff --git a/tests/data/c14n_w3c/excc14n_2_1_standalone_expected.xml b/tests/data/c14n_w3c/excc14n_2_1_standalone_expected.xml
new file mode 100644
index 0000000..917afcd
--- /dev/null
+++ b/tests/data/c14n_w3c/excc14n_2_1_standalone_expected.xml
@@ -0,0 +1,3 @@
+
+ content
+
diff --git a/tests/data/c14n_w3c/excc14n_2_1_standalone_input.xml b/tests/data/c14n_w3c/excc14n_2_1_standalone_input.xml
new file mode 100644
index 0000000..917afcd
--- /dev/null
+++ b/tests/data/c14n_w3c/excc14n_2_1_standalone_input.xml
@@ -0,0 +1,3 @@
+
+ content
+
diff --git a/tests/data/c14n_w3c/excc14n_2_2_expected.xml b/tests/data/c14n_w3c/excc14n_2_2_expected.xml
new file mode 100644
index 0000000..a62a15e
--- /dev/null
+++ b/tests/data/c14n_w3c/excc14n_2_2_expected.xml
@@ -0,0 +1,3 @@
+
+
+
diff --git a/tests/data/c14n_w3c/excc14n_2_2_input1.xml b/tests/data/c14n_w3c/excc14n_2_2_input1.xml
new file mode 100644
index 0000000..0648fa4
--- /dev/null
+++ b/tests/data/c14n_w3c/excc14n_2_2_input1.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
diff --git a/tests/data/c14n_w3c/excc14n_2_2_input2.xml b/tests/data/c14n_w3c/excc14n_2_2_input2.xml
new file mode 100644
index 0000000..60ed873
--- /dev/null
+++ b/tests/data/c14n_w3c/excc14n_2_2_input2.xml
@@ -0,0 +1,9 @@
+
+
+
+
+