From cec02c33dd3bc3b6c265e00302d5b988a2e9066b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dyego=20Aur=C3=A9lio?= Date: Sat, 29 Nov 2025 16:10:40 -0300 Subject: [PATCH] Add duplicate attribute tracking for CSP nonce validation. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements detection and propagation of duplicate attributes through the tokenizer, tree builder, and TreeSink interface to support CSP (Content Security Policy) nonce validation. This enables html5ever consumers (e.g., Servo) to properly implement step 3 of the CSP "is element nonceable" algorithm by checking the `ElementFlags.had_duplicate_attrs` field during nonce validation. Reference: - https://www.w3.org/TR/CSP/#is-element-nonceable - https://github.com/servo/servo/commit/4821bc0ab01e1ed0bb27e86c2df545019bd3856a Signed-off-by: Dyego Aurélio --- html5ever/src/tokenizer/interface.rs | 4 + html5ever/src/tokenizer/mod.rs | 8 + html5ever/src/tree_builder/mod.rs | 77 +++++- html5ever/src/tree_builder/rules.rs | 7 +- markup5ever/interface/tree_builder.rs | 24 ++ .../duplicate-attrs.test | 66 +++++ rcdom/tests/duplicate-attrs-integration.rs | 241 ++++++++++++++++++ rcdom/tests/html-tokenizer.rs | 3 + 8 files changed, 415 insertions(+), 15 deletions(-) create mode 100644 rcdom/custom-html5lib-tokenizer-tests/duplicate-attrs.test create mode 100644 rcdom/tests/duplicate-attrs-integration.rs diff --git a/html5ever/src/tokenizer/interface.rs b/html5ever/src/tokenizer/interface.rs index edc6afb9..15d774ab 100644 --- a/html5ever/src/tokenizer/interface.rs +++ b/html5ever/src/tokenizer/interface.rs @@ -40,6 +40,10 @@ pub struct Tag { pub name: LocalName, pub self_closing: bool, pub attrs: Vec, + /// Whether duplicate attributes were encountered during tokenization. + /// This is used for CSP nonce validation - elements with duplicate + /// attributes are not nonceable per the CSP spec. + pub had_duplicate_attrs: bool, } impl Tag { diff --git a/html5ever/src/tokenizer/mod.rs b/html5ever/src/tokenizer/mod.rs index eccc5690..37762f59 100644 --- a/html5ever/src/tokenizer/mod.rs +++ b/html5ever/src/tokenizer/mod.rs @@ -133,6 +133,9 @@ pub struct Tokenizer { /// Current tag is self-closing? current_tag_self_closing: Cell, + /// Current tag had duplicate attributes? + current_tag_had_duplicate_attrs: Cell, + /// Current tag attributes. current_tag_attrs: RefCell>, @@ -186,6 +189,7 @@ impl Tokenizer { current_tag_kind: Cell::new(StartTag), current_tag_name: RefCell::new(StrTendril::new()), current_tag_self_closing: Cell::new(false), + current_tag_had_duplicate_attrs: Cell::new(false), current_tag_attrs: RefCell::new(vec![]), current_attr_name: RefCell::new(StrTendril::new()), current_attr_value: RefCell::new(StrTendril::new()), @@ -440,6 +444,7 @@ impl Tokenizer { name, self_closing: self.current_tag_self_closing.get(), attrs: std::mem::take(&mut self.current_tag_attrs.borrow_mut()), + had_duplicate_attrs: self.current_tag_had_duplicate_attrs.get(), }); match self.process_token(token) { @@ -481,6 +486,7 @@ impl Tokenizer { fn discard_tag(&self) { self.current_tag_name.borrow_mut().clear(); self.current_tag_self_closing.set(false); + self.current_tag_had_duplicate_attrs.set(false); *self.current_tag_attrs.borrow_mut() = vec![]; } @@ -523,6 +529,7 @@ impl Tokenizer { if dup { self.emit_error(Borrowed("Duplicate attribute")); + self.current_tag_had_duplicate_attrs.set(true); self.current_attr_name.borrow_mut().clear(); self.current_attr_value.borrow_mut().clear(); } else { @@ -2217,6 +2224,7 @@ mod test { name, self_closing: false, attrs: vec![], + had_duplicate_attrs: false, }) } diff --git a/html5ever/src/tree_builder/mod.rs b/html5ever/src/tree_builder/mod.rs index f0d89b92..1bd1a607 100644 --- a/html5ever/src/tree_builder/mod.rs +++ b/html5ever/src/tree_builder/mod.rs @@ -12,6 +12,7 @@ pub use crate::interface::{create_element, ElemName, ElementFlags, Tracer, TreeSink}; pub use crate::interface::{AppendNode, AppendText, Attribute, NodeOrText}; pub use crate::interface::{LimitedQuirks, NoQuirks, Quirks, QuirksMode}; +pub use markup5ever::interface::tree_builder::create_element_with_flags; use self::types::*; @@ -733,6 +734,7 @@ where name: subject, self_closing: false, attrs: vec![], + had_duplicate_attrs: false, }); }; @@ -828,10 +830,11 @@ where }; // FIXME: Is there a way to avoid cloning the attributes twice here (once on their // own, once as part of t.clone() above)? - let new_element = create_element( + let new_element = create_element_with_flags( &self.sink, QualName::new(None, ns!(html), tag.name.clone()), tag.attrs.clone(), + tag.had_duplicate_attrs, ); self.open_elems.borrow_mut()[node_index] = new_element.clone(); self.active_formatting.borrow_mut()[node_formatting_index] = @@ -860,10 +863,11 @@ where // 15. // FIXME: Is there a way to avoid cloning the attributes twice here (once on their own, // once as part of t.clone() above)? - let new_element = create_element( + let new_element = create_element_with_flags( &self.sink, QualName::new(None, ns!(html), fmt_elem_tag.name.clone()), fmt_elem_tag.attrs.clone(), + fmt_elem_tag.had_duplicate_attrs, ); let new_entry = FormatEntry::Element(new_element.clone(), fmt_elem_tag); @@ -1010,6 +1014,7 @@ where ns!(html), tag.name.clone(), tag.attrs.clone(), + tag.had_duplicate_attrs, ); // Step 9. Replace the entry for entry in the list with an entry for new element. @@ -1363,6 +1368,7 @@ where ns: Namespace, name: LocalName, attrs: Vec, + had_duplicate_attrs: bool, ) -> Handle { declare_tag_set!(form_associatable = "button" "fieldset" "input" "object" @@ -1372,7 +1378,12 @@ where // Step 7. let qname = QualName::new(None, ns, name); - let elem = create_element(&self.sink, qname.clone(), attrs.clone()); + let elem = create_element_with_flags( + &self.sink, + qname.clone(), + attrs.clone(), + had_duplicate_attrs, + ); let insertion_point = self.appropriate_place_for_insertion(None); let (node1, node2) = match insertion_point { @@ -1410,15 +1421,27 @@ where } fn insert_element_for(&self, tag: Tag) -> Handle { - self.insert_element(PushFlag::Push, ns!(html), tag.name, tag.attrs) + self.insert_element( + PushFlag::Push, + ns!(html), + tag.name, + tag.attrs, + tag.had_duplicate_attrs, + ) } fn insert_and_pop_element_for(&self, tag: Tag) -> Handle { - self.insert_element(PushFlag::NoPush, ns!(html), tag.name, tag.attrs) + self.insert_element( + PushFlag::NoPush, + ns!(html), + tag.name, + tag.attrs, + tag.had_duplicate_attrs, + ) } fn insert_phantom(&self, name: LocalName) -> Handle { - self.insert_element(PushFlag::Push, ns!(html), name, vec![]) + self.insert_element(PushFlag::Push, ns!(html), name, vec![], false) } /// @@ -1429,8 +1452,13 @@ where only_add_to_element_stack: bool, ) -> Handle { let adjusted_insertion_location = self.appropriate_place_for_insertion(None); - let qname = QualName::new(None, ns, tag.name); - let elem = create_element(&self.sink, qname.clone(), tag.attrs.clone()); + let qname = QualName::new(None, ns, tag.name.clone()); + let elem = create_element_with_flags( + &self.sink, + qname.clone(), + tag.attrs.clone(), + tag.had_duplicate_attrs, + ); if !only_add_to_element_stack { self.insert_at(adjusted_insertion_location, AppendNode(elem.clone())); @@ -1520,6 +1548,7 @@ where ns!(html), tag.name.clone(), tag.attrs.clone(), + tag.had_duplicate_attrs, ); self.active_formatting .borrow_mut() @@ -1655,10 +1684,22 @@ where self.adjust_foreign_attributes(&mut tag); if tag.self_closing { - self.insert_element(PushFlag::NoPush, ns, tag.name, tag.attrs); + self.insert_element( + PushFlag::NoPush, + ns, + tag.name, + tag.attrs, + tag.had_duplicate_attrs, + ); ProcessResult::DoneAckSelfClosing } else { - self.insert_element(PushFlag::Push, ns, tag.name, tag.attrs); + self.insert_element( + PushFlag::Push, + ns, + tag.name, + tag.attrs, + tag.had_duplicate_attrs, + ); ProcessResult::Done } } @@ -1823,10 +1864,22 @@ where self.adjust_foreign_attributes(&mut tag); if tag.self_closing { // FIXME(#118): "#; + + let sink = driver::parse_fragment( + sink, + driver::ParseOpts::default(), + QualName::new(None, ns!(html), local_name!("body")), + vec![], + false, + ) + .one(input) + .finish(); + + let flags = sink.had_duplicate_attrs_flags.borrow(); + let has_duplicate = flags.iter().any(|&f| f); + + assert!( + has_duplicate, + "Expected script with duplicate nonce to have had_duplicate_attrs=true" + ); +} diff --git a/rcdom/tests/html-tokenizer.rs b/rcdom/tests/html-tokenizer.rs index 9ff7dc69..d618ff0b 100644 --- a/rcdom/tests/html-tokenizer.rs +++ b/rcdom/tests/html-tokenizer.rs @@ -135,6 +135,7 @@ impl TokenSink for TokenLogger { }, _ => t.attrs.sort_by(|a1, a2| a1.name.cmp(&a2.name)), } + t.had_duplicate_attrs = false; self.push(TagToken(t)); }, @@ -250,6 +251,7 @@ fn json_to_token(js: &Value) -> Token { Some(b) => b.get_bool(), None => false, }, + had_duplicate_attrs: false, }), "EndTag" => TagToken(Tag { @@ -257,6 +259,7 @@ fn json_to_token(js: &Value) -> Token { name: LocalName::from(&*args[0].get_str()), attrs: vec![], self_closing: false, + had_duplicate_attrs: false, }), "Comment" => CommentToken(args[0].get_tendril()),