From 84512f3ab1040a9c95b5659a534e553e6d0e7a9f Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Sun, 26 Jan 2025 13:19:49 +0900 Subject: [PATCH 1/9] Use more StringScanner based API for parse_id_invalid_details --- lib/rexml/parsers/baseparser.rb | 21 ++++++++++---------- test/parse/test_document_type_declaration.rb | 4 ++-- test/parse/test_notation_declaration.rb | 10 +++++----- 3 files changed, 17 insertions(+), 18 deletions(-) diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 8fe287a7..3cb5a637 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -696,36 +696,35 @@ def parse_id(base_error_message, def parse_id_invalid_details(accept_external_id:, accept_public_id:) - public = /\A\s*PUBLIC/um - system = /\A\s*SYSTEM/um - if (accept_external_id or accept_public_id) and @source.match?(/#{public}/um) - if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um) + @source.skip_spaces + if (accept_external_id or accept_public_id) and @source.match?("PUBLIC", true) + if @source.match?(/(?:\s+[^'"]|\s*[\[>])/um) return "public ID literal is missing" end - unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um) + unless @source.match?(/\s+#{PUBIDLITERAL}/um) return "invalid public ID literal" end if accept_public_id - if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um) + if @source.match?(/\s+#{PUBIDLITERAL}\s+[^'"]/um) return "system ID literal is missing" end - unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um) + unless @source.match?(/\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um) return "invalid system literal" end "garbage after system literal" else "garbage after public ID literal" end - elsif accept_external_id and @source.match?(/#{system}/um) - if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um) + elsif accept_external_id and @source.match?("SYSTEM", true) + if @source.match?(/(?:\s+[^'"]|\s*[\[>])/um) return "system literal is missing" end - unless 
@source.match?(/#{system}\s+#{SYSTEMLITERAL}/um) + unless @source.match?(/\s+#{SYSTEMLITERAL}/um) return "invalid system literal" end "garbage after system literal" else - unless @source.match?(/\A\s*(?:PUBLIC|SYSTEM)\s/um) + unless @source.match?(/(?:PUBLIC|SYSTEM)\s/um) return "invalid ID type" end "ID type is missing" diff --git a/test/parse/test_document_type_declaration.rb b/test/parse/test_document_type_declaration.rb index d4658b9e..3b2da01f 100644 --- a/test/parse/test_document_type_declaration.rb +++ b/test/parse/test_document_type_declaration.rb @@ -153,7 +153,7 @@ def test_no_literal Line: 3 Position: 26 Last 80 unconsumed characters: -SYSTEM> +> DETAIL end @@ -200,7 +200,7 @@ def test_content_double_quote Line: 3 Position: 62 Last 80 unconsumed characters: -PUBLIC 'double quote " is invalid' "r.dtd"> + 'double quote " is invalid' "r.dtd"> DETAIL end diff --git a/test/parse/test_notation_declaration.rb b/test/parse/test_notation_declaration.rb index 9e81b6a4..96ae4ba0 100644 --- a/test/parse/test_notation_declaration.rb +++ b/test/parse/test_notation_declaration.rb @@ -80,7 +80,7 @@ def test_invalid_id_type Line: 5 Position: 85 Last 80 unconsumed characters: - INVALID> ]> +INVALID> ]> DETAIL end end @@ -98,7 +98,7 @@ def test_no_literal Line: 5 Position: 84 Last 80 unconsumed characters: - SYSTEM> ]> +> ]> DETAIL end @@ -145,7 +145,7 @@ def test_content_double_quote Line: 5 Position: 129 Last 80 unconsumed characters: - PUBLIC 'double quote " is invalid' "system-literal"> ]> + 'double quote " is invalid' "system-literal"> ]> DETAIL end @@ -229,7 +229,7 @@ def test_no_literal Line: 5 Position: 84 Last 80 unconsumed characters: - PUBLIC> ]> +> ]> DETAIL end @@ -244,7 +244,7 @@ def test_literal_content_double_quote Line: 5 Position: 128 Last 80 unconsumed characters: - PUBLIC 'double quote \" is invalid in PubidLiteral'> ]> + 'double quote \" is invalid in PubidLiteral'> ]> DETAIL end From f62efc37bc72df095ddf2c6a1389ff1264fc6d85 Mon Sep 17 00:00:00 2001 
From: NAITOH Jun Date: Sat, 13 Sep 2025 15:28:36 +0900 Subject: [PATCH 2/9] Use more StringScanner based API for parse_id --- lib/rexml/parsers/baseparser.rb | 82 +++++++++++--------- test/parse/test_document_type_declaration.rb | 6 +- test/parse/test_notation_declaration.rb | 30 +++---- 3 files changed, 64 insertions(+), 54 deletions(-) diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 3cb5a637..51fb470d 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -158,6 +158,9 @@ module Private DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/ end XML_PREFIXED_NAMESPACE = "http://www.w3.org/XML/1998/namespace" + EXTERNAL_ID_PUBLIC_PATTERN = /\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um + EXTERNAL_ID_SYSTEM_PATTERN = /\s+#{SYSTEMLITERAL}/um + PUBLIC_ID_PATTERN = /\s+#{PUBIDLITERAL}/um end private_constant :Private @@ -669,66 +672,73 @@ def parse_name(base_error_message) def parse_id(base_error_message, accept_external_id:, accept_public_id:) - if accept_external_id and (md = @source.match(EXTERNAL_ID_PUBLIC, true)) - pubid = system = nil - pubid_literal = md[1] - pubid = pubid_literal[1..-2] if pubid_literal # Remove quote - system_literal = md[2] - system = system_literal[1..-2] if system_literal # Remove quote - ["PUBLIC", pubid, system] - elsif accept_public_id and (md = @source.match(PUBLIC_ID, true)) - pubid = system = nil - pubid_literal = md[1] - pubid = pubid_literal[1..-2] if pubid_literal # Remove quote - ["PUBLIC", pubid, nil] - elsif accept_external_id and (md = @source.match(EXTERNAL_ID_SYSTEM, true)) - system = nil - system_literal = md[1] - system = system_literal[1..-2] if system_literal # Remove quote - ["SYSTEM", nil, system] + @source.skip_spaces + if @source.match?("PUBLIC", true) + if accept_external_id and (md = @source.match(Private::EXTERNAL_ID_PUBLIC_PATTERN, true)) + pubid = system = nil + pubid_literal = md[1] + pubid = pubid_literal[1..-2] if pubid_literal # Remove quote + 
system_literal = md[2] + system = system_literal[1..-2] if system_literal # Remove quote + return ["PUBLIC", pubid, system] + elsif accept_public_id and (md = @source.match(Private::PUBLIC_ID_PATTERN, true)) + pubid = system = nil + pubid_literal = md[1] + pubid = pubid_literal[1..-2] if pubid_literal # Remove quote + return ["PUBLIC", pubid, nil] + end + details = parse_id_invalid_details_public(accept_external_id: accept_external_id, + accept_public_id: accept_public_id) + elsif @source.match?("SYSTEM", true) + if accept_external_id and (md = @source.match(Private::EXTERNAL_ID_SYSTEM_PATTERN, true)) + system = nil + system_literal = md[1] + system = system_literal[1..-2] if system_literal # Remove quote + return ["SYSTEM", nil, system] + end + details = parse_id_invalid_details_system(accept_external_id: accept_external_id) else - details = parse_id_invalid_details(accept_external_id: accept_external_id, - accept_public_id: accept_public_id) - message = "#{base_error_message}: #{details}" - raise REXML::ParseException.new(message, @source) + details = "invalid ID type" end + message = "#{base_error_message}: #{details}" + raise REXML::ParseException.new(message, @source) end - def parse_id_invalid_details(accept_external_id:, + def parse_id_invalid_details_public(accept_external_id:, accept_public_id:) - @source.skip_spaces - if (accept_external_id or accept_public_id) and @source.match?("PUBLIC", true) + if (accept_external_id or accept_public_id) if @source.match?(/(?:\s+[^'"]|\s*[\[>])/um) return "public ID literal is missing" end - unless @source.match?(/\s+#{PUBIDLITERAL}/um) + unless @source.match?(Private::PUBLIC_ID_PATTERN) return "invalid public ID literal" end if accept_public_id if @source.match?(/\s+#{PUBIDLITERAL}\s+[^'"]/um) return "system ID literal is missing" end - unless @source.match?(/\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um) + unless @source.match?(Private::EXTERNAL_ID_PUBLIC_PATTERN) return "invalid system literal" end - "garbage after 
system literal" + return "garbage after system literal" else - "garbage after public ID literal" + return "garbage after public ID literal" end - elsif accept_external_id and @source.match?("SYSTEM", true) + end + "ID type is missing" + end + + def parse_id_invalid_details_system(accept_external_id:) + if accept_external_id if @source.match?(/(?:\s+[^'"]|\s*[\[>])/um) return "system literal is missing" end - unless @source.match?(/\s+#{SYSTEMLITERAL}/um) + unless @source.match?(Private::EXTERNAL_ID_SYSTEM_PATTERN) return "invalid system literal" end - "garbage after system literal" - else - unless @source.match?(/(?:PUBLIC|SYSTEM)\s/um) - return "invalid ID type" - end - "ID type is missing" + return "garbage after system literal" end + "ID type is missing" end def process_comment diff --git a/test/parse/test_document_type_declaration.rb b/test/parse/test_document_type_declaration.rb index 3b2da01f..e0a663ef 100644 --- a/test/parse/test_document_type_declaration.rb +++ b/test/parse/test_document_type_declaration.rb @@ -165,10 +165,10 @@ def test_garbage_after_literal end assert_equal(<<-DETAIL.chomp, exception.to_s) Malformed DOCTYPE: garbage after external ID -Line: 3 -Position: 36 +Line: 1 +Position: 29 Last 80 unconsumed characters: -x'> +x'> DETAIL end diff --git a/test/parse/test_notation_declaration.rb b/test/parse/test_notation_declaration.rb index 96ae4ba0..11914d37 100644 --- a/test/parse/test_notation_declaration.rb +++ b/test/parse/test_notation_declaration.rb @@ -32,10 +32,10 @@ def test_no_name end assert_equal(<<-DETAIL.chomp, exception.to_s) Malformed notation declaration: name is missing -Line: 5 -Position: 72 +Line: 2 +Position: 62 Last 80 unconsumed characters: - ]> + DETAIL end @@ -62,10 +62,10 @@ def test_no_id_type end assert_equal(<<-DETAIL.chomp, exception.to_s) Malformed notation declaration: invalid ID type -Line: 5 -Position: 77 +Line: 2 +Position: 67 Last 80 unconsumed characters: -> ]> +> DETAIL end @@ -77,10 +77,10 @@ def 
test_invalid_id_type end assert_equal(<<-DETAIL.chomp, exception.to_s) Malformed notation declaration: invalid ID type -Line: 5 -Position: 85 +Line: 2 +Position: 75 Last 80 unconsumed characters: -INVALID> ]> +INVALID> DETAIL end end @@ -110,10 +110,10 @@ def test_garbage_after_literal end assert_equal(<<-DETAIL.chomp, exception.to_s) Malformed notation declaration: garbage before end > -Line: 5 -Position: 103 +Line: 2 +Position: 93 Last 80 unconsumed characters: -x'> ]> +x'> DETAIL end @@ -173,10 +173,10 @@ def test_garbage_after_literal end assert_equal(<<-DETAIL.chomp, exception.to_s) Malformed notation declaration: garbage before end > -Line: 5 -Position: 123 +Line: 2 +Position: 113 Last 80 unconsumed characters: -x'> ]> +x'> DETAIL end From 1280564f2ac2c8b597bdba6345a208531480b76b Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Mon, 17 Feb 2025 08:13:32 +0900 Subject: [PATCH 3/9] Remove unnecessary checks ## Why? Whenever parse_id_invalid_details_system is called, `accept_external_id == true` always holds and `@source.match(Private::EXTERNAL_ID_SYSTEM_PATTERN, true)` has already failed in parse_id, so `@source.match?(Private::EXTERNAL_ID_SYSTEM_PATTERN) == false`. The pattern check therefore always yields "invalid system literal" and the "garbage after system literal" branch is unreachable. 
--- lib/rexml/parsers/baseparser.rb | 5 +---- test/parse/test_document_type_declaration.rb | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 51fb470d..d5b88038 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -733,10 +733,7 @@ def parse_id_invalid_details_system(accept_external_id:) if @source.match?(/(?:\s+[^'"]|\s*[\[>])/um) return "system literal is missing" end - unless @source.match?(Private::EXTERNAL_ID_SYSTEM_PATTERN) - return "invalid system literal" - end - return "garbage after system literal" + return "invalid system literal" end "ID type is missing" end diff --git a/test/parse/test_document_type_declaration.rb b/test/parse/test_document_type_declaration.rb index e0a663ef..3d9945bf 100644 --- a/test/parse/test_document_type_declaration.rb +++ b/test/parse/test_document_type_declaration.rb @@ -157,6 +157,21 @@ def test_no_literal DETAIL end + def test_garbage_invalid_system_literal + exception = assert_raise(REXML::ParseException) do + parse(<<-DOCTYPE) + + DETAIL + end + def test_garbage_after_literal exception = assert_raise(REXML::ParseException) do parse(<<-DOCTYPE) From 00b0a80a0091f41eb4adf74429ff3122a6928be2 Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Tue, 18 Feb 2025 08:37:48 +0900 Subject: [PATCH 4/9] add test_garbage_after_public_ID_literal --- test/parse/test_document_type_declaration.rb | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test/parse/test_document_type_declaration.rb b/test/parse/test_document_type_declaration.rb index 3d9945bf..cc37ad3f 100644 --- a/test/parse/test_document_type_declaration.rb +++ b/test/parse/test_document_type_declaration.rb @@ -235,6 +235,21 @@ def test_double_quote end class TestSystemLiteral < self + def test_garbage_after_public_ID_literal + exception = assert_raise(REXML::ParseException) do + parse(<<-DOCTYPE) + + DOCTYPE + end + 
assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed DOCTYPE: garbage after public ID literal +Line: 3 +Position: 54 +Last 80 unconsumed characters: + "public-id-literal" 'system> + DETAIL + end + def test_garbage_after_literal exception = assert_raise(REXML::ParseException) do parse(<<-DOCTYPE) From 4780f797c072c636a0f9e3f6ca67d45c9c818e19 Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Sat, 13 Sep 2025 21:01:00 +0900 Subject: [PATCH 5/9] Remove the unnecessary accept_external_id variable # Why? accept_external_id is always true. --- lib/rexml/parsers/baseparser.rb | 57 +++++++++++++-------------------- 1 file changed, 23 insertions(+), 34 deletions(-) diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index d5b88038..7e6a1794 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -310,7 +310,6 @@ def pull_event @source.ensure_buffer else id = parse_id(base_error_message, - accept_external_id: true, accept_public_id: false) if id[0] == "SYSTEM" # For backward compatibility @@ -412,7 +411,6 @@ def pull_event end name = parse_name(base_error_message) id = parse_id(base_error_message, - accept_external_id: true, accept_public_id: true) @source.skip_spaces unless @source.match?(">", true) @@ -670,11 +668,10 @@ def parse_name(base_error_message) end def parse_id(base_error_message, - accept_external_id:, accept_public_id:) @source.skip_spaces if @source.match?("PUBLIC", true) - if accept_external_id and (md = @source.match(Private::EXTERNAL_ID_PUBLIC_PATTERN, true)) + if (md = @source.match(Private::EXTERNAL_ID_PUBLIC_PATTERN, true)) pubid = system = nil pubid_literal = md[1] pubid = pubid_literal[1..-2] if pubid_literal # Remove quote @@ -687,16 +684,15 @@ def parse_id(base_error_message, pubid = pubid_literal[1..-2] if pubid_literal # Remove quote return ["PUBLIC", pubid, nil] end - details = parse_id_invalid_details_public(accept_external_id: accept_external_id, - accept_public_id: accept_public_id) + 
details = parse_id_invalid_details_public(accept_public_id: accept_public_id) elsif @source.match?("SYSTEM", true) - if accept_external_id and (md = @source.match(Private::EXTERNAL_ID_SYSTEM_PATTERN, true)) + if (md = @source.match(Private::EXTERNAL_ID_SYSTEM_PATTERN, true)) system = nil system_literal = md[1] system = system_literal[1..-2] if system_literal # Remove quote return ["SYSTEM", nil, system] end - details = parse_id_invalid_details_system(accept_external_id: accept_external_id) + details = parse_id_invalid_details_system else details = "invalid ID type" end @@ -704,38 +700,31 @@ def parse_id(base_error_message, raise REXML::ParseException.new(message, @source) end - def parse_id_invalid_details_public(accept_external_id:, - accept_public_id:) - if (accept_external_id or accept_public_id) - if @source.match?(/(?:\s+[^'"]|\s*[\[>])/um) - return "public ID literal is missing" - end - unless @source.match?(Private::PUBLIC_ID_PATTERN) - return "invalid public ID literal" + def parse_id_invalid_details_public(accept_public_id:) + if @source.match?(/(?:\s+[^'"]|\s*[\[>])/um) + return "public ID literal is missing" + end + unless @source.match?(Private::PUBLIC_ID_PATTERN) + return "invalid public ID literal" + end + if accept_public_id + if @source.match?(/\s+#{PUBIDLITERAL}\s+[^'"]/um) + return "system ID literal is missing" end - if accept_public_id - if @source.match?(/\s+#{PUBIDLITERAL}\s+[^'"]/um) - return "system ID literal is missing" - end - unless @source.match?(Private::EXTERNAL_ID_PUBLIC_PATTERN) - return "invalid system literal" - end - return "garbage after system literal" - else - return "garbage after public ID literal" + unless @source.match?(Private::EXTERNAL_ID_PUBLIC_PATTERN) + return "invalid system literal" end + return "garbage after system literal" + else + return "garbage after public ID literal" end - "ID type is missing" end - def parse_id_invalid_details_system(accept_external_id:) - if accept_external_id - if 
@source.match?(/(?:\s+[^'"]|\s*[\[>])/um) - return "system literal is missing" - end - return "invalid system literal" + def parse_id_invalid_details_system + if @source.match?(/(?:\s+[^'"]|\s*[\[>])/um) + return "system literal is missing" end - "ID type is missing" + return "invalid system literal" end def process_comment From 15e75eb61027c9836f3d21f8f44c23b989307a73 Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Sat, 13 Sep 2025 22:17:56 +0900 Subject: [PATCH 6/9] Integrate the parse_id_invalid_details_system method --- lib/rexml/parsers/baseparser.rb | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 7e6a1794..f4f210bc 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -692,7 +692,11 @@ def parse_id(base_error_message, system = system_literal[1..-2] if system_literal # Remove quote return ["SYSTEM", nil, system] end - details = parse_id_invalid_details_system + if @source.match?(/(?:\s+[^'"]|\s*[\[>])/um) + details = "system literal is missing" + else + details = "invalid system literal" + end else details = "invalid ID type" end @@ -720,13 +724,6 @@ def parse_id_invalid_details_public(accept_public_id:) end end - def parse_id_invalid_details_system - if @source.match?(/(?:\s+[^'"]|\s*[\[>])/um) - return "system literal is missing" - end - return "invalid system literal" - end - def process_comment text = @source.read_until("-->") unless text.chomp!("-->") From 4e363ca6fd7351e6602f3a587113f6f57cc73eff Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Sat, 13 Sep 2025 22:00:35 +0900 Subject: [PATCH 7/9] Remove the unnecessary parse_id_invalid_details_public path # Why? If `accept_public_id == true` and `@source.match?(Private::PUBLIC_ID_PATTERN) == true`, the `parse_id_invalid_details_public` method is not reached. 
--- lib/rexml/parsers/baseparser.rb | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index f4f210bc..5da49191 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -684,7 +684,7 @@ def parse_id(base_error_message, pubid = pubid_literal[1..-2] if pubid_literal # Remove quote return ["PUBLIC", pubid, nil] end - details = parse_id_invalid_details_public(accept_public_id: accept_public_id) + details = parse_id_invalid_details_public elsif @source.match?("SYSTEM", true) if (md = @source.match(Private::EXTERNAL_ID_SYSTEM_PATTERN, true)) system = nil @@ -704,24 +704,14 @@ def parse_id(base_error_message, raise REXML::ParseException.new(message, @source) end - def parse_id_invalid_details_public(accept_public_id:) + def parse_id_invalid_details_public if @source.match?(/(?:\s+[^'"]|\s*[\[>])/um) return "public ID literal is missing" end unless @source.match?(Private::PUBLIC_ID_PATTERN) return "invalid public ID literal" end - if accept_public_id - if @source.match?(/\s+#{PUBIDLITERAL}\s+[^'"]/um) - return "system ID literal is missing" - end - unless @source.match?(Private::EXTERNAL_ID_PUBLIC_PATTERN) - return "invalid system literal" - end - return "garbage after system literal" - else - return "garbage after public ID literal" - end + return "garbage after public ID literal" end def process_comment From 30118a94845b98228b4dffa8821b791f2a98c027 Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Sat, 13 Sep 2025 22:47:10 +0900 Subject: [PATCH 8/9] Integrate the parse_id_invalid_details_public method --- lib/rexml/parsers/baseparser.rb | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 5da49191..d2e431c1 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -684,7 +684,13 @@ def parse_id(base_error_message, 
pubid = pubid_literal[1..-2] if pubid_literal # Remove quote return ["PUBLIC", pubid, nil] end - details = parse_id_invalid_details_public + if @source.match?(/(?:\s+[^'"]|\s*[\[>])/um) + details = "public ID literal is missing" + elsif !@source.match?(Private::PUBLIC_ID_PATTERN) + details = "invalid public ID literal" + else + details = "garbage after public ID literal" + end elsif @source.match?("SYSTEM", true) if (md = @source.match(Private::EXTERNAL_ID_SYSTEM_PATTERN, true)) system = nil @@ -704,16 +710,6 @@ def parse_id(base_error_message, raise REXML::ParseException.new(message, @source) end - def parse_id_invalid_details_public - if @source.match?(/(?:\s+[^'"]|\s*[\[>])/um) - return "public ID literal is missing" - end - unless @source.match?(Private::PUBLIC_ID_PATTERN) - return "invalid public ID literal" - end - return "garbage after public ID literal" - end - def process_comment text = @source.read_until("-->") unless text.chomp!("-->") From d42a439f61e2f6dff60d45915ef9d2c7c1cd5a72 Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Sun, 14 Sep 2025 17:17:34 +0900 Subject: [PATCH 9/9] Move the raise statements in parse_id individually. 
--- lib/rexml/parsers/baseparser.rb | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index d2e431c1..0b48ec14 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -677,37 +677,33 @@ def parse_id(base_error_message, pubid = pubid_literal[1..-2] if pubid_literal # Remove quote system_literal = md[2] system = system_literal[1..-2] if system_literal # Remove quote - return ["PUBLIC", pubid, system] + ["PUBLIC", pubid, system] elsif accept_public_id and (md = @source.match(Private::PUBLIC_ID_PATTERN, true)) pubid = system = nil pubid_literal = md[1] pubid = pubid_literal[1..-2] if pubid_literal # Remove quote - return ["PUBLIC", pubid, nil] - end - if @source.match?(/(?:\s+[^'"]|\s*[\[>])/um) - details = "public ID literal is missing" + ["PUBLIC", pubid, nil] + elsif @source.match?(/(?:\s+[^'"]|\s*[\[>])/um) + raise REXML::ParseException.new("#{base_error_message}: public ID literal is missing", @source) elsif !@source.match?(Private::PUBLIC_ID_PATTERN) - details = "invalid public ID literal" + raise REXML::ParseException.new("#{base_error_message}: invalid public ID literal", @source) else - details = "garbage after public ID literal" + raise REXML::ParseException.new("#{base_error_message}: garbage after public ID literal", @source) end elsif @source.match?("SYSTEM", true) if (md = @source.match(Private::EXTERNAL_ID_SYSTEM_PATTERN, true)) system = nil system_literal = md[1] system = system_literal[1..-2] if system_literal # Remove quote - return ["SYSTEM", nil, system] - end - if @source.match?(/(?:\s+[^'"]|\s*[\[>])/um) - details = "system literal is missing" + ["SYSTEM", nil, system] + elsif @source.match?(/(?:\s+[^'"]|\s*[\[>])/um) + raise REXML::ParseException.new("#{base_error_message}: system literal is missing", @source) else - details = "invalid system literal" + raise REXML::ParseException.new("#{base_error_message}: 
invalid system literal", @source) end else - details = "invalid ID type" + raise REXML::ParseException.new("#{base_error_message}: invalid ID type", @source) end - message = "#{base_error_message}: #{details}" - raise REXML::ParseException.new(message, @source) end def process_comment