diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index 8fe287a7..0b48ec14 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -158,6 +158,9 @@ module Private
DEFAULT_ENTITIES_PATTERNS[term] = /{term};/
end
XML_PREFIXED_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
+ EXTERNAL_ID_PUBLIC_PATTERN = /\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um
+ EXTERNAL_ID_SYSTEM_PATTERN = /\s+#{SYSTEMLITERAL}/um
+ PUBLIC_ID_PATTERN = /\s+#{PUBIDLITERAL}/um
end
private_constant :Private
@@ -307,7 +310,6 @@ def pull_event
@source.ensure_buffer
else
id = parse_id(base_error_message,
- accept_external_id: true,
accept_public_id: false)
if id[0] == "SYSTEM"
# For backward compatibility
@@ -409,7 +411,6 @@ def pull_event
end
name = parse_name(base_error_message)
id = parse_id(base_error_message,
- accept_external_id: true,
accept_public_id: true)
@source.skip_spaces
unless @source.match?(">", true)
@@ -667,68 +668,41 @@ def parse_name(base_error_message)
end
def parse_id(base_error_message,
- accept_external_id:,
accept_public_id:)
- if accept_external_id and (md = @source.match(EXTERNAL_ID_PUBLIC, true))
- pubid = system = nil
- pubid_literal = md[1]
- pubid = pubid_literal[1..-2] if pubid_literal # Remove quote
- system_literal = md[2]
- system = system_literal[1..-2] if system_literal # Remove quote
- ["PUBLIC", pubid, system]
- elsif accept_public_id and (md = @source.match(PUBLIC_ID, true))
- pubid = system = nil
- pubid_literal = md[1]
- pubid = pubid_literal[1..-2] if pubid_literal # Remove quote
- ["PUBLIC", pubid, nil]
- elsif accept_external_id and (md = @source.match(EXTERNAL_ID_SYSTEM, true))
- system = nil
- system_literal = md[1]
- system = system_literal[1..-2] if system_literal # Remove quote
- ["SYSTEM", nil, system]
- else
- details = parse_id_invalid_details(accept_external_id: accept_external_id,
- accept_public_id: accept_public_id)
- message = "#{base_error_message}: #{details}"
- raise REXML::ParseException.new(message, @source)
- end
- end
-
- def parse_id_invalid_details(accept_external_id:,
- accept_public_id:)
- public = /\A\s*PUBLIC/um
- system = /\A\s*SYSTEM/um
- if (accept_external_id or accept_public_id) and @source.match?(/#{public}/um)
- if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
- return "public ID literal is missing"
- end
- unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um)
- return "invalid public ID literal"
- end
- if accept_public_id
- if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
- return "system ID literal is missing"
- end
- unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
- return "invalid system literal"
- end
- "garbage after system literal"
+ @source.skip_spaces
+ if @source.match?("PUBLIC", true)
+ if (md = @source.match(Private::EXTERNAL_ID_PUBLIC_PATTERN, true))
+ pubid = system = nil
+ pubid_literal = md[1]
+ pubid = pubid_literal[1..-2] if pubid_literal # Remove quote
+ system_literal = md[2]
+ system = system_literal[1..-2] if system_literal # Remove quote
+ ["PUBLIC", pubid, system]
+ elsif accept_public_id and (md = @source.match(Private::PUBLIC_ID_PATTERN, true))
+ pubid = system = nil
+ pubid_literal = md[1]
+ pubid = pubid_literal[1..-2] if pubid_literal # Remove quote
+ ["PUBLIC", pubid, nil]
+ elsif @source.match?(/(?:\s+[^'"]|\s*[\[>])/um)
+ raise REXML::ParseException.new("#{base_error_message}: public ID literal is missing", @source)
+ elsif !@source.match?(Private::PUBLIC_ID_PATTERN)
+ raise REXML::ParseException.new("#{base_error_message}: invalid public ID literal", @source)
else
- "garbage after public ID literal"
+ raise REXML::ParseException.new("#{base_error_message}: garbage after public ID literal", @source)
end
- elsif accept_external_id and @source.match?(/#{system}/um)
- if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
- return "system literal is missing"
- end
- unless @source.match?(/#{system}\s+#{SYSTEMLITERAL}/um)
- return "invalid system literal"
+ elsif @source.match?("SYSTEM", true)
+ if (md = @source.match(Private::EXTERNAL_ID_SYSTEM_PATTERN, true))
+ system = nil
+ system_literal = md[1]
+ system = system_literal[1..-2] if system_literal # Remove quote
+ ["SYSTEM", nil, system]
+ elsif @source.match?(/(?:\s+[^'"]|\s*[\[>])/um)
+ raise REXML::ParseException.new("#{base_error_message}: system literal is missing", @source)
+ else
+ raise REXML::ParseException.new("#{base_error_message}: invalid system literal", @source)
end
- "garbage after system literal"
else
- unless @source.match?(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
- return "invalid ID type"
- end
- "ID type is missing"
+ raise REXML::ParseException.new("#{base_error_message}: invalid ID type", @source)
end
end
diff --git a/test/parse/test_document_type_declaration.rb b/test/parse/test_document_type_declaration.rb
index d4658b9e..cc37ad3f 100644
--- a/test/parse/test_document_type_declaration.rb
+++ b/test/parse/test_document_type_declaration.rb
@@ -153,7 +153,22 @@ def test_no_literal
Line: 3
Position: 26
Last 80 unconsumed characters:
-SYSTEM>
+>
+ DETAIL
+ end
+
+ def test_garbage_invalid_system_literal
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-DOCTYPE)
+
DETAIL
end
@@ -165,10 +180,10 @@ def test_garbage_after_literal
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed DOCTYPE: garbage after external ID
-Line: 3
-Position: 36
+Line: 1
+Position: 29
Last 80 unconsumed characters:
-x'>
+x'>
DETAIL
end
@@ -200,7 +215,7 @@ def test_content_double_quote
Line: 3
Position: 62
Last 80 unconsumed characters:
-PUBLIC 'double quote " is invalid' "r.dtd">
+ 'double quote " is invalid' "r.dtd">
DETAIL
end
@@ -220,6 +235,21 @@ def test_double_quote
end
class TestSystemLiteral < self
+ def test_garbage_after_public_ID_literal
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-DOCTYPE)
+
+ DETAIL
+ end
+
def test_garbage_after_literal
exception = assert_raise(REXML::ParseException) do
parse(<<-DOCTYPE)
diff --git a/test/parse/test_notation_declaration.rb b/test/parse/test_notation_declaration.rb
index 9e81b6a4..11914d37 100644
--- a/test/parse/test_notation_declaration.rb
+++ b/test/parse/test_notation_declaration.rb
@@ -32,10 +32,10 @@ def test_no_name
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed notation declaration: name is missing
-Line: 5
-Position: 72
+Line: 2
+Position: 62
Last 80 unconsumed characters:
- ]>
+
DETAIL
end
@@ -62,10 +62,10 @@ def test_no_id_type
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed notation declaration: invalid ID type
-Line: 5
-Position: 77
+Line: 2
+Position: 67
Last 80 unconsumed characters:
-> ]>
+>
DETAIL
end
@@ -77,10 +77,10 @@ def test_invalid_id_type
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed notation declaration: invalid ID type
-Line: 5
-Position: 85
+Line: 2
+Position: 75
Last 80 unconsumed characters:
- INVALID> ]>
+INVALID>
DETAIL
end
end
@@ -98,7 +98,7 @@ def test_no_literal
Line: 5
Position: 84
Last 80 unconsumed characters:
- SYSTEM> ]>
+> ]>
DETAIL
end
@@ -110,10 +110,10 @@ def test_garbage_after_literal
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed notation declaration: garbage before end >
-Line: 5
-Position: 103
+Line: 2
+Position: 93
Last 80 unconsumed characters:
-x'> ]>
+x'>
DETAIL
end
@@ -145,7 +145,7 @@ def test_content_double_quote
Line: 5
Position: 129
Last 80 unconsumed characters:
- PUBLIC 'double quote " is invalid' "system-literal"> ]>
+ 'double quote " is invalid' "system-literal"> ]>
DETAIL
end
@@ -173,10 +173,10 @@ def test_garbage_after_literal
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed notation declaration: garbage before end >
-Line: 5
-Position: 123
+Line: 2
+Position: 113
Last 80 unconsumed characters:
-x'> ]>
+x'>
DETAIL
end
@@ -229,7 +229,7 @@ def test_no_literal
Line: 5
Position: 84
Last 80 unconsumed characters:
- PUBLIC> ]>
+> ]>
DETAIL
end
@@ -244,7 +244,7 @@ def test_literal_content_double_quote
Line: 5
Position: 128
Last 80 unconsumed characters:
- PUBLIC 'double quote \" is invalid in PubidLiteral'> ]>
+ 'double quote \" is invalid in PubidLiteral'> ]>
DETAIL
end