From 15308e63000706d5e9ff034c864098222b3a6846 Mon Sep 17 00:00:00 2001 From: nogginly Date: Sun, 15 Jan 2023 22:51:44 -0500 Subject: [PATCH 1/5] Add benchmark run target --- shard.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/shard.yml b/shard.yml index 36601fe..c93fc7e 100644 --- a/shard.yml +++ b/shard.yml @@ -7,3 +7,7 @@ authors: crystal: 0.26.1 license: MIT + +targets: + benchmark: + main: benchmark/compare.cr From 11e4829ac8e8adcf4be4cc7e270a76b6317bdddc Mon Sep 17 00:00:00 2001 From: nogginly Date: Sun, 15 Jan 2023 22:52:27 -0500 Subject: [PATCH 2/5] Tweak to run with latest Crystal --- benchmark/compare.cr | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/benchmark/compare.cr b/benchmark/compare.cr index fe55ea7..36b9207 100644 --- a/benchmark/compare.cr +++ b/benchmark/compare.cr @@ -1,7 +1,8 @@ require "../src/crystal-dfa" require "benchmark" -rx1, rx2 = nil, nil +rx1 = nil +rx2 = nil expression = /(?:x+x+)+y/ string = "xxxxxxxxxxxxxy" # expression = /"([^"\\]|\\.)*"/ @@ -13,26 +14,27 @@ string = "xxxxxxxxxxxxxy" puts puts %{building "#{expression}" with Regex (PCRE)} puts Benchmark.measure { rx1 = Regex.new(expression.source) } -rx1 = rx1.not_nil! +rx1ok = rx1.not_nil! + puts %{building "#{expression}" with RegExp (own impl} -puts Benchmark.measure { rx2 = rx1.cr } -rx2 = rx2.not_nil! +puts Benchmark.measure { rx2 = DFA::RegExp.new(expression.source) } # rx1ok.cr } +rx2ok = rx2.not_nil! 
puts puts %{matching "#{string}" a first time with Regex (PCRE)} -puts Benchmark.measure { rx1.match string } -pp rx1.match string +puts Benchmark.measure { rx1ok.match string } +pp rx1ok.match string puts puts %{matching "#{string}" a first time with RegExp (own impl} -puts Benchmark.measure { rx2.match string } -pp rx2.match string +puts Benchmark.measure { rx2ok.match string } +pp rx2ok.match string puts -Benchmark.measure { rx1.not_nil!.match string } -Benchmark.measure { rx2.not_nil!.match string } +Benchmark.measure { rx1ok.match string } +Benchmark.measure { rx2ok.match string } Benchmark.ips do |x| - x.report("Regex (PCRE) matching : #{string}") { rx1.not_nil!.match string } - x.report("RegExp (own impl) matching : #{string}") { rx2.not_nil!.match string } + x.report("Regex (PCRE) matching : #{string}") { rx1ok.match string } + x.report("RegExp (own impl) matching : #{string}") { rx2ok.match string } end puts From 40db898d9f75618df96efa935e7df18848b1f3f8 Mon Sep 17 00:00:00 2001 From: nogginly Date: Sun, 15 Jan 2023 22:53:08 -0500 Subject: [PATCH 3/5] Fix to nest `it` within `context` --- spec/crystal-dfa/nfa_spec.cr | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/spec/crystal-dfa/nfa_spec.cr b/spec/crystal-dfa/nfa_spec.cr index 21dc54b..2e10adf 100644 --- a/spec/crystal-dfa/nfa_spec.cr +++ b/spec/crystal-dfa/nfa_spec.cr @@ -8,7 +8,7 @@ describe DFA::NFA do DFA::NFA.create_nfa(ast).should eq expected end - it "creates a state for a ConcateNode" do + context "creates a state for a ConcateNode" do it "works for the binary case" do ast = DFA::AST::ConcatNode.new [ DFA::AST::LiteralNode.new('a').as(DFA::AST::ASTNode), @@ -31,7 +31,7 @@ describe DFA::NFA do end end - it "creates a state for an AlternationNode" do + context "creates a state for an AlternationNode" do it "works for the binary case" do ast = DFA::AST::AlternationNode.new [ DFA::AST::LiteralNode.new('a').as(DFA::AST::ASTNode), @@ -99,7 +99,7 @@ describe DFA::NFA do 
DFA::NFA.create_nfa(ast).should eq expected end - it "creates a state for a CharacterClassNode([a-z]) One-or-More" do + context "creates a state for a CharacterClassNode([a-z]) One-or-More" do it "creates a state for the simple range case [a-z]" do ast = DFA::AST::CharacterClassNode.new(false, Array(String).new, [('a'..'z')]) expected = r_state('a', 'z') From 65f511404f2df5cc600e2f34f4f471c5feed8764 Mon Sep 17 00:00:00 2001 From: nogginly Date: Sun, 15 Jan 2023 22:53:40 -0500 Subject: [PATCH 4/5] Fix to match Tuple(*T) in latest Crystal --- src/core_ext/range.cr | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/core_ext/range.cr b/src/core_ext/range.cr index 7722575..8495697 100644 --- a/src/core_ext/range.cr +++ b/src/core_ext/range.cr @@ -13,10 +13,9 @@ module IntersectionMethods(T) end macro included - def disjoin(other : self) [other] + (self - other) - #[self] + (other - self) + # [self] + (other - self) end end @@ -47,7 +46,7 @@ module IntersectionMethods(T) end end -struct Tuple(T) +struct Tuple(*T) include IntersectionMethods(T) def -(other : self) From 88bedd2117d1cba91200ce331b4810991e2a329f Mon Sep 17 00:00:00 2001 From: nogginly Date: Sun, 15 Jan 2023 22:54:16 -0500 Subject: [PATCH 5/5] Specify return type for latest Crystal --- src/crystal-dfa/parser.cr | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/crystal-dfa/parser.cr b/src/crystal-dfa/parser.cr index b3413e6..3eb08b1 100644 --- a/src/crystal-dfa/parser.cr +++ b/src/crystal-dfa/parser.cr @@ -143,23 +143,23 @@ module DFA end class NameParslet < PrefixParslet - def parse(parser, token) + def parse(parser, token) : AST::ASTNode AST::LiteralNode.new(token[:value].not_nil!) 
end end class AnyCharacterParslet < PrefixParslet - def parse(parser, token) + def parse(parser, token) : AST::ASTNode AST::CharacterClassNode.new(false, Array(String).new, ANY_CHAR_RANGES) end end class SpecialCharacterClassParslet < PrefixParslet - def parse(parser, token) + def parse(parser, token) : AST::ASTNode _next = parser.consume raise "unexpected end of input" unless _next - value = _next[:type] == :LITERAL ? _next[:value] : # translate specia characters back to + value = _next[:type] == :LITERAL ? _next[:value] : # translate specia characters back to # their string representation because # we won't interprete them inside a # characterclass @@ -180,7 +180,7 @@ Lexer::IDENTIFIERS.key_for(_next[:type]) end class GroupParslet < PrefixParslet - def parse(parser, token) + def parse(parser, token) : AST::ASTNode # ignore non capturing group designators if (_peek = parser.peek) && _peek[:type] == :QSTM @@ -199,7 +199,7 @@ Lexer::IDENTIFIERS.key_for(_next[:type]) end class CharacterRangeParslet < InfixParslet - def parse(parser, left, token) + def parse(parser, left, token) : AST::ASTNode parser.consume(:MINUS) right = parser.parseExpression(precedence) @@ -218,7 +218,7 @@ Lexer::IDENTIFIERS.key_for(_next[:type]) end class CharacterClassParslet < PrefixParslet - def parse(parser, token) + def parse(parser, token) : AST::ASTNode negate = (peek = parser.peek) && (peek[:type] == :NEGATE) && parser.consume ? true : false @@ -266,7 +266,7 @@ Lexer::IDENTIFIERS.key_for(_next[:type]) end class ConcatParslet < InfixParslet - def parse(parser, left : AST::ASTNode, token) + def parse(parser, left : AST::ASTNode, token) : AST::ASTNode exp = AST::ConcatNode.new([left.as(AST::ASTNode)]) _next = parser.parseExpression(Precedence[:LITERAL] - 1).as(AST::ASTNode?) 
@@ -282,7 +282,7 @@ Lexer::IDENTIFIERS.key_for(_next[:type]) end class CurlyQuantifierParslet < InfixParslet - def parse(parser, left : AST::ASTNode, token) + def parse(parser, left : AST::ASTNode, token) : AST::ASTNode parser.consume(:LCURLY) values = parse_quantifications( @@ -318,14 +318,14 @@ Lexer::IDENTIFIERS.key_for(_next[:type]) end class QuantifierParslet(T) < InfixParslet - def parse(parser, left : AST::ASTNode, token) + def parse(parser, left : AST::ASTNode, token) : AST::ASTNode parser.consume T.new(left) end end class AlternationParslet < InfixParslet - def parse(parser, left : AST::ASTNode, token) + def parse(parser, left : AST::ASTNode, token) : AST::ASTNode exp = AST::AlternationNode.new([left.as(AST::ASTNode)]) while (peek = parser.peek) && (peek[:type] == :PIPE) parser.consume