@@ -64,9 +64,9 @@ public void testPatternsLabelMode_ShowNumberedToken_ForSimplePatternMethod() {
6464 verifyLogical (root , expectedLogical );
6565
6666 String expectedSparkSql =
67- "SELECT `ENAME`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN"
68- + " '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END, `ENAME`)['pattern'] AS "
69- + " STRING) `patterns_field`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR"
67+ "SELECT `ENAME`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' "
68+ + " ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END, `ENAME`)['pattern']"
69+ + " AS STRING) `patterns_field`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR"
7070 + " `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END,"
7171 + " `ENAME`)['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`\n "
7272 + "FROM `scott`.`EMP`" ;
@@ -85,17 +85,17 @@ public void testPatternsLabelModeWithCustomPattern_ShowNumberedToken_ForSimplePa
8585 + " Sarg['':VARCHAR; NULL AS TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1,"
8686 + " '[A-H]':VARCHAR, '<*>':VARCHAR, 'g':VARCHAR)), $1), 'pattern'))],"
8787 + " tokens=[SAFE_CAST(ITEM(PATTERN_PARSER(CASE(SEARCH($1, Sarg['':VARCHAR; NULL AS"
88- + " TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1, '[A-H]':VARCHAR, '<*>':VARCHAR, 'g':VARCHAR)), "
89- + " $1), 'tokens'))])\n "
88+ + " TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1, '[A-H]':VARCHAR, '<*>':VARCHAR,"
89+ + " 'g':VARCHAR)), $1), 'tokens'))])\n "
9090 + " LogicalTableScan(table=[[scott, EMP]])\n " ;
9191 verifyLogical (root , expectedLogical );
9292
9393 String expectedSparkSql =
94- "SELECT `ENAME`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN"
95- + " '' ELSE REGEXP_REPLACE(`ENAME`, '[A-H]', '<*>', 'g') END, `ENAME`)['pattern'] AS STRING) "
96- + " `patterns_field`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = "
97- + " '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[A-H]', '<*>', 'g') END, `ENAME`)['tokens'] AS "
98- + " MAP< VARCHAR, VARCHAR ARRAY >) `tokens`\n "
94+ "SELECT `ENAME`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' "
95+ + " ELSE REGEXP_REPLACE(`ENAME`, '[A-H]', '<*>', 'g') END, `ENAME`)['pattern'] AS"
96+ + " STRING) `patterns_field`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR"
97+ + " `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[A-H]', '<*>', 'g') END,"
98+ + " `ENAME`)['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`\n "
9999 + "FROM `scott`.`EMP`" ;
100100 verifyPPLToSparkSQL (root , expectedSparkSql );
101101 }
@@ -138,12 +138,12 @@ public void testPatternsLabelModeWithPartitionBy_ShowNumberedToken_SimplePattern
138138 verifyLogical (root , expectedLogical );
139139
140140 String expectedSparkSql =
141- "SELECT `ENAME`, `DEPTNO`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME`"
142- + " = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END,"
143- + " `ENAME`)['pattern'] AS STRING) `patterns_field`, TRY_CAST(PATTERN_PARSER(CASE"
144- + " WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`,"
145- + " '[a-zA-Z0-9]+', '<*>', 'g') END, `ENAME`)['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) "
146- + " `tokens`\n "
141+ "SELECT `ENAME`, `DEPTNO`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = "
142+ + " '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END,"
143+ + " `ENAME`)['pattern'] AS STRING) `patterns_field`, TRY_CAST(PATTERN_PARSER(CASE WHEN "
144+ + " `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`,"
145+ + " '[a-zA-Z0-9]+', '<*>', 'g') END, `ENAME`)['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY"
146+ + " >) `tokens`\n "
147147 + "FROM `scott`.`EMP`" ;
148148 verifyPPLToSparkSQL (root , expectedSparkSql );
149149 }
@@ -254,9 +254,9 @@ public void testPatternsAggregationMode_NotShowNumberedToken_ForSimplePatternMet
254254
255255 String expectedSparkSql =
256256 "SELECT CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`,"
257- + " '[a-zA-Z0-9]+', '<*>', 'g') END `patterns_field`, COUNT(CASE WHEN `ENAME` IS NULL OR "
258- + " `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END )"
259- + " `pattern_count`, `TAKE`(`ENAME`, 10) `sample_logs`\n "
257+ + " '[a-zA-Z0-9]+', '<*>', 'g') END `patterns_field`, COUNT(CASE WHEN `ENAME` IS NULL"
258+ + " OR `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g')"
259+ + " END) `pattern_count`, `TAKE`(`ENAME`, 10) `sample_logs`\n "
260260 + "FROM `scott`.`EMP`\n "
261261 + "GROUP BY CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE"
262262 + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END" ;
@@ -282,12 +282,13 @@ public void testPatternsAggregationMode_ShowNumberedToken_ForSimplePatternMethod
282282
283283 String expectedSparkSql =
284284 "SELECT TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE"
285- + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END, `TAKE`(`ENAME`, 10))['pattern']"
286- + " AS STRING) `patterns_field`, COUNT(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN"
287- + " '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END) `pattern_count`,"
288- + " TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE"
289- + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END, `TAKE`(`ENAME`, 10))['tokens']"
290- + " AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`, `TAKE`(`ENAME`, 10) `sample_logs`\n "
285+ + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END, `TAKE`(`ENAME`,"
286+ + " 10))['pattern'] AS STRING) `patterns_field`, COUNT(CASE WHEN `ENAME` IS NULL OR"
287+ + " `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END)"
288+ + " `pattern_count`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = ''"
289+ + " THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END,"
290+ + " `TAKE`(`ENAME`, 10))['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`,"
291+ + " `TAKE`(`ENAME`, 10) `sample_logs`\n "
291292 + "FROM `scott`.`EMP`\n "
292293 + "GROUP BY CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE"
293294 + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END" ;
@@ -317,9 +318,9 @@ public void testPatternsAggregationModeWithGroupBy_ShowNumberedToken_ForSimplePa
317318 + " 10))['pattern'] AS STRING) `patterns_field`, COUNT(CASE WHEN `ENAME` IS NULL OR"
318319 + " `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END)"
319320 + " `pattern_count`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = ''"
320- + " THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END, `TAKE`(`ENAME`, "
321- + " 10))['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`, `TAKE`(`ENAME`, 10) "
322- + " `sample_logs`\n "
321+ + " THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END,"
322+ + " `TAKE`(`ENAME`, 10))['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`,"
323+ + " `TAKE`(`ENAME`, 10) `sample_logs`\n "
323324 + "FROM `scott`.`EMP`\n "
324325 + "GROUP BY `DEPTNO`, CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE"
325326 + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END" ;
0 commit comments