diff --git a/src/custom_types.c b/src/custom_types.c index 6781315..ed19836 100644 --- a/src/custom_types.c +++ b/src/custom_types.c @@ -171,11 +171,14 @@ static char *re2_func_map[][2] = { {"countmatches", "countMatches"}, {"countmatchescaseinsensitive", "countMatchesCaseInsensitive"}, {"extractall", "extractAll"}, + {"extractallgroupshorizontal", "extractAllGroupsHorizontal"}, + {"extractallgroupsvertical", "extractAllGroupsVertical"}, {"extractgroups", "extractGroups"}, {"multimatchallindices", "multiMatchAllIndices"}, {"multimatchany", "multiMatchAny"}, {"multimatchanyindex", "multiMatchAnyIndex"}, {"regexpextract", "regexpExtract"}, + {"regexpquotemeta", "regexpQuoteMeta"}, {"replaceregexpall", "replaceRegexpAll"}, {"replaceregexpone", "replaceRegexpOne"}, {NULL, NULL}, @@ -557,8 +560,17 @@ chfdw_check_for_custom_function(Oid funcid) else if (STR_EQUAL(extname, "re2")) { /* pg_re2: 1:1 pushdown to ClickHouse RE2 functions. */ - entry->cf_type = CF_CH_FUNCTION; - strlcpy(entry->custom_name, re2_func_name(proname), NAMEDATALEN); + if (STR_EQUAL(proname, "re2splitbyregexp")) + { + /* CH splitByRegexp expects (pattern, haystack[, max]). */ + entry->cf_type = CF_RE2_SPLIT_BY_REGEX; + strcpy(entry->custom_name, "splitByRegexp"); + } + else + { + entry->cf_type = CF_CH_FUNCTION; + strlcpy(entry->custom_name, re2_func_name(proname), NAMEDATALEN); + } } else if (STR_EQUAL(extname, "fuzzystrmatch")) { diff --git a/src/deparse.c b/src/deparse.c index adcac2e..3134833 100644 --- a/src/deparse.c +++ b/src/deparse.c @@ -2771,6 +2771,24 @@ deparseFuncExpr(FuncExpr * node, deparse_expr_cxt * context) appendStringInfoChar(buf, ')'); return; } + case CF_RE2_SPLIT_BY_REGEX: + { + /* + * re2splitbyregexp(haystack, pattern[, max]) → + * splitByRegexp(pattern, haystack[, max]) + */ + appendStringInfoChar(buf, '('); + deparseExpr((Expr *) list_nth(node->args, 1), context); + appendStringInfoString(buf, ", "); + deparseExpr((Expr *) linitial(node->args), context); + if (list_length(node->args) >= 3) + { + appendStringInfoString(buf, ", "); + deparseExpr((Expr *) list_nth(node->args, 2), context); + } + appendStringInfoChar(buf, ')'); + return; + } case CF_REPLACE_REGEX: { /* replaceRegexpOne() or replaceRegexpAll() */ diff --git a/src/include/fdw.h b/src/include/fdw.h index 9524e8c..4a2e6d0 100644 --- a/src/include/fdw.h +++ b/src/include/fdw.h @@ -304,6 +304,9 @@ typedef enum CF_CH_FUNCTION, /* adapted clickhouse function */ CF_MATCH, /* regexp_match function */ CF_SPLIT_BY_REGEX, /* regexp_split_to_array → splitByRegexp */ + CF_RE2_SPLIT_BY_REGEX, /* re2splitbyregexp → splitByRegexp, swap + * (haystack, pattern[, max]) → (pattern, + * haystack[, max]) */ CF_REPLACE_REGEX, /* regexp_replace → replaceRegexpOne or * replaceRegexpAll */ CF_REGEX_MATCH, /* ~ POSIX regex operator */ diff --git a/test/expected/re2_functions.out b/test/expected/re2_functions.out index 3c1d014..49ee0dc 100644 --- a/test/expected/re2_functions.out +++ b/test/expected/re2_functions.out @@ -275,6 +275,84 @@ SELECT * FROM t1 WHERE re2multimatchallindices(val, VARIADIC ARRAY['POSIX','PCRE 1 | POSIX uses BRE and ERE (1 row) +EXPLAIN (VERBOSE, COSTS OFF) +SELECT id FROM t1 WHERE re2regexpquotemeta(val) <> val; + QUERY PLAN +-------------------------------------------------------------------------------- + Foreign Scan on re2_test.t1 + Output: id + Remote SQL: SELECT id FROM re2_test.t1 WHERE ((regexpQuoteMeta(val) <> val)) +(3 rows) + +SELECT id FROM t1 WHERE re2regexpquotemeta(val) <> val; + id +---- +(0 rows) + +EXPLAIN (VERBOSE, COSTS OFF) +SELECT id FROM t1 WHERE re2splitbyregexp(val, ' ') = ARRAY['re2','uses','finite','automata']; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------- + Foreign Scan on re2_test.t1 + Output: id + Remote SQL: SELECT id FROM re2_test.t1 WHERE ((splitByRegexp(' ', val, 0) = ['re2','uses','finite','automata'])) +(3 rows) + +SELECT id FROM t1 WHERE re2splitbyregexp(val, ' ') = ARRAY['re2','uses','finite','automata']; + id +---- + 2 +(1 row) + +EXPLAIN (VERBOSE, COSTS OFF) +SELECT id FROM t1 WHERE re2splitbyregexp(val, ' ', 2) = ARRAY['re2','uses']; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Foreign Scan on re2_test.t1 + Output: id + Remote SQL: SELECT id FROM re2_test.t1 WHERE ((splitByRegexp(' ', val, 2) = ['re2','uses'])) +(3 rows) + +SELECT id FROM t1 WHERE re2splitbyregexp(val, ' ', 2) = ARRAY['re2','uses']; + id +---- + 2 +(1 row) + +EXPLAIN (VERBOSE, COSTS OFF) +SELECT id FROM t1 WHERE array_length(re2extractallgroupsvertical(val, '(\w+) (\w+)'), 1) > 0; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------- + Foreign Scan on re2_test.t1 + Output: id + Remote SQL: SELECT id FROM re2_test.t1 WHERE ((length(extractAllGroupsVertical(val, '(\\w+) (\\w+)')) > 0)) +(3 rows) + +SELECT id FROM t1 WHERE array_length(re2extractallgroupsvertical(val, '(\w+) (\w+)'), 1) > 0; + id +---- + 1 + 2 + 3 +(3 rows) + +EXPLAIN (VERBOSE, COSTS OFF) +SELECT id FROM t1 WHERE array_length(re2extractallgroupshorizontal(val, '(\w+) (\w+)'), 1) = 2; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------- + Foreign Scan on re2_test.t1 + Output: id + Remote SQL: SELECT id FROM re2_test.t1 WHERE ((length(extractAllGroupsHorizontal(val, '(\\w+) (\\w+)')) = 2)) +(3 rows) + +SELECT id FROM t1 WHERE array_length(re2extractallgroupshorizontal(val, '(\w+) (\w+)'), 1) = 2; + id +---- + 1 + 2 + 3 +(3 rows) + DROP EXTENSION re2; DROP USER MAPPING FOR CURRENT_USER SERVER re2_svr; SELECT clickhouse_raw_query('DROP DATABASE re2_test'); diff --git a/test/sql/re2_functions.sql b/test/sql/re2_functions.sql index baed66c..d124581 100644 --- a/test/sql/re2_functions.sql +++ b/test/sql/re2_functions.sql @@ -85,6 +85,26 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2multimatchallindices(val, VARIADIC ARRAY['POSIX','PCRE']) = ARRAY[1]; SELECT * FROM t1 WHERE re2multimatchallindices(val, VARIADIC ARRAY['POSIX','PCRE']) = ARRAY[1]; +EXPLAIN (VERBOSE, COSTS OFF) +SELECT id FROM t1 WHERE re2regexpquotemeta(val) <> val; +SELECT id FROM t1 WHERE re2regexpquotemeta(val) <> val; + +EXPLAIN (VERBOSE, COSTS OFF) +SELECT id FROM t1 WHERE re2splitbyregexp(val, ' ') = ARRAY['re2','uses','finite','automata']; +SELECT id FROM t1 WHERE re2splitbyregexp(val, ' ') = ARRAY['re2','uses','finite','automata']; + +EXPLAIN (VERBOSE, COSTS OFF) +SELECT id FROM t1 WHERE re2splitbyregexp(val, ' ', 2) = ARRAY['re2','uses']; +SELECT id FROM t1 WHERE re2splitbyregexp(val, ' ', 2) = ARRAY['re2','uses']; + +EXPLAIN (VERBOSE, COSTS OFF) +SELECT id FROM t1 WHERE array_length(re2extractallgroupsvertical(val, '(\w+) (\w+)'), 1) > 0; +SELECT id FROM t1 WHERE array_length(re2extractallgroupsvertical(val, '(\w+) (\w+)'), 1) > 0; + +EXPLAIN (VERBOSE, COSTS OFF) +SELECT id FROM t1 WHERE array_length(re2extractallgroupshorizontal(val, '(\w+) (\w+)'), 1) = 2; +SELECT id FROM t1 WHERE array_length(re2extractallgroupshorizontal(val, '(\w+) (\w+)'), 1) = 2; + DROP EXTENSION re2; DROP USER MAPPING FOR CURRENT_USER SERVER re2_svr; SELECT clickhouse_raw_query('DROP DATABASE re2_test');