Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions src/custom_types.c
Original file line number Diff line number Diff line change
Expand Up @@ -171,11 +171,14 @@ static char *re2_func_map[][2] = {
{"countmatches", "countMatches"},
{"countmatchescaseinsensitive", "countMatchesCaseInsensitive"},
{"extractall", "extractAll"},
{"extractallgroupshorizontal", "extractAllGroupsHorizontal"},
{"extractallgroupsvertical", "extractAllGroupsVertical"},
{"extractgroups", "extractGroups"},
{"multimatchallindices", "multiMatchAllIndices"},
{"multimatchany", "multiMatchAny"},
{"multimatchanyindex", "multiMatchAnyIndex"},
{"regexpextract", "regexpExtract"},
{"regexpquotemeta", "regexpQuoteMeta"},
{"replaceregexpall", "replaceRegexpAll"},
{"replaceregexpone", "replaceRegexpOne"},
{NULL, NULL},
Expand Down Expand Up @@ -557,8 +560,17 @@ chfdw_check_for_custom_function(Oid funcid)
else if (STR_EQUAL(extname, "re2"))
{
/* pg_re2: pushdown to ClickHouse RE2 functions; 1:1 except re2splitbyregexp, whose first two arguments are swapped. */
entry->cf_type = CF_CH_FUNCTION;
strlcpy(entry->custom_name, re2_func_name(proname), NAMEDATALEN);
if (STR_EQUAL(proname, "re2splitbyregexp"))
{
	/*
	 * Not a plain rename: ClickHouse splitByRegexp expects
	 * (pattern, haystack[, max]), so this function gets its own type
	 * (CF_RE2_SPLIT_BY_REGEX) instead of CF_CH_FUNCTION.
	 */
	entry->cf_type = CF_RE2_SPLIT_BY_REGEX;

	/* Bounded copy, matching the other branch. */
	strlcpy(entry->custom_name, "splitByRegexp", NAMEDATALEN);
}
else
{
	/* Every other re2 function maps by name via re2_func_name(). */
	entry->cf_type = CF_CH_FUNCTION;
	strlcpy(entry->custom_name, re2_func_name(proname), NAMEDATALEN);
}
}
else if (STR_EQUAL(extname, "fuzzystrmatch"))
{
Expand Down
18 changes: 18 additions & 0 deletions src/deparse.c
Original file line number Diff line number Diff line change
Expand Up @@ -2771,6 +2771,24 @@ deparseFuncExpr(FuncExpr * node, deparse_expr_cxt * context)
appendStringInfoChar(buf, ')');
return;
}
case CF_RE2_SPLIT_BY_REGEX:
{
	/*
	 * The two sides disagree on argument order:
	 *   re2splitbyregexp(haystack, pattern[, max])
	 * is emitted as
	 *   splitByRegexp(pattern, haystack[, max])
	 */
	Expr	   *haystack = (Expr *) linitial(node->args);
	Expr	   *pattern = (Expr *) list_nth(node->args, 1);

	appendStringInfoChar(buf, '(');
	deparseExpr(pattern, context);
	appendStringInfoString(buf, ", ");
	deparseExpr(haystack, context);

	/* The optional third argument keeps its position. */
	if (list_length(node->args) > 2)
	{
		appendStringInfoString(buf, ", ");
		deparseExpr((Expr *) list_nth(node->args, 2), context);
	}

	appendStringInfoChar(buf, ')');
	return;
}
case CF_REPLACE_REGEX:
{
/* replaceRegexpOne() or replaceRegexpAll() */
Expand Down
3 changes: 3 additions & 0 deletions src/include/fdw.h
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,9 @@ typedef enum
CF_CH_FUNCTION, /* adapted clickhouse function */
CF_MATCH, /* regexp_match function */
CF_SPLIT_BY_REGEX, /* regexp_split_to_array → splitByRegexp */
CF_RE2_SPLIT_BY_REGEX, /* re2splitbyregexp → splitByRegexp, swap
* (haystack, pattern[, max]) → (pattern,
* haystack[, max]) */
CF_REPLACE_REGEX, /* regexp_replace → replaceRegexpOne or
* replaceRegexpAll */
CF_REGEX_MATCH, /* ~ POSIX regex operator */
Expand Down
78 changes: 78 additions & 0 deletions test/expected/re2_functions.out
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,84 @@ SELECT * FROM t1 WHERE re2multimatchallindices(val, VARIADIC ARRAY['POSIX','PCRE
1 | POSIX uses BRE and ERE
(1 row)

EXPLAIN (VERBOSE, COSTS OFF)
SELECT id FROM t1 WHERE re2regexpquotemeta(val) <> val;
QUERY PLAN
--------------------------------------------------------------------------------
Foreign Scan on re2_test.t1
Output: id
Remote SQL: SELECT id FROM re2_test.t1 WHERE ((regexpQuoteMeta(val) <> val))
(3 rows)

SELECT id FROM t1 WHERE re2regexpquotemeta(val) <> val;
id
----
(0 rows)

EXPLAIN (VERBOSE, COSTS OFF)
SELECT id FROM t1 WHERE re2splitbyregexp(val, ' ') = ARRAY['re2','uses','finite','automata'];
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------
Foreign Scan on re2_test.t1
Output: id
Remote SQL: SELECT id FROM re2_test.t1 WHERE ((splitByRegexp(' ', val, 0) = ['re2','uses','finite','automata']))
(3 rows)

SELECT id FROM t1 WHERE re2splitbyregexp(val, ' ') = ARRAY['re2','uses','finite','automata'];
id
----
2
(1 row)

EXPLAIN (VERBOSE, COSTS OFF)
SELECT id FROM t1 WHERE re2splitbyregexp(val, ' ', 2) = ARRAY['re2','uses'];
QUERY PLAN
------------------------------------------------------------------------------------------------
Foreign Scan on re2_test.t1
Output: id
Remote SQL: SELECT id FROM re2_test.t1 WHERE ((splitByRegexp(' ', val, 2) = ['re2','uses']))
(3 rows)

SELECT id FROM t1 WHERE re2splitbyregexp(val, ' ', 2) = ARRAY['re2','uses'];
id
----
2
(1 row)

EXPLAIN (VERBOSE, COSTS OFF)
SELECT id FROM t1 WHERE array_length(re2extractallgroupsvertical(val, '(\w+) (\w+)'), 1) > 0;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------
Foreign Scan on re2_test.t1
Output: id
Remote SQL: SELECT id FROM re2_test.t1 WHERE ((length(extractAllGroupsVertical(val, '(\\w+) (\\w+)')) > 0))
(3 rows)

SELECT id FROM t1 WHERE array_length(re2extractallgroupsvertical(val, '(\w+) (\w+)'), 1) > 0;
id
----
1
2
3
(3 rows)

EXPLAIN (VERBOSE, COSTS OFF)
SELECT id FROM t1 WHERE array_length(re2extractallgroupshorizontal(val, '(\w+) (\w+)'), 1) = 2;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------
Foreign Scan on re2_test.t1
Output: id
Remote SQL: SELECT id FROM re2_test.t1 WHERE ((length(extractAllGroupsHorizontal(val, '(\\w+) (\\w+)')) = 2))
(3 rows)

SELECT id FROM t1 WHERE array_length(re2extractallgroupshorizontal(val, '(\w+) (\w+)'), 1) = 2;
id
----
1
2
3
(3 rows)

DROP EXTENSION re2;
DROP USER MAPPING FOR CURRENT_USER SERVER re2_svr;
SELECT clickhouse_raw_query('DROP DATABASE re2_test');
Expand Down
20 changes: 20 additions & 0 deletions test/sql/re2_functions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,26 @@ EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM t1 WHERE re2multimatchallindices(val, VARIADIC ARRAY['POSIX','PCRE']) = ARRAY[1];
SELECT * FROM t1 WHERE re2multimatchallindices(val, VARIADIC ARRAY['POSIX','PCRE']) = ARRAY[1];

EXPLAIN (VERBOSE, COSTS OFF)
SELECT id FROM t1 WHERE re2regexpquotemeta(val) <> val;
SELECT id FROM t1 WHERE re2regexpquotemeta(val) <> val;

EXPLAIN (VERBOSE, COSTS OFF)
SELECT id FROM t1 WHERE re2splitbyregexp(val, ' ') = ARRAY['re2','uses','finite','automata'];
SELECT id FROM t1 WHERE re2splitbyregexp(val, ' ') = ARRAY['re2','uses','finite','automata'];

EXPLAIN (VERBOSE, COSTS OFF)
SELECT id FROM t1 WHERE re2splitbyregexp(val, ' ', 2) = ARRAY['re2','uses'];
SELECT id FROM t1 WHERE re2splitbyregexp(val, ' ', 2) = ARRAY['re2','uses'];

EXPLAIN (VERBOSE, COSTS OFF)
SELECT id FROM t1 WHERE array_length(re2extractallgroupsvertical(val, '(\w+) (\w+)'), 1) > 0;
SELECT id FROM t1 WHERE array_length(re2extractallgroupsvertical(val, '(\w+) (\w+)'), 1) > 0;

EXPLAIN (VERBOSE, COSTS OFF)
SELECT id FROM t1 WHERE array_length(re2extractallgroupshorizontal(val, '(\w+) (\w+)'), 1) = 2;
SELECT id FROM t1 WHERE array_length(re2extractallgroupshorizontal(val, '(\w+) (\w+)'), 1) = 2;

DROP EXTENSION re2;
DROP USER MAPPING FOR CURRENT_USER SERVER re2_svr;
SELECT clickhouse_raw_query('DROP DATABASE re2_test');
Expand Down
Loading