diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f3710f..4589a08 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,12 @@ All notable changes to this project will be documented in this file. It uses the [Semantic Versioning]: https://semver.org/spec/v2.0.0.html "Semantic Versioning 2.0.0" +## [v0.1.2] — Unreleased + +Fixed memory safety in the C++ wrapper: result buffers are now allocated with `palloc_extended()` and `MCXT_ALLOC_NO_OOM`, so allocation failures are returned to the caller as "out of memory" errors instead of being raised from inside C++ code. + + [v0.1.2]: https://github.com/clickhouse/pg_re2/compare/v0.1.1...v0.1.2 + ## [v0.1.1] — 2026-04-16 This release makes binary-only changes. Once installed, any existing use of diff --git a/META.json b/META.json index c07b5c5..7f8cc4a 100644 --- a/META.json +++ b/META.json @@ -1,7 +1,7 @@ { "name": "re2", "abstract": "ClickHouse-compatible regex functions using RE2", - "version": "0.1.1", + "version": "0.1.2", "maintainer": "Philip Dubé", "license": "postgresql", "provides": { @@ -9,7 +9,7 @@ "abstract": "ClickHouse-compatible regex functions using RE2", "docfile": "doc/re2.md", "file": "re2.control", - "version": "0.1.1" + "version": "0.1.2" } }, "prereqs": { diff --git a/doc/re2.md b/doc/re2.md index 03d0305..6e09774 100644 --- a/doc/re2.md +++ b/doc/re2.md @@ -1,4 +1,4 @@ -re2 0.1.1 +re2 0.1.2 ========= ## Synopsis diff --git a/src/re2_wrapper.cpp b/src/re2_wrapper.cpp index e2db56a..88de54f 100644 --- a/src/re2_wrapper.cpp +++ b/src/re2_wrapper.cpp @@ -15,6 +15,7 @@ extern "C" #include #include #include +#include struct re2_pattern { @@ -119,35 +120,43 @@ re2_extract_all(const re2_pattern *pat, const char *text, size_t text_len, int * int ngroups = pat->re.NumberOfCapturingGroups(); int target = ngroups > 0 ? 
1 : 0; int needed = target + 1; - int capacity = 16; - int n = 0; - re2_span *out = (re2_span *)palloc(capacity * sizeof(re2_span)); size_t pos = 0; - while (pos <= text_len) - { - re2::StringPiece sub[2]; - if (!pat->re.Match(input, pos, input.size(), re2::RE2::UNANCHORED, sub, needed)) - break; + errbuf[0] = '\0'; + *count = 0; - if (n == capacity) + std::vector spans; + try + { + while (pos <= text_len) { - capacity *= 2; - out = (re2_span *)repalloc(out, capacity * sizeof(re2_span)); - } - out[n++] = sp_to_span(sub[target]); + re2::StringPiece sub[2]; + if (!pat->re.Match(input, pos, input.size(), re2::RE2::UNANCHORED, sub, needed)) + break; - size_t match_end = (sub[0].data() - text) + sub[0].size(); - pos = match_end > pos ? match_end : pos + 1; + spans.push_back(sp_to_span(sub[target])); + + size_t match_end = (sub[0].data() - text) + sub[0].size(); + pos = match_end > pos ? match_end : pos + 1; + } + } + catch (std::bad_alloc &) + { + snprintf(errbuf, errbuf_size, "out of memory"); + return NULL; } - errbuf[0] = '\0'; - *count = n; - if (n == 0) + if (spans.empty()) + return NULL; + + re2_span *out = (re2_span *)palloc_extended(spans.size() * sizeof(re2_span), MCXT_ALLOC_NO_OOM); + if (!out) { - pfree(out); + snprintf(errbuf, errbuf_size, "out of memory"); return NULL; } + memcpy(out, spans.data(), spans.size() * sizeof(re2_span)); + *count = (int)spans.size(); return out; } @@ -201,8 +210,15 @@ re2_extract_groups(const re2_pattern *pat, const char *text, size_t text_len, in return NULL; } + re2_span *out = (re2_span *)palloc_extended(ngroups * sizeof(re2_span), MCXT_ALLOC_NO_OOM); + if (!out) + { + delete[] sub; + snprintf(errbuf, errbuf_size, "out of memory"); + *count = 0; + return NULL; + } *count = ngroups; - re2_span *out = (re2_span *)palloc(ngroups * sizeof(re2_span)); for (int i = 0; i < ngroups; i++) { re2::StringPiece &g = sub[i + 1]; @@ -238,13 +254,15 @@ validate_rewrite(const re2_pattern *pat, const char *repl, size_t repl_len, char return 
true; } -/* palloc varlena ready for PG_RETURN_TEXT_P/PG_RETURN_BYTEA_P */ +/* palloc varlena ready for PG_RETURN_TEXT_P/PG_RETURN_BYTEA_P, NULL on OOM */ static void * make_varlena(const std::string &s) { size_t len = s.size(); - char *out = (char *)palloc(len + VARHDRSZ); + char *out = (char *)palloc_extended(len + VARHDRSZ, MCXT_ALLOC_NO_OOM); + if (!out) + return NULL; SET_VARSIZE(out, len + VARHDRSZ); memcpy(VARDATA(out), s.data(), len); return out; @@ -261,7 +279,10 @@ re2_replace_one(const re2_pattern *pat, const char *text, size_t text_len, const { std::string result(text, text_len); re2::RE2::Replace(&result, pat->re, re2::StringPiece(repl, repl_len)); - return make_varlena(result); + void *out = make_varlena(result); + if (!out) + snprintf(errbuf, errbuf_size, "out of memory"); + return out; } catch (std::bad_alloc &) { @@ -281,7 +302,10 @@ re2_replace_all(const re2_pattern *pat, const char *text, size_t text_len, const { std::string result(text, text_len); re2::RE2::GlobalReplace(&result, pat->re, re2::StringPiece(repl, repl_len)); - return make_varlena(result); + void *out = make_varlena(result); + if (!out) + snprintf(errbuf, errbuf_size, "out of memory"); + return out; } catch (std::bad_alloc &) {