sourcemeta · jviotti · Jun 10, 2026 · Jun 10, 2026 · Jun 10, 2026 · Jun 10, 2026
diff --git a/src/core/crypto/crypto_uuid.cc b/src/core/crypto/crypto_uuid.cc
@@ -40,8 +40,8 @@ auto uuidv4() -> std::string {
     throw std::runtime_error("Could not generate random bytes with OpenSSL");
   }
 #else
-  static std::random_device device;
-  static std::mt19937 generator{device()};
+  thread_local std::random_device device;
+  thread_local std::mt19937 generator{device()};
   std::uniform_int_distribution<decltype(digits)::size_type> distribution(0,
                                                                           15);
   std::uniform_int_distribution<decltype(variant_digits)::size_type>

diff --git a/src/core/dns/hostname.cc b/src/core/dns/hostname.cc
@@ -20,7 +20,9 @@ auto is_hostname(const std::string_view value) -> bool {
     return false;
   }
 
-  // RFC 1123 §2.1: SHOULD handle host names of up to 255 characters
+  // RFC 1123 §2.1: SHOULD handle host names of up to 255 characters. This is
+  // intentionally looser than the stricter 253-octet cap applied to the
+  // internationalized form
   if (value.size() > 255) {
     return false;
   }

diff --git a/src/core/dns/idn_hostname.cc b/src/core/dns/idn_hostname.cc
@@ -68,7 +68,7 @@ auto is_idn_hostname(const std::string_view value) -> bool {
       try {
         const auto body{utf32_to_punycode(decoded)};
         a_label_octets = 4 + body.size();
-      } catch (...) {
+      } catch (const PunycodeError &) {
         return false;
       }
     } else if (*kind == IDNALabelKind::Ascii) {

diff --git a/src/core/dns/include/sourcemeta/core/dns.h b/src/core/dns/include/sourcemeta/core/dns.h
@@ -34,9 +34,10 @@ namespace sourcemeta::core {
 /// assert(!sourcemeta::core::is_hostname("example."));
 /// ```
 ///
-/// This function implements RFC 1123 §2.1 (ASCII only). It does not
-/// perform A-label or Punycode decoding. For internationalized host
-/// names see `is_idn_hostname`.
+/// This function operates on ASCII input only and caps the total length at
+/// 255 octets. Labels matching the case-insensitive "xn--" prefix are
+/// additionally validated as RFC 5890 A-labels, so the Punycode body must
+/// decode and round-trip.
 SOURCEMETA_CORE_DNS_EXPORT
 auto is_hostname(const std::string_view value) -> bool;
 
@@ -45,7 +46,8 @@ auto is_hostname(const std::string_view value) -> bool;
 /// RFC 5891 Section 4. Each label is validated as an RFC 5890 A-label or
 /// U-label (with RFC 5892 ContextJ and ContextO contextual rules and the
 /// RFC 5891 §4.1.2.A NFC requirement), and the RFC 5893 Bidi rule is
-/// enforced on every label of a Bidi domain name. For example:
+/// enforced on every label of a Bidi domain name. The total length is capped
+/// at 253 octets in A-label form. For example:
 ///
 /// ```cpp
 /// #include <sourcemeta/core/dns.h>

diff --git a/src/core/email/helpers.h b/src/core/email/helpers.h
@@ -3,6 +3,7 @@
 
 #include <sourcemeta/core/ip.h>
 
+#include <cstdint>     // std::uint8_t, std::uint16_t
 #include <string_view> // std::string_view
 
 namespace {
@@ -75,6 +76,46 @@ inline constexpr auto is_ldh_str(const std::string_view value) -> bool {
   return true;
 }
 
+// RFC 5321 §4.1.3 defines Snum as 1*3DIGIT, read as a decimal integer in
+// the range 0 through 255. Leading zeros such as "007" are accepted, unlike
+// the RFC 3986 dec-octet that backs is_ipv4
+inline constexpr auto is_snum(const std::string_view value) -> bool {
+  if (value.size() < 1 || value.size() > 3) {
+    return false;
+  }
+  std::uint16_t total{0};
+  for (std::string_view::size_type index{0}; index < value.size(); ++index) {
+    const char digit{value[index]};
+    if (!(digit >= '0' && digit <= '9')) {
+      return false;
+    }
+    total = static_cast<std::uint16_t>(total * 10 + (digit - '0'));
+  }
+  return total < 256;
+}
+
+// RFC 5321 §4.1.3: IPv4-address-literal = Snum 3("." Snum)
+// The scan stops at a fifth field, so the 8-bit field counter can never wrap
+// around and mistake an input with 260 fields for one with exactly four
+inline constexpr auto is_ipv4_address_literal(const std::string_view value)
+    -> bool {
+  std::string_view::size_type start{0};
+  for (std::uint8_t octets{1}; octets <= 4; ++octets) {
+    const auto dot{value.find('.', start)};
+    const bool last{dot == std::string_view::npos};
+    const auto octet{last ? value.substr(start)
+                          : value.substr(start, dot - start)};
+    if (!is_snum(octet)) {
+      return false;
+    }
+    if (last) {
+      return octets == 4;
+    }
+    start = dot + 1;
+  }
+  return false;
+}
+
 // RFC 5234 §2.3: ABNF literal strings are case-insensitive by default
 // RFC 5321 §4.1.3: IPv6-address-literal prefix is the literal "IPv6:"
 inline constexpr auto matches_ipv6_tag(const std::string_view value) -> bool {
@@ -126,7 +167,7 @@ inline auto is_address_literal(const std::string_view domain) -> bool {
   // RFC 5321 §4.1.3: IPv4-address-literal has no ":";
   // General-address-literal requires ":"
   if (inner.find(':') == std::string_view::npos) {
-    return sourcemeta::core::is_ipv4(inner);
+    return is_ipv4_address_literal(inner);
   }
   return is_general_address_literal(inner);
 }

diff --git a/src/core/gzip/bit_reader.h b/src/core/gzip/bit_reader.h
@@ -34,6 +34,10 @@ class BitReader {
   }
 
   auto consume_bits(const unsigned int count) -> void {
+    // Discards the `count` low-order bits of the accumulator. Asking for more
+    // than are buffered would underflow the unsigned counter; every call site
+    // peeks or refills first, so the assert only documents that contract
+    assert(count <= this->bits_available_);
     this->accumulator_ >>= count;
     this->bits_available_ -= count;
   }
@@ -45,6 +49,10 @@ class BitReader {
   }
 
   auto read_byte() -> std::uint8_t {
+    // Reading a byte while 1 to 7 bits are buffered would return a byte from
+    // ahead of them. Every call site is byte-aligned, so the assert documents
+    // the invariant without paying a release-build cost
+    assert(this->bits_available_ % 8 == 0);
     if (this->bits_available_ >= 8) {
       const auto value{static_cast<std::uint8_t>(this->accumulator_ & 0xff)};
       this->accumulator_ >>= 8;
@@ -54,6 +62,22 @@ class BitReader {
     return this->pull_source_byte();
   }
 
+  // Stores the next byte in `byte`; false means the source is exhausted
+  auto try_read_byte(std::uint8_t &byte) -> bool {
+    assert(this->bits_available_ % 8 == 0);
+    if (this->bits_available_ >= 8) {
+      byte = static_cast<std::uint8_t>(this->accumulator_ & 0xff);
+      this->accumulator_ >>= 8;
+      this->bits_available_ -= 8;
+    } else if (this->buffer_position_ < this->buffer_size_ ||
+               this->try_refill_buffer()) {
+      byte = this->buffer_[this->buffer_position_++];
+    } else {
+      return false;
+    }
+    return true;
+  }
+
   auto read_bytes(std::uint8_t *destination, const std::size_t count) -> void {
     for (std::size_t index = 0; index < count; ++index) {
       destination[index] = this->read_byte();
@@ -101,14 +125,21 @@ class BitReader {
   }
 
   auto refill_buffer() -> void {
+    if (!this->try_refill_buffer()) {
+      throw GZIPError{"Unexpected end of source stream"};
+    }
+  }
+
+  auto try_refill_buffer() -> bool {
     this->source_->read(reinterpret_cast<char *>(this->buffer_.data()),
                         SOURCE_BUFFER_SIZE);
     const auto bytes_read{static_cast<std::size_t>(this->source_->gcount())};
     if (bytes_read == 0) {
-      throw GZIPError{"Unexpected end of source stream"};
+      return false;
     }
     this->buffer_size_ = bytes_read;
     this->buffer_position_ = 0;
+    return true;
   }
 
   std::istream *source_;

diff --git a/src/core/gzip/deflate.h b/src/core/gzip/deflate.h
@@ -163,15 +163,20 @@ class DeflateDecoder {
     for (auto &length : distance_lengths) {
       length = 5;
     }
-    this->distance_tree_.build(distance_lengths.data(),
-                               distance_lengths.size());
+    this->distance_tree_.build(distance_lengths.data(), distance_lengths.size(),
+                               true);
   }
 
   auto read_dynamic_header() -> void {
     const auto hlit{this->reader_->read_bits(5) + 257};
     const auto hdist{this->reader_->read_bits(5) + 1};
     const auto hclen{this->reader_->read_bits(4) + 4};
 
+    // RFC 1951 section 3.2.7 caps the literal/length alphabet at 286 symbols
+    if (hlit > 286) {
+      throw GZIPError{"Too many literal/length codes"};
+    }
+
     std::array<std::uint8_t, 19> code_length_lengths{};
     for (std::size_t index = 0; index < hclen; ++index) {
       code_length_lengths[DEFLATE_CODE_LENGTH_ORDER[index]] =

diff --git a/src/core/gzip/gzip.cc b/src/core/gzip/gzip.cc
@@ -8,19 +8,27 @@ extern "C" {
 
 namespace sourcemeta::core {
 
-auto gzip(const std::uint8_t *input, const std::size_t size) -> std::string {
+auto gzip(const std::uint8_t *input, const std::size_t size, const int level)
+    -> std::string {
   std::unique_ptr<libdeflate_compressor, decltype(&libdeflate_free_compressor)>
-      compressor{libdeflate_alloc_compressor(1), libdeflate_free_compressor};
+      compressor{libdeflate_alloc_compressor(level),
+                 libdeflate_free_compressor};
   if (!compressor) {
     throw GZIPError{"Could not allocate compressor"};
   }
 
   const auto max_size{libdeflate_gzip_compress_bound(compressor.get(), size)};
   std::string output;
-  output.resize(max_size);
-
-  const auto actual_size{libdeflate_gzip_compress(
-      compressor.get(), input, size, output.data(), output.size())};
+  std::size_t actual_size{0};
+  // Skipping the zero-fill avoids initialising a multi-megabyte allocation.
+  // NOTE(review): libdeflate only writes the bytes it reports in actual_size,
+  // not the whole bound, so the callback should arguably return that count
+  output.resize_and_overwrite(
+      max_size, [&](char *const buffer, const std::size_t capacity) {
+        actual_size = libdeflate_gzip_compress(compressor.get(), input, size,
+                                               buffer, capacity);
+        return capacity;
+      });
 
   if (actual_size == 0) {
     throw GZIPError{"Could not compress input"};
@@ -44,11 +52,17 @@ auto gunzip(const std::uint8_t *input, const std::size_t size,
   auto capacity{output_hint > 0 ? output_hint : size * 4};
 
   for (;;) {
-    output.resize(capacity);
     std::size_t actual_size{0};
-    const auto result{libdeflate_gzip_decompress(decompressor.get(), input,
-                                                 size, output.data(),
-                                                 output.size(), &actual_size)};
+    auto result{LIBDEFLATE_BAD_DATA};
+    // libdeflate writes only the decompressed bytes, so leaving the buffer
+    // uninitialised avoids zero-filling multi-megabyte allocations on every
+    // retry of the doubling loop
+    output.resize_and_overwrite(capacity, [&](char *const buffer,
+                                              const std::size_t buffer_size) {
+      result = libdeflate_gzip_decompress(decompressor.get(), input, size,
+                                          buffer, buffer_size, &actual_size);
+      return buffer_size;
+    });
 
     if (result == LIBDEFLATE_SUCCESS) {
       output.resize(actual_size);

diff --git a/src/core/gzip/huffman.h b/src/core/gzip/huffman.h
@@ -28,8 +28,11 @@ class HuffmanDecoder {
 
   HuffmanDecoder() = default;
 
-  auto build(const std::uint8_t *lengths, const std::size_t length_count)
-      -> void {
+  // The fixed distance tree of RFC 1951 section 3.2.6 is intentionally
+  // incomplete (30 codes of length five over a 32-slot space), so the
+  // completeness check is suppressed for it and enforced everywhere else
+  auto build(const std::uint8_t *lengths, const std::size_t length_count,
+             const bool allow_incomplete = false) -> void {
     std::ranges::fill(this->count_, std::uint16_t{0});
     std::ranges::fill(this->lut_, std::uint16_t{0});
 
@@ -54,6 +57,15 @@ class HuffmanDecoder {
       }
     }
 
+    // Reject incomplete codes, matching zlib and puff. RFC 1951 sanctions
+    // incompleteness only for the single-code case (a tree built from one
+    // used code of length one), where every length is either zero or one
+    if (left > 0 && !allow_incomplete &&
+        length_count != static_cast<std::size_t>(this->count_[0]) +
+                            static_cast<std::size_t>(this->count_[1])) {
+      throw GZIPError{"Incomplete Huffman code"};
+    }
+
     std::array<std::uint16_t, MAX_HUFFMAN_BITS + 1> offsets{};
     offsets[1] = 0;
     for (unsigned int bits = 1; bits < MAX_HUFFMAN_BITS; ++bits) {

diff --git a/src/core/gzip/include/sourcemeta/core/gzip.h b/src/core/gzip/include/sourcemeta/core/gzip.h
@@ -26,7 +26,8 @@
 namespace sourcemeta::core {
 
 /// @ingroup gzip
-/// Compress a byte buffer using the GZIP format (RFC 1952). For example:
+/// Compress a byte buffer using the GZIP format (RFC 1952). An optional
+/// compression level from 0 to 12 trades speed for ratio. For example:
 ///
 /// ```cpp
 /// #include <sourcemeta/core/gzip.h>
@@ -36,7 +37,8 @@ namespace sourcemeta::core {
 ///     reinterpret_cast<const std::uint8_t *>(input.data()), input.size())};
 /// ```
 auto SOURCEMETA_CORE_GZIP_EXPORT gzip(const std::uint8_t *input,
-                                      std::size_t size) -> std::string;
+                                      std::size_t size, int level = 1)
+    -> std::string;
 
 /// @ingroup gzip
 /// Decompress a GZIP compressed byte buffer (RFC 1952). An optional output