Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/core/crypto/crypto_uuid.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ auto uuidv4() -> std::string {
throw std::runtime_error("Could not generate random bytes with OpenSSL");
}
#else
static std::random_device device;
static std::mt19937 generator{device()};
thread_local std::random_device device;
thread_local std::mt19937 generator{device()};
std::uniform_int_distribution<decltype(digits)::size_type> distribution(0,
15);
std::uniform_int_distribution<decltype(variant_digits)::size_type>
Expand Down
4 changes: 3 additions & 1 deletion src/core/dns/hostname.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ auto is_hostname(const std::string_view value) -> bool {
return false;
}

// RFC 1123 §2.1: SHOULD handle host names of up to 255 characters
// RFC 1123 §2.1: SHOULD handle host names of up to 255 characters. This is
// intentionally looser than the stricter 253-octet cap applied to the
// internationalized form
if (value.size() > 255) {
return false;
}
Expand Down
2 changes: 1 addition & 1 deletion src/core/dns/idn_hostname.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ auto is_idn_hostname(const std::string_view value) -> bool {
try {
const auto body{utf32_to_punycode(decoded)};
a_label_octets = 4 + body.size();
} catch (...) {
} catch (const PunycodeError &) {
return false;
}
} else if (*kind == IDNALabelKind::Ascii) {
Expand Down
10 changes: 6 additions & 4 deletions src/core/dns/include/sourcemeta/core/dns.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,10 @@ namespace sourcemeta::core {
/// assert(!sourcemeta::core::is_hostname("example."));
/// ```
///
/// This function implements RFC 1123 §2.1 (ASCII only). It does not
/// perform A-label or Punycode decoding. For internationalized host
/// names see `is_idn_hostname`.
/// This function operates on ASCII input only and caps the total length at
/// 255 octets. Labels matching the case-insensitive "xn--" prefix are
/// additionally validated as RFC 5890 A-labels, so the Punycode body must
/// decode and round-trip.
SOURCEMETA_CORE_DNS_EXPORT
auto is_hostname(const std::string_view value) -> bool;

Expand All @@ -45,7 +46,8 @@ auto is_hostname(const std::string_view value) -> bool;
/// RFC 5891 Section 4. Each label is validated as an RFC 5890 A-label or
/// U-label (with RFC 5892 ContextJ and ContextO contextual rules and the
/// RFC 5891 §4.1.2.A NFC requirement), and the RFC 5893 Bidi rule is
/// enforced on every label of a Bidi domain name. For example:
/// enforced on every label of a Bidi domain name. The total length is capped
/// at 253 octets in A-label form. For example:
///
/// ```cpp
/// #include <sourcemeta/core/dns.h>
Expand Down
43 changes: 42 additions & 1 deletion src/core/email/helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include <sourcemeta/core/ip.h>

#include <cstdint> // std::uint8_t, std::uint16_t
#include <string_view> // std::string_view

namespace {
Expand Down Expand Up @@ -75,6 +76,46 @@ inline constexpr auto is_ldh_str(const std::string_view value) -> bool {
return true;
}

// RFC 5321 §4.1.3: Snum = 1*3DIGIT ; representing a decimal integer
// value in the range 0 through 255. Leading zeros are permitted, unlike
// the RFC 3986 dec-octet that backs is_ipv4
//
// Returns true when `value` consists of one to three ASCII decimal digits
// whose numeric value does not exceed 255
inline constexpr auto is_snum(const std::string_view value) -> bool {
  if (value.empty() || value.size() > 3) {
    return false;
  }
  // At most three digits are accumulated, so the largest possible value
  // (999) comfortably fits in 16 bits
  std::uint16_t result{0};
  for (const auto character : value) {
    if (character < '0' || character > '9') {
      return false;
    }
    result = static_cast<std::uint16_t>(
        result * 10 + static_cast<std::uint16_t>(character - '0'));
  }
  return result <= 255;
}

// RFC 5321 §4.1.3: IPv4-address-literal = Snum 3("." Snum)
//
// Returns true only when `value` is exactly four Snum fields separated by
// single dots. Scanning stops as soon as a fifth field is known to follow,
// so the field counter can never wrap around on very long inputs
inline constexpr auto is_ipv4_address_literal(const std::string_view value)
    -> bool {
  constexpr std::uint8_t required_octets{4};
  std::string_view::size_type start{0};
  std::uint8_t octets{0};
  while (true) {
    const auto dot{value.find('.', start)};
    const bool is_last{dot == std::string_view::npos};
    const auto octet{is_last ? value.substr(start)
                             : value.substr(start, dot - start)};
    if (!is_snum(octet)) {
      return false;
    }
    octets = static_cast<std::uint8_t>(octets + 1);
    if (is_last) {
      return octets == required_octets;
    }
    // Another field follows this one. Give up once four have been seen:
    // counting further would let the 8-bit counter wrap, so that an input
    // made of 260 fields would be reported as having exactly four
    if (octets == required_octets) {
      return false;
    }
    start = dot + 1;
  }
}

// RFC 5234 §2.3: ABNF literal strings are case-insensitive by default
// RFC 5321 §4.1.3: IPv6-address-literal prefix is the literal "IPv6:"
inline constexpr auto matches_ipv6_tag(const std::string_view value) -> bool {
Expand Down Expand Up @@ -126,7 +167,7 @@ inline auto is_address_literal(const std::string_view domain) -> bool {
// RFC 5321 §4.1.3: IPv4-address-literal has no ":";
// General-address-literal requires ":"
if (inner.find(':') == std::string_view::npos) {
return sourcemeta::core::is_ipv4(inner);
return is_ipv4_address_literal(inner);
}
return is_general_address_literal(inner);
}
Expand Down
33 changes: 32 additions & 1 deletion src/core/gzip/bit_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ class BitReader {
}

auto consume_bits(const unsigned int count) -> void {
  // Contract: the caller must already have at least `count` bits buffered
  // (through a preceding peek or refill). Dropping more than are available
  // would wrap the unsigned counter, so the precondition is asserted here
  assert(this->bits_available_ >= count);
  // Account for the dropped bits, then shift them out of the low end of the
  // accumulator
  this->bits_available_ -= count;
  this->accumulator_ >>= count;
}
Expand All @@ -45,6 +49,10 @@ class BitReader {
}

auto read_byte() -> std::uint8_t {
// Reading a byte while 1 to 7 bits are buffered would return a byte from
// ahead of them. Every call site is byte-aligned, so the assert documents
// the invariant without paying a release-build cost
assert(this->bits_available_ % 8 == 0);
if (this->bits_available_ >= 8) {
const auto value{static_cast<std::uint8_t>(this->accumulator_ & 0xff)};
this->accumulator_ >>= 8;
Expand All @@ -54,6 +62,22 @@ class BitReader {
return this->pull_source_byte();
}

// Non-throwing byte read. On success the next byte is stored in `byte` and
// true is returned. When nothing is buffered and the refill attempt fails,
// false is returned and `byte` is left untouched. The reader is expected to
// be byte-aligned when this is called
auto try_read_byte(std::uint8_t &byte) -> bool {
  assert(this->bits_available_ % 8 == 0);

  if (this->bits_available_ < 8) {
    // Nothing left in the accumulator: take the byte straight from the
    // source buffer, refilling it first if it has been fully consumed
    const bool buffer_exhausted{this->buffer_position_ >=
                                this->buffer_size_};
    if (buffer_exhausted && !this->try_refill_buffer()) {
      return false;
    }
    byte = this->buffer_[this->buffer_position_];
    this->buffer_position_++;
    return true;
  }

  // A whole byte is still sitting in the accumulator, so serve it from there
  byte = static_cast<std::uint8_t>(this->accumulator_ & 0xff);
  this->accumulator_ >>= 8;
  this->bits_available_ -= 8;
  return true;
}

auto read_bytes(std::uint8_t *destination, const std::size_t count) -> void {
for (std::size_t index = 0; index < count; ++index) {
destination[index] = this->read_byte();
Expand Down Expand Up @@ -101,14 +125,21 @@ class BitReader {
}

// Throwing wrapper around try_refill_buffer: a failed refill is reported as
// a GZIPError
auto refill_buffer() -> void {
  const bool refilled{this->try_refill_buffer()};
  if (refilled) {
    return;
  }
  throw GZIPError{"Unexpected end of source stream"};
}

// Requests up to SOURCE_BUFFER_SIZE bytes from the source stream into
// buffer_. When at least one byte arrives, buffer_size_ is set to the number
// of bytes obtained, buffer_position_ is reset to zero and true is returned
auto try_refill_buffer() -> bool {
this->source_->read(reinterpret_cast<char *>(this->buffer_.data()),
SOURCE_BUFFER_SIZE);
// gcount() is the number of bytes the read above actually extracted, which
// may be fewer than requested near the end of the stream
const auto bytes_read{static_cast<std::size_t>(this->source_->gcount())};
if (bytes_read == 0) {
// NOTE(review): this view shows both a `throw` and a `return false` here,
// which looks like the pre-change and post-change lines of the same diff
// hunk. Presumably only `return false` belongs to the final function (the
// throwing behaviour now lives in refill_buffer) - confirm against the
// merged file
throw GZIPError{"Unexpected end of source stream"};
return false;
}
this->buffer_size_ = bytes_read;
this->buffer_position_ = 0;
return true;
}

std::istream *source_;
Expand Down
9 changes: 7 additions & 2 deletions src/core/gzip/deflate.h
Original file line number Diff line number Diff line change
Expand Up @@ -163,15 +163,20 @@ class DeflateDecoder {
for (auto &length : distance_lengths) {
length = 5;
}
this->distance_tree_.build(distance_lengths.data(),
distance_lengths.size());
this->distance_tree_.build(distance_lengths.data(), distance_lengths.size(),
true);
}

auto read_dynamic_header() -> void {
const auto hlit{this->reader_->read_bits(5) + 257};
const auto hdist{this->reader_->read_bits(5) + 1};
const auto hclen{this->reader_->read_bits(4) + 4};

// RFC 1951 section 3.2.7 caps the literal/length alphabet at 286 symbols
if (hlit > 286) {
throw GZIPError{"Too many literal/length codes"};
}

std::array<std::uint8_t, 19> code_length_lengths{};
for (std::size_t index = 0; index < hclen; ++index) {
code_length_lengths[DEFLATE_CODE_LENGTH_ORDER[index]] =
Expand Down
34 changes: 24 additions & 10 deletions src/core/gzip/gzip.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,27 @@ extern "C" {

namespace sourcemeta::core {

auto gzip(const std::uint8_t *input, const std::size_t size) -> std::string {
auto gzip(const std::uint8_t *input, const std::size_t size, const int level)
-> std::string {
std::unique_ptr<libdeflate_compressor, decltype(&libdeflate_free_compressor)>
compressor{libdeflate_alloc_compressor(1), libdeflate_free_compressor};
compressor{libdeflate_alloc_compressor(level),
libdeflate_free_compressor};
if (!compressor) {
throw GZIPError{"Could not allocate compressor"};
}

const auto max_size{libdeflate_gzip_compress_bound(compressor.get(), size)};
std::string output;
output.resize(max_size);

const auto actual_size{libdeflate_gzip_compress(
compressor.get(), input, size, output.data(), output.size())};
std::size_t actual_size{0};
// libdeflate overwrites the whole bound, so leaving the buffer uninitialised
// avoids zero-filling multi-megabyte allocations that are immediately
// discarded
output.resize_and_overwrite(
max_size, [&](char *const buffer, const std::size_t capacity) {
actual_size = libdeflate_gzip_compress(compressor.get(), input, size,
buffer, capacity);
return capacity;
});

if (actual_size == 0) {
throw GZIPError{"Could not compress input"};
Expand All @@ -44,11 +52,17 @@ auto gunzip(const std::uint8_t *input, const std::size_t size,
auto capacity{output_hint > 0 ? output_hint : size * 4};

for (;;) {
output.resize(capacity);
std::size_t actual_size{0};
const auto result{libdeflate_gzip_decompress(decompressor.get(), input,
size, output.data(),
output.size(), &actual_size)};
auto result{LIBDEFLATE_BAD_DATA};
// libdeflate writes only the decompressed bytes, so leaving the buffer
// uninitialised avoids zero-filling multi-megabyte allocations on every
// retry of the doubling loop
output.resize_and_overwrite(capacity, [&](char *const buffer,
const std::size_t buffer_size) {
result = libdeflate_gzip_decompress(decompressor.get(), input, size,
buffer, buffer_size, &actual_size);
return buffer_size;
});

if (result == LIBDEFLATE_SUCCESS) {
output.resize(actual_size);
Expand Down
16 changes: 14 additions & 2 deletions src/core/gzip/huffman.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,11 @@ class HuffmanDecoder {

HuffmanDecoder() = default;

auto build(const std::uint8_t *lengths, const std::size_t length_count)
-> void {
// The fixed distance tree of RFC 1951 section 3.2.6 is intentionally
// incomplete (30 codes of length five over a 32-slot space), so the
// completeness check is suppressed for it and enforced everywhere else
auto build(const std::uint8_t *lengths, const std::size_t length_count,
const bool allow_incomplete = false) -> void {
std::ranges::fill(this->count_, std::uint16_t{0});
std::ranges::fill(this->lut_, std::uint16_t{0});

Expand All @@ -54,6 +57,15 @@ class HuffmanDecoder {
}
}

// Reject incomplete codes, matching zlib and puff. RFC 1951 sanctions
// incompleteness only for the single-code case (a tree built from one
// used code of length one), where every length is either zero or one
if (left > 0 && !allow_incomplete &&
length_count != static_cast<std::size_t>(this->count_[0]) +
static_cast<std::size_t>(this->count_[1])) {
throw GZIPError{"Incomplete Huffman code"};
}

std::array<std::uint16_t, MAX_HUFFMAN_BITS + 1> offsets{};
offsets[1] = 0;
for (unsigned int bits = 1; bits < MAX_HUFFMAN_BITS; ++bits) {
Expand Down
6 changes: 4 additions & 2 deletions src/core/gzip/include/sourcemeta/core/gzip.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
namespace sourcemeta::core {

/// @ingroup gzip
/// Compress a byte buffer using the GZIP format (RFC 1952). For example:
/// Compress a byte buffer using the GZIP format (RFC 1952). An optional
/// compression level from 0 to 12 trades speed for ratio. For example:
///
/// ```cpp
/// #include <sourcemeta/core/gzip.h>
Expand All @@ -36,7 +37,8 @@ namespace sourcemeta::core {
/// reinterpret_cast<const std::uint8_t *>(input.data()), input.size())};
/// ```
auto SOURCEMETA_CORE_GZIP_EXPORT gzip(const std::uint8_t *input,
std::size_t size) -> std::string;
std::size_t size, int level = 1)
-> std::string;

/// @ingroup gzip
/// Decompress a GZIP compressed byte buffer (RFC 1952). An optional output
Expand Down
Loading
Loading