Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions src/http/include/sourcemeta/one/http_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@

#include <cassert> // assert
#include <chrono> // std::chrono::system_clock
#include <cstddef> // std::size_t
#include <format> // std::format
#include <mutex> // std::mutex, std::lock_guard
#include <optional> // std::optional
#include <print> // std::print
#include <sstream> // std::ostringstream
#include <string> // std::string
Expand Down Expand Up @@ -43,11 +45,14 @@ inline auto send_response(const sourcemeta::core::HTTPStatus &status,
std::format("{} {} {}", status.wire, request.method(), request.path()));
}

inline auto send_response(const sourcemeta::core::HTTPStatus &status,
const HTTPRequest &request, HTTPResponse &response,
const std::string &message,
const Encoding current_encoding) -> void {
response.send(request, message, current_encoding);
inline auto send_response(
const sourcemeta::core::HTTPStatus &status, const HTTPRequest &request,
HTTPResponse &response, const std::string &message,
const Encoding current_encoding,
const std::optional<std::size_t> precomputed_compressed_size = std::nullopt)
-> void {
response.send(request, message, current_encoding,
precomputed_compressed_size);
HTTP_LOG(
std::format("{} {} {}", status.wire, request.method(), request.path()));
}
Expand Down
29 changes: 22 additions & 7 deletions src/http/include/sourcemeta/one/http_response.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@

#include <sourcemeta/one/http_uwebsockets.h>

#include <cstddef> // std::size_t
#include <cstdint> // std::uint8_t
#include <optional> // std::optional
#include <string> // std::string
#include <string_view> // std::string_view
#include <utility> // std::move
Expand Down Expand Up @@ -36,20 +38,33 @@ class HTTPResponse {

template <typename Request>
auto send(const Request &request, const std::string &message,
const Encoding current_encoding) -> void {
const Encoding current_encoding,
const std::optional<std::size_t> precomputed_compressed_size =
std::nullopt) -> void {
const auto method{request.method()};
const auto expected_encoding{request.response_encoding()};
if (expected_encoding == Encoding::GZIP) {
this->response_->writeHeader("Content-Encoding", "gzip");
if (current_encoding == Encoding::Identity) {
auto effective_message{sourcemeta::core::gzip(
reinterpret_cast<const std::uint8_t *>(message.data()),
message.size())};
if (method == "head") {
this->response_->endWithoutBody(effective_message.size());
// RFC 9110 §9.3.2: a HEAD response must carry the same
// representation metadata (including Content-Length) as the
// matching GET would. When the artifact is identity-stored
// but the wire encoding is gzip, the metapack carries the
// precomputed compressed size, so we can answer HEAD without
// running the compressor just to discard its output.
if (method == "head" && precomputed_compressed_size.has_value()) {
this->response_->endWithoutBody(precomputed_compressed_size);

@augmentcode augmentcode Bot Jun 9, 2026

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

endWithoutBody is called with precomputed_compressed_size (an std::optional<std::size_t>), which likely needs the contained numeric value; otherwise this risks a compile error and/or an incorrect Content-Length for HEAD responses.

Severity: high

Fix This in Augment

🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage.

this->response_->end();
} else {
this->response_->end(std::move(effective_message));
auto effective_message{sourcemeta::core::gzip(
reinterpret_cast<const std::uint8_t *>(message.data()),
message.size())};
if (method == "head") {
this->response_->endWithoutBody(effective_message.size());
this->response_->end();
} else {
this->response_->end(std::move(effective_message));
}
}
} else {
if (method == "head") {
Expand Down
11 changes: 10 additions & 1 deletion src/metapack/include/sourcemeta/one/metapack.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
namespace sourcemeta::one {

static constexpr std::uint32_t METAPACK_MAGIC{0x4154454D};
static constexpr std::uint16_t METAPACK_VERSION{1};
static constexpr std::uint16_t METAPACK_VERSION{2};

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2: Changing METAPACK_VERSION to 2 without a backward-compatible v1 read path makes existing metapack artifacts unreadable at runtime.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At src/metapack/include/sourcemeta/one/metapack.h, line 22:

<comment>Changing `METAPACK_VERSION` to 2 without a backward-compatible v1 read path makes existing metapack artifacts unreadable at runtime.</comment>

<file context>
@@ -19,7 +19,7 @@
 
 static constexpr std::uint32_t METAPACK_MAGIC{0x4154454D};
-static constexpr std::uint16_t METAPACK_VERSION{1};
+static constexpr std::uint16_t METAPACK_VERSION{2};
 static constexpr std::uint64_t METAPACK_MAX_DECOMPRESSION_RATIO{1024};
 
</file context>

static constexpr std::uint64_t METAPACK_MAX_DECOMPRESSION_RATIO{1024};

enum class MetapackEncoding : std::uint8_t { Identity = 0, GZIP = 1 };
Expand All @@ -32,6 +32,14 @@ struct MetapackHeader {
std::uint8_t reserved;
std::int64_t last_modified;
std::uint64_t content_bytes;
// The size in bytes of the artifact after applying the compression
// matching the supported wire content-coding. The codec is gzip
// today, the field name stays compression-agnostic so a future codec
// swap is mechanical. Precomputed at index time so the server can
// answer HEAD with Content-Encoding set without compressing on the
// fly only to discard the bytes. For compressed-storage artifacts
// this equals the payload size on disk.
std::uint64_t compressed_bytes;
std::int64_t duration;
std::array<std::uint8_t, 32> checksum;
std::uint16_t mime_length;
Expand All @@ -44,6 +52,7 @@ struct MetapackInfo {
std::string mime;
MetapackEncoding encoding;
std::uint64_t content_bytes;
std::uint64_t compressed_bytes;
std::chrono::milliseconds duration;
};

Expand Down
17 changes: 12 additions & 5 deletions src/metapack/metapack.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ static auto write_binary_header(std::ostream &output,
const std::span<const std::uint8_t> extension,
const std::chrono::milliseconds duration,
const std::string_view payload,
const std::size_t uncompressed_size) -> void {
const std::size_t uncompressed_size,
const std::size_t compressed_size) -> void {
MetapackHeader header{};
header.magic = METAPACK_MAGIC;
header.format_version = METAPACK_VERSION;
Expand All @@ -38,6 +39,7 @@ static auto write_binary_header(std::ostream &output,
now.time_since_epoch())
.count();
header.content_bytes = uncompressed_size;
header.compressed_bytes = compressed_size;
header.duration = duration.count();

const auto hex_string{sourcemeta::core::sha256(payload)};
Expand Down Expand Up @@ -72,14 +74,18 @@ static auto write_metapack(const std::filesystem::path &destination,
const std::span<const std::uint8_t> extension,
const std::chrono::milliseconds duration,
const std::string &content) -> void {
// Always compute the compressed representation so the size lands in
// the header. The codec is gzip today, the field name stays
// compression-agnostic so a future codec swap is mechanical. For
// compressed-storage artifacts the bytes are also what we write to
// disk. For Identity storage only the size is kept.
const auto compressed{sourcemeta::core::gzip(

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2: Identity metapack writes now hard-depend on gzip success, introducing a new failure path for non-gzip storage.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At src/metapack/metapack.cc, line 82:

<comment>Identity metapack writes now hard-depend on gzip success, introducing a new failure path for non-gzip storage.</comment>

<file context>
@@ -72,14 +74,18 @@ static auto write_metapack(const std::filesystem::path &destination,
+  // so a future codec swap is mechanical). For compressed-storage
+  // artifacts the bytes are also what we write to disk; for Identity
+  // storage only the size is kept.
+  const auto compressed{sourcemeta::core::gzip(
+      reinterpret_cast<const std::uint8_t *>(content.data()), content.size())};
   sourcemeta::core::write_file(destination, [&](std::ostream &output) {
</file context>

reinterpret_cast<const std::uint8_t *>(content.data()), content.size())};
sourcemeta::core::write_file(destination, [&](std::ostream &output) {
write_binary_header(output, mime, encoding, extension, duration, content,
content.size());
content.size(), compressed.size());

if (encoding == MetapackEncoding::GZIP) {
const auto compressed{sourcemeta::core::gzip(
reinterpret_cast<const std::uint8_t *>(content.data()),
content.size())};
output.write(compressed.data(),
static_cast<std::streamsize>(compressed.size()));
} else {
Expand Down Expand Up @@ -335,6 +341,7 @@ auto metapack_info(const sourcemeta::core::FileView &view)
.mime = std::string{mime_data, header->mime_length},
.encoding = header->encoding,
.content_bytes = header->content_bytes,
.compressed_bytes = header->compressed_bytes,
.duration = std::chrono::milliseconds{header->duration}};
}

Expand Down
15 changes: 13 additions & 2 deletions src/router/artifact.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@

#include <cassert> // assert
#include <chrono> // std::chrono::seconds
#include <cstddef> // std::size_t
#include <cstdint> // std::uint8_t, std::uint16_t, std::uint32_t
#include <exception> // std::exception
#include <filesystem> // std::filesystem
#include <format> // std::format
#include <limits> // std::numeric_limits
#include <optional> // std::optional
#include <string> // std::string
#include <string_view> // std::string_view
Expand Down Expand Up @@ -336,8 +338,17 @@ auto RouterAction::artifact_serve(
sourcemeta::one::send_response(status, request, response, contents,
sourcemeta::one::Encoding::GZIP);
} else {
sourcemeta::one::send_response(status, request, response, contents,
sourcemeta::one::Encoding::Identity);
// The header carries the compressed size as a fixed-width `uint64_t`
// to keep the metapack format portable across architectures. Narrow
// to `std::size_t` for the uWS API. On 64-bit hosts this is a no-op,
// and the assert guards a hypothetical 32-bit build from truncating
// a >4 GB artifact (which the indexer would never produce on
// realistic schema inputs).
assert(info->compressed_bytes <= std::numeric_limits<std::size_t>::max());

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2: Debug-only assert() guards a narrowing static_cast—the check disappears in release builds, so a >4 GB compressed_bytes on a 32-bit build would silently truncate and produce an incorrect Content-Length in HEAD responses.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At src/router/artifact.cc, line 347:

<comment>Debug-only `assert()` guards a narrowing `static_cast`—the check disappears in release builds, so a >4 GB `compressed_bytes` on a 32-bit build would silently truncate and produce an incorrect `Content-Length` in HEAD responses.</comment>

<file context>
@@ -336,6 +338,13 @@ auto RouterAction::artifact_serve(
+    // and the assert guards a hypothetical 32-bit build from truncating
+    // a >4 GB artifact (which the indexer would never produce on
+    // realistic schema inputs).
+    assert(info->compressed_bytes <= std::numeric_limits<std::size_t>::max());
     sourcemeta::one::send_response(
         status, request, response, contents,
</file context>

sourcemeta::one::send_response(
status, request, response, contents,
sourcemeta::one::Encoding::Identity,
static_cast<std::size_t>(info->compressed_bytes));
Comment thread
cubic-dev-ai[bot] marked this conversation as resolved.
}
}

Expand Down
28 changes: 28 additions & 0 deletions test/e2e/html/hurl/gzip.all.hurl
Original file line number Diff line number Diff line change
Expand Up @@ -137,3 +137,31 @@ header "Content-Security-Policy" not exists
header "X-Frame-Options" not exists
header "Date" matches /^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), (0[1-9]|[12][0-9]|3[01]) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) [0-9]{4} ([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9] GMT$/
bytes count > 0

# RFC 9110 §9.3.2: a HEAD response must declare the same
# Content-Length the matching GET would have sent. The artifact
# is identity-stored, so the server reads the precomputed gzip
# size from the metapack header rather than compressing on the
# fly only to discard the bytes. Capture the GET's
# Content-Length and check that HEAD reports the same value.
GET {{base}}/test/schemas/string.json
Accept-Encoding: gzip
HTTP 200
[Captures]
gzip_content_length: header "Content-Length"

HEAD {{base}}/test/schemas/string.json
Accept-Encoding: gzip
HTTP 200
Cache-Control: public, max-age=0, must-revalidate
Content-Type: application/schema+json
Content-Encoding: gzip
Access-Control-Allow-Origin: *
Access-Control-Expose-Headers: Link, ETag
[Asserts]
header "Content-Length" == "{{gzip_content_length}}"
header "Vary" == "User-Agent, Accept-Encoding"
header "Referrer-Policy" not exists
header "Content-Security-Policy" not exists
header "X-Frame-Options" not exists
header "Date" matches /^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), (0[1-9]|[12][0-9]|3[01]) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) [0-9]{4} ([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9] GMT$/
36 changes: 36 additions & 0 deletions test/unit/metapack/metapack_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,42 @@ TEST(Metapack, write_and_read_text_gzip_large) {
EXPECT_EQ(result.value(), large_text + "\n");
}

TEST(Metapack, compressed_bytes_matches_payload_for_gzip_encoding) {
const auto path{test_path("compressed_bytes_gzip.metapack")};
auto document{sourcemeta::core::JSON::make_object()};
document.assign("hello", sourcemeta::core::JSON{"world"});

sourcemeta::one::metapack_write_json(path, document, "application/json",
sourcemeta::one::MetapackEncoding::GZIP,
{}, std::chrono::milliseconds{0});

sourcemeta::core::FileView view{path};
const auto payload_offset{
sourcemeta::one::metapack_payload_offset(view).value()};
const auto payload_size{view.size() - payload_offset};

const auto info{sourcemeta::one::metapack_info(view).value()};
EXPECT_EQ(info.encoding, sourcemeta::one::MetapackEncoding::GZIP);
EXPECT_EQ(info.compressed_bytes, payload_size);
}

TEST(Metapack, compressed_bytes_populated_for_identity_encoding) {
const auto path{test_path("compressed_bytes_identity.metapack")};
auto document{sourcemeta::core::JSON::make_object()};
document.assign("hello", sourcemeta::core::JSON{"world"});

sourcemeta::one::metapack_write_json(
path, document, "application/json",
sourcemeta::one::MetapackEncoding::Identity, {},
std::chrono::milliseconds{0});

sourcemeta::core::FileView view{path};
const auto info{sourcemeta::one::metapack_info(view).value()};
EXPECT_EQ(info.encoding, sourcemeta::one::MetapackEncoding::Identity);
EXPECT_GT(info.compressed_bytes, 0);
EXPECT_NE(info.compressed_bytes, info.content_bytes);
}

TEST(Metapack, read_text_nullopt_when_content_bytes_exceeds_payload) {
const auto path{test_path("bad_content_bytes.metapack")};

Expand Down
Loading