-
-
Notifications
You must be signed in to change notification settings - Fork 5
Properly respond to HEAD with Content-Length on compressed content
#1040
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,7 +19,7 @@ | |
| namespace sourcemeta::one { | ||
|
|
||
| static constexpr std::uint32_t METAPACK_MAGIC{0x4154454D}; | ||
| static constexpr std::uint16_t METAPACK_VERSION{1}; | ||
| static constexpr std::uint16_t METAPACK_VERSION{2}; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. P2: Changing Prompt for AI agents |
||
| static constexpr std::uint64_t METAPACK_MAX_DECOMPRESSION_RATIO{1024}; | ||
|
|
||
| enum class MetapackEncoding : std::uint8_t { Identity = 0, GZIP = 1 }; | ||
|
|
@@ -32,6 +32,14 @@ struct MetapackHeader { | |
| std::uint8_t reserved; | ||
| std::int64_t last_modified; | ||
| std::uint64_t content_bytes; | ||
| // The size in bytes of the artifact after applying the compression | ||
| // matching the supported wire content-coding. The codec is gzip | ||
| // today, the field name stays compression-agnostic so a future codec | ||
| // swap is mechanical. Precomputed at index time so the server can | ||
| // answer HEAD with Content-Encoding set without compressing on the | ||
| // fly only to discard the bytes. For compressed-storage artifacts | ||
| // this equals the payload size on disk. | ||
| std::uint64_t compressed_bytes; | ||
| std::int64_t duration; | ||
| std::array<std::uint8_t, 32> checksum; | ||
| std::uint16_t mime_length; | ||
|
|
@@ -44,6 +52,7 @@ struct MetapackInfo { | |
| std::string mime; | ||
| MetapackEncoding encoding; | ||
| std::uint64_t content_bytes; | ||
| std::uint64_t compressed_bytes; | ||
| std::chrono::milliseconds duration; | ||
| }; | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,7 +26,8 @@ static auto write_binary_header(std::ostream &output, | |
| const std::span<const std::uint8_t> extension, | ||
| const std::chrono::milliseconds duration, | ||
| const std::string_view payload, | ||
| const std::size_t uncompressed_size) -> void { | ||
| const std::size_t uncompressed_size, | ||
| const std::size_t compressed_size) -> void { | ||
| MetapackHeader header{}; | ||
| header.magic = METAPACK_MAGIC; | ||
| header.format_version = METAPACK_VERSION; | ||
|
|
@@ -38,6 +39,7 @@ static auto write_binary_header(std::ostream &output, | |
| now.time_since_epoch()) | ||
| .count(); | ||
| header.content_bytes = uncompressed_size; | ||
| header.compressed_bytes = compressed_size; | ||
| header.duration = duration.count(); | ||
|
|
||
| const auto hex_string{sourcemeta::core::sha256(payload)}; | ||
|
|
@@ -72,14 +74,18 @@ static auto write_metapack(const std::filesystem::path &destination, | |
| const std::span<const std::uint8_t> extension, | ||
| const std::chrono::milliseconds duration, | ||
| const std::string &content) -> void { | ||
| // Always compute the compressed representation so the size lands in | ||
| // the header. The codec is gzip today, the field name stays | ||
| // compression-agnostic so a future codec swap is mechanical. For | ||
| // compressed-storage artifacts the bytes are also what we write to | ||
| // disk. For Identity storage only the size is kept. | ||
| const auto compressed{sourcemeta::core::gzip( | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. P2: Identity metapack writes now hard-depend on gzip success, introducing a new failure path for non-gzip storage. Prompt for AI agents |
||
| reinterpret_cast<const std::uint8_t *>(content.data()), content.size())}; | ||
| sourcemeta::core::write_file(destination, [&](std::ostream &output) { | ||
| write_binary_header(output, mime, encoding, extension, duration, content, | ||
| content.size()); | ||
| content.size(), compressed.size()); | ||
|
|
||
| if (encoding == MetapackEncoding::GZIP) { | ||
| const auto compressed{sourcemeta::core::gzip( | ||
| reinterpret_cast<const std::uint8_t *>(content.data()), | ||
| content.size())}; | ||
| output.write(compressed.data(), | ||
| static_cast<std::streamsize>(compressed.size())); | ||
| } else { | ||
|
|
@@ -335,6 +341,7 @@ auto metapack_info(const sourcemeta::core::FileView &view) | |
| .mime = std::string{mime_data, header->mime_length}, | ||
| .encoding = header->encoding, | ||
| .content_bytes = header->content_bytes, | ||
| .compressed_bytes = header->compressed_bytes, | ||
| .duration = std::chrono::milliseconds{header->duration}}; | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,10 +10,12 @@ | |
|
|
||
| #include <cassert> // assert | ||
| #include <chrono> // std::chrono::seconds | ||
| #include <cstddef> // std::size_t | ||
| #include <cstdint> // std::uint8_t, std::uint16_t, std::uint32_t | ||
| #include <exception> // std::exception | ||
| #include <filesystem> // std::filesystem | ||
| #include <format> // std::format | ||
| #include <limits> // std::numeric_limits | ||
| #include <optional> // std::optional | ||
| #include <string> // std::string | ||
| #include <string_view> // std::string_view | ||
|
|
@@ -336,8 +338,17 @@ auto RouterAction::artifact_serve( | |
| sourcemeta::one::send_response(status, request, response, contents, | ||
| sourcemeta::one::Encoding::GZIP); | ||
| } else { | ||
| sourcemeta::one::send_response(status, request, response, contents, | ||
| sourcemeta::one::Encoding::Identity); | ||
| // The header carries the compressed size as a fixed-width `uint64_t` | ||
| // to keep the metapack format portable across architectures. Narrow | ||
| // to `std::size_t` for the uWS API. On 64-bit hosts this is a no-op, | ||
| // and the assert guards a hypothetical 32-bit build from truncating | ||
| // a >4 GB artifact (which the indexer would never produce on | ||
| // realistic schema inputs). | ||
| assert(info->compressed_bytes <= std::numeric_limits<std::size_t>::max()); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. P2: Debug-only Prompt for AI agents |
||
| sourcemeta::one::send_response( | ||
| status, request, response, contents, | ||
| sourcemeta::one::Encoding::Identity, | ||
| static_cast<std::size_t>(info->compressed_bytes)); | ||
|
cubic-dev-ai[bot] marked this conversation as resolved.
|
||
| } | ||
| } | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`endWithoutBody` is called with `precomputed_compressed_size` (an `std::optional<std::size_t>`), which likely needs the contained numeric value; otherwise this risks a compile error and/or an incorrect `Content-Length` for `HEAD` responses. Severity: high
🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage.