7 changes: 6 additions & 1 deletion build_tools/customizations.rb
@@ -153,13 +153,18 @@ def dynamodb_example_deep_transform(subsegment, keys)
api['metadata'].delete('signatureVersion')

# handled by endpoints 2.0
api['operations'].each do |_key, operation|
api['operations'].each do |key, operation|
# requestUri should always exist. Remove bucket from path
# and preserve request uri as /
if operation['http'] && operation['http']['requestUri']
operation['http']['requestUri'].gsub!('/{Bucket}', '/')
operation['http']['requestUri'].gsub!('//', '/')
end

next unless %w[PutObject UploadPart].include?(key)

operation['authType'] = 'v4-unsigned-body'
operation['unsignedPayload'] = true
Comment on lines +164 to +167
Contributor Author

I didn't add test cases for unsigned body since it's already covered by our test cases here: https://github.com/aws/aws-sdk-ruby/blob/version-3/gems/aws-sdk-core/spec/aws/plugins/checksum_algorithm_spec.rb#L24

end

# Ensure Expires is a timestamp regardless of model to be backwards
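For context on the `v4-unsigned-body` / `unsignedPayload` flags above: SigV4 normally signs a SHA-256 hash of the request body, which requires reading the whole body before sending. Marking PutObject and UploadPart as unsigned-payload lets the signer send a standard sentinel instead, so the body can be streamed. A rough, hedged sketch of the header difference (the sentinel strings are standard SigV4 values, not something introduced by this change):

```ruby
# Illustrative only: what the signer puts in x-amz-content-sha256.
require 'digest'

body = 'example payload'
headers = {
  # default signed body:
  # 'x-amz-content-sha256' => Digest::SHA256.hexdigest(body),
  # unsigned body (v4-unsigned-body / unsignedPayload):
  'x-amz-content-sha256' => 'UNSIGNED-PAYLOAD'
  # an aws-chunked body with a trailing checksum uses
  # 'STREAMING-UNSIGNED-PAYLOAD-TRAILER' instead.
}
puts headers
```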
4 changes: 2 additions & 2 deletions build_tools/services.rb
@@ -9,10 +9,10 @@ class ServiceEnumerator
MANIFEST_PATH = File.expand_path('../../services.json', __FILE__)

# Minimum `aws-sdk-core` version for new gem builds
MINIMUM_CORE_VERSION = "3.239.1"
MINIMUM_CORE_VERSION = "3.240.0"

# Minimum `aws-sdk-core` version for new S3 gem builds
MINIMUM_CORE_VERSION_S3 = "3.234.0"
MINIMUM_CORE_VERSION_S3 = "3.240.0"
Comment on lines -12 to +15
Contributor Author

Self-reminder to update the minimum core version when this is ready for release.


EVENTSTREAM_PLUGIN = "Aws::Plugins::EventStreamConfiguration"

2 changes: 2 additions & 0 deletions gems/aws-sdk-core/CHANGELOG.md
@@ -1,6 +1,8 @@
Unreleased Changes
------------------

* Feature - Improved memory efficiency when calculating request checksums.

3.239.2 (2025-11-25)
------------------

99 changes: 54 additions & 45 deletions gems/aws-sdk-core/lib/aws-sdk-core/plugins/checksum_algorithm.rb
@@ -5,6 +5,7 @@ module Plugins
# @api private
class ChecksumAlgorithm < Seahorse::Client::Plugin
CHUNK_SIZE = 1 * 1024 * 1024 # one MB
MIN_CHUNK_SIZE = 16_384 # 16 KB

# determine the set of supported client side checksum algorithms
# CRC32c requires aws-crt (optional sdk dependency) for support
@@ -162,9 +163,7 @@ def call(context)
context[:http_checksum] ||= {}

# Set validation mode to enabled when supported.
if context.config.response_checksum_validation == 'when_supported'
enable_request_validation_mode(context)
end
enable_request_validation_mode(context) if context.config.response_checksum_validation == 'when_supported'

@handler.call(context)
end
@@ -194,9 +193,7 @@ def call(context)
calculate_request_checksum(context, request_algorithm)
end

if should_verify_response_checksum?(context)
add_verify_response_checksum_handlers(context)
end
add_verify_response_checksum_handlers(context) if should_verify_response_checksum?(context)

with_metrics(context.config, algorithm) { @handler.call(context) }
end
@@ -346,16 +343,16 @@ def calculate_request_checksum(context, checksum_properties)

def apply_request_checksum(context, headers, checksum_properties)
header_name = checksum_properties[:name]
body = context.http_request.body_contents
headers[header_name] = calculate_checksum(
checksum_properties[:algorithm],
body
context.http_request.body
Contributor

Was this change from context.http_request.body_contents to context.http_request.body intentional?

Contributor Author

Great question. And yes! This was the root cause of the high memory usage during testing. Calling #body_contents on the request loads the entire body into memory. It's unknown why this was done previously (I asked around), so I think just passing the body here makes sense (I do a rewind much later, before we start calculating checksums).

Contributor

Do we need to do a rewind after calculating as well?

)
end

def calculate_checksum(algorithm, body)
digest = ChecksumAlgorithm.digest_for_algorithm(algorithm)
if body.respond_to?(:read)
body.rewind
update_in_chunks(digest, body)
else
digest.update(body)
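To make the memory difference discussed in the thread above concrete, here is a minimal, standalone sketch (not the plugin's actual `update_in_chunks` helper) of the streaming approach: the digest is fed fixed-size slices, so peak memory stays near the chunk size instead of the full body size. The file name is a placeholder.

```ruby
# Minimal sketch of chunked digesting: only one CHUNK_SIZE slice of the
# body is resident in memory at a time, unlike #body_contents (or IO#read
# with no length), which buffers the entire body.
require 'digest'

CHUNK_SIZE = 1024 * 1024 # 1 MB, mirroring the plugin's constant

def streamed_sha256(io, chunk_size: CHUNK_SIZE)
  digest = Digest::SHA256.new
  io.rewind
  while (chunk = io.read(chunk_size))
    digest.update(chunk)
  end
  digest.base64digest
end

File.open('large_upload.bin', 'rb') { |f| puts streamed_sha256(f) }
```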
@@ -388,13 +385,15 @@ def apply_request_trailer_checksum(context, headers, checksum_properties)
unless context.http_request.body.respond_to?(:size)
raise Aws::Errors::ChecksumError, 'Could not determine length of the body'
end

headers['X-Amz-Decoded-Content-Length'] = context.http_request.body.size

context.http_request.body = AwsChunkedTrailerDigestIO.new(
context.http_request.body,
checksum_properties[:algorithm],
location_name
)
context.http_request.body =
AwsChunkedTrailerDigestIO.new(
io: context.http_request.body,
algorithm: checksum_properties[:algorithm],
location_name: location_name
)
end

def should_verify_response_checksum?(context)
@@ -417,10 +416,7 @@ def add_verify_response_headers_handler(context, checksum_context)
context[:http_checksum][:validation_list] = validation_list

context.http_response.on_headers do |_status, headers|
header_name, algorithm = response_header_to_verify(
headers,
validation_list
)
header_name, algorithm = response_header_to_verify(headers, validation_list)
next unless header_name

expected = headers[header_name]
@@ -466,52 +462,65 @@ def response_header_to_verify(headers, validation_list)
# Wrapper for request body that implements application-layer
# chunking with Digest computed on chunks + added as a trailer
class AwsChunkedTrailerDigestIO
CHUNK_SIZE = 16_384

def initialize(io, algorithm, location_name)
@io = io
@location_name = location_name
@algorithm = algorithm
@digest = ChecksumAlgorithm.digest_for_algorithm(algorithm)
@trailer_io = nil
def initialize(options = {})
@io = options.delete(:io)
@location_name = options.delete(:location_name)
@algorithm = options.delete(:algorithm)
@digest = ChecksumAlgorithm.digest_for_algorithm(@algorithm)
@chunk_size = Thread.current[:net_http_override_body_stream_chunk] || MIN_CHUNK_SIZE
@overhead_bytes = calculate_overhead(@chunk_size)
@max_chunk_size = @chunk_size - @overhead_bytes
Contributor

I find max_chunk_size to be a slightly confusing name. Why is it less than the chunk size? This is actually the chunk size that we're reading from the underlying IO, right? So maybe something like base_chunk_size or underlying_chunk_size?

@current_chunk = ''.b
@eof = false
end

# the size of the application layer aws-chunked + trailer body
def size
# compute the number of chunks
# a full chunk has 4 + 4 bytes overhead, a partial chunk is len.to_s(16).size + 4
orig_body_size = @io.size
n_full_chunks = orig_body_size / CHUNK_SIZE
partial_bytes = orig_body_size % CHUNK_SIZE
chunked_body_size = n_full_chunks * (CHUNK_SIZE + 8)
chunked_body_size += partial_bytes.to_s(16).size + partial_bytes + 4 unless partial_bytes.zero?
n_full_chunks = orig_body_size / @max_chunk_size
partial_bytes = orig_body_size % @max_chunk_size

chunked_body_size = n_full_chunks * (@max_chunk_size + @max_chunk_size.to_s(16).size + 4)
chunked_body_size += partial_bytes.to_s(16).size + partial_bytes + 4 unless partial_bytes.zero?
trailer_size = ChecksumAlgorithm.trailer_length(@algorithm, @location_name)
chunked_body_size + trailer_size
end

def rewind
@io.rewind
@current_chunk.clear
@eof = false
end

def read(length, buf = nil)
# account for possible leftover bytes at the end, if we have trailer bytes, send them
if @trailer_io
return @trailer_io.read(length, buf)
end
def read(_length = nil, buf = nil)
Contributor Author

The length parameter is ignored since we always expect 16KB (default) or whatever is set by @chunk_size. This is an internal implementation accessed by either our patch or Net::HTTP.

I could make this more dynamic but that gets tricky... the #size method needs to know the chunk size ahead of time to calculate the overhead bytes plus actual IO size. Supporting variable-length reads would break that calculation.

Contributor

What would be the benefit of it being dynamic? If we're always expecting 16KB and this is an established expectation, I think it's fine that we don't use the length input. Not sure how feasible this is, but we could consider removing the parameter altogether in the future so as not to cause confusion when using this method.

Contributor Author (@jterapin, Dec 9, 2025)

Well, I don't think we could remove the parameter altogether because of the interface... this IO-like object is read by the HTTP client (net-http in our case), and length is a required parameter there. Removing it would break that contract.

> What would be the benefit of it being dynamic?

In the scenario where we don't use the patch, net-http could change the way its reads work, e.g. by increasing the chunk size. We are capping reads at 16KB here, and we won't know if things have changed unless net-http checks the length on its end.

return if @eof

buf&.clear
output_buffer = buf || ''.b
fill_chunk if @current_chunk.empty? && !@eof

output_buffer << @current_chunk
@current_chunk.clear
output_buffer
end

private

def calculate_overhead(chunk_size)
chunk_size.to_s(16).size + 4 # hex_length + "\r\n\r\n"
end

chunk = @io.read(length)
def fill_chunk
chunk = @io.read(@max_chunk_size)
if chunk
chunk.force_encoding('ASCII-8BIT')
@digest.update(chunk)
application_chunked = "#{chunk.bytesize.to_s(16)}\r\n#{chunk}\r\n"
return StringIO.new(application_chunked).read(application_chunked.size, buf)
@current_chunk << "#{chunk.bytesize.to_s(16)}\r\n#{chunk}\r\n".b
else
trailers = {}
trailers[@location_name] = @digest.base64digest
trailers = trailers.map { |k,v| "#{k}:#{v}" }.join("\r\n")
@trailer_io = StringIO.new("0\r\n#{trailers}\r\n\r\n")
chunk = @trailer_io.read(length, buf)
trailer_str = { @location_name => @digest.base64digest }.map { |k, v| "#{k}:#{v}" }.join("\r\n")
@current_chunk << "0\r\n#{trailer_str}\r\n\r\n".b
@eof = true
end
chunk
end
end
end
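A hedged sketch of how the wrapper above is consumed (it is `@api private`, so this is purely illustrative): each `#read` returns one framed chunk regardless of the length argument, the final call emits the `0\r\n` trailer chunk, and the total bytes produced are expected to match `#size`, which is what the overhead math in `initialize` and `size` is accounting for.

```ruby
# Illustrative consumption of the aws-chunked wrapper; the constant path and
# behavior are taken from the diff above, but this is not a supported API.
require 'aws-sdk-core'
require 'stringio'

wrapper = Aws::Plugins::ChecksumAlgorithm::AwsChunkedTrailerDigestIO.new(
  io: StringIO.new('a' * 100_000),
  algorithm: 'CRC32',
  location_name: 'x-amz-checksum-crc32'
)

total = 0
while (framed = wrapper.read(16_384)) # the length argument is ignored
  total += framed.bytesize # "<hex len>\r\n<data>\r\n", last one is the trailer
end
puts total == wrapper.size # expected: true
```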
55 changes: 44 additions & 11 deletions gems/aws-sdk-core/lib/seahorse/client/net_http/patches.rb
@@ -6,28 +6,61 @@ module Seahorse
module Client
# @api private
module NetHttp

# @api private
module Patches

def self.apply!
Net::HTTPGenericRequest.prepend(PatchDefaultContentType)
Net::HTTPGenericRequest.prepend(RequestPatches)
end

# For requests with bodies, Net::HTTP sets a default content type of:
# 'application/x-www-form-urlencoded'
# There are cases where we should not send content type at all.
# Even when no body is supplied, Net::HTTP uses a default empty body
# and sets it anyway. This patch disables the behavior when a Thread
# local variable is set.
module PatchDefaultContentType
# Patches intended to override Net::HTTP functionality
module RequestPatches
# For requests with bodies, Net::HTTP sets a default content type of:
# 'application/x-www-form-urlencoded'
# There are cases where we should not send content type at all.
# Even when no body is supplied, Net::HTTP uses a default empty body
# and sets it anyway. This patch disables the behavior when a Thread
# local variable is set.
# See: https://github.com/ruby/net-http/issues/205
def supply_default_content_type
return if Thread.current[:net_http_skip_default_content_type]
Comment on lines +23 to 25
Contributor Author (@jterapin, Dec 7, 2025)

I think we could eventually get rid of this, given that net-http no longer sets the default content type after their recent patch. We may want to remove this in later versions of Ruby, once the default net-http gem version includes that change.


super
end
end

# IO.copy_stream is capped at a 16KB buffer, so this patch increases
# the chunk size for better performance.
# Only intended for use by the S3 TransferManager (TM) implementation.
# See: https://github.com/ruby/net-http/blob/master/lib/net/http/generic_request.rb#L292
def send_request_with_body_stream(sock, ver, path, f)
return super unless (chunk_size = Thread.current[:net_http_override_body_stream_chunk])

unless content_length || chunked?
raise ArgumentError, 'Content-Length not given and Transfer-Encoding is not `chunked`'
end

supply_default_content_type
write_header(sock, ver, path)
wait_for_continue sock, ver if sock.continue_timeout
if chunked?
chunker = Chunker.new(sock)
RequestIO.custom_stream(f, chunker, chunk_size)
chunker.finish
else
RequestIO.custom_stream(f, sock, chunk_size)
end
end

class RequestIO
def self.custom_stream(src, dst, chunk_size)
copied = 0
while (chunk = src.read(chunk_size))
dst.write(chunk)
copied += chunk.bytesize
end
copied
end
end
end
end
end
end
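Because the override is read from a Thread-local, calling code opts in per request and clears it afterwards. A hedged sketch of that pattern (bucket, key, and file path are placeholders; this mirrors what `FileUploader#put_object` does further down):

```ruby
# Raise the Net::HTTP streaming buffer for one request on this thread,
# then always clear the override so other requests are unaffected.
require 'aws-sdk-s3'

s3 = Aws::S3::Client.new
begin
  Thread.current[:net_http_override_body_stream_chunk] = 4 * 1024 * 1024 # 4 MB
  File.open('/tmp/big.bin', 'rb') do |io|
    s3.put_object(bucket: 'my-bucket', key: 'big.bin', body: io)
  end
ensure
  Thread.current[:net_http_override_body_stream_chunk] = nil
end
```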
4 changes: 4 additions & 0 deletions gems/aws-sdk-s3/CHANGELOG.md
@@ -1,6 +1,10 @@
Unreleased Changes
------------------

* Feature - Added `:http_chunk_size` parameter to `TransferManager#upload_file` to control the buffer size when streaming request bodies over HTTP. Larger chunk sizes may improve network throughput at the cost of higher memory usage.

* Feature - Improved memory efficiency when calculating request checksums for large file uploads.

1.206.0 (2025-12-02)
------------------

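A hedged usage sketch for the new `:http_chunk_size` option described above. Bucket, key, and path are placeholders, and it assumes `TransferManager.new` accepts a `:client` option as in the existing documentation.

```ruby
# Upload with a larger HTTP streaming buffer; 8 MB is an illustrative value,
# trading higher per-request memory for fewer, larger socket writes.
require 'aws-sdk-s3'

tm = Aws::S3::TransferManager.new(client: Aws::S3::Client.new)
tm.upload_file(
  '/tmp/archive.tar.gz',
  bucket: 'my-bucket',
  key: 'archive.tar.gz',
  http_chunk_size: 8 * 1024 * 1024
)
```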
4 changes: 2 additions & 2 deletions gems/aws-sdk-s3/lib/aws-sdk-s3/client.rb
@@ -17659,7 +17659,7 @@ def put_bucket_website(params = {}, options = {})
# [3]: https://docs.aws.amazon.com/AmazonS3/latest/dev/acl-using-rest-api.html
# [4]: https://docs.aws.amazon.com/AmazonS3/latest/userguide/about-object-ownership.html
#
# @option params [String, StringIO, File] :body
# @option params [String, IO] :body
# Object data.
#
# @option params [required, String] :bucket
@@ -20968,7 +20968,7 @@ def update_bucket_metadata_journal_table_configuration(params = {}, options = {})
# [16]: https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListParts.html
# [17]: https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListMultipartUploads.html
#
# @option params [String, StringIO, File] :body
# @option params [String, IO] :body
# Object data.
#
# @option params [required, String] :bucket
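The `:body` documentation change above widens the declared type from `String, StringIO, File` to `String, IO`; in practice any IO-like object that can be read (and rewound for checksumming) can be streamed. A hedged example with placeholder names:

```ruby
# Streaming an arbitrary IO-like body to put_object; a File is the common
# case, but any readable, rewindable IO with a known size works.
require 'aws-sdk-s3'

File.open('/tmp/report.csv', 'rb') do |io|
  Aws::S3::Client.new.put_object(bucket: 'my-bucket', key: 'report.csv', body: io)
end
```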
2 changes: 2 additions & 0 deletions gems/aws-sdk-s3/lib/aws-sdk-s3/client_api.rb
@@ -4121,6 +4121,7 @@ module ClientApi
"requestAlgorithmMember" => "checksum_algorithm",
"requestChecksumRequired" => false,
}
o['unsignedPayload'] = true
o.input = Shapes::ShapeRef.new(shape: PutObjectRequest)
o.output = Shapes::ShapeRef.new(shape: PutObjectOutput)
o.errors << Shapes::ShapeRef.new(shape: InvalidRequest)
@@ -4309,6 +4310,7 @@ module ClientApi
"requestAlgorithmMember" => "checksum_algorithm",
"requestChecksumRequired" => false,
}
o['unsignedPayload'] = true
o.input = Shapes::ShapeRef.new(shape: UploadPartRequest)
o.output = Shapes::ShapeRef.new(shape: UploadPartOutput)
end)
10 changes: 9 additions & 1 deletion gems/aws-sdk-s3/lib/aws-sdk-s3/file_uploader.rb
@@ -15,6 +15,7 @@ class FileUploader
def initialize(options = {})
@client = options[:client] || Client.new
@executor = options[:executor]
@http_chunk_size = options[:http_chunk_size]
@multipart_threshold = options[:multipart_threshold] || DEFAULT_MULTIPART_THRESHOLD
end

@@ -37,7 +38,11 @@ def upload(source, options = {})
def upload(source, options = {})
Aws::Plugins::UserAgent.metric('S3_TRANSFER') do
if File.size(source) >= @multipart_threshold
MultipartFileUploader.new(client: @client, executor: @executor).upload(source, options)
MultipartFileUploader.new(
client: @client,
executor: @executor,
http_chunk_size: @http_chunk_size
).upload(source, options)
else
put_object(source, options)
end
@@ -59,7 +64,10 @@ def put_object(source, options)
options[:on_chunk_sent] = single_part_progress(callback)
end
open_file(source) do |file|
Thread.current[:net_http_override_body_stream_chunk] = @http_chunk_size if @http_chunk_size
@client.put_object(options.merge(body: file))
ensure
Thread.current[:net_http_override_body_stream_chunk] = nil
end
end

3 changes: 3 additions & 0 deletions gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_file_uploader.rb
@@ -22,6 +22,7 @@ class MultipartFileUploader
def initialize(options = {})
@client = options[:client] || Client.new
@executor = options[:executor]
@http_chunk_size = options[:http_chunk_size]
end

# @return [Client]
@@ -150,6 +151,7 @@ def upload_with_executor(pending, completed, options)

upload_attempts += 1
@executor.post(part) do |p|
Thread.current[:net_http_override_body_stream_chunk] = @http_chunk_size if @http_chunk_size
update_progress(progress, p)
resp = @client.upload_part(p)
p[:body].close
@@ -160,6 +162,7 @@ def upload_with_executor(pending, completed, options)
abort_upload = true
errors << e
ensure
Thread.current[:net_http_override_body_stream_chunk] = nil if @http_chunk_size
completion_queue << :done
end
end
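The override is set inside the `@executor.post` block and cleared in its `ensure` because Thread-local state does not carry over to executor worker threads; each worker sets and clears its own copy. A minimal standalone illustration:

```ruby
# Thread-locals are per-thread: a value set on the main thread is not
# visible inside a worker thread, hence the per-block set/clear above.
Thread.current[:net_http_override_body_stream_chunk] = 4 * 1024 * 1024
worker = Thread.new { p Thread.current[:net_http_override_body_stream_chunk] } # prints nil
worker.join
Thread.current[:net_http_override_body_stream_chunk] = nil
```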