From 2c08daccd71823c02f6878dfb80ac9bd7cfbe151 Mon Sep 17 00:00:00 2001 From: Vlad Govor Date: Thu, 23 Jan 2025 16:30:37 -0800 Subject: [PATCH 1/3] issue-29: Add a consumer to process requests in chunks [fix] --- onc/+onc/ChunkedResponseConsumer.m | 96 ++++++++++++++++++++++++++++++ onc/+util/do_request.m | 3 +- 2 files changed, 98 insertions(+), 1 deletion(-) create mode 100644 onc/+onc/ChunkedResponseConsumer.m diff --git a/onc/+onc/ChunkedResponseConsumer.m b/onc/+onc/ChunkedResponseConsumer.m new file mode 100644 index 0000000..c4ba277 --- /dev/null +++ b/onc/+onc/ChunkedResponseConsumer.m @@ -0,0 +1,96 @@ +classdef ChunkedResponseConsumer < matlab.net.http.io.GenericConsumer +%% ChunkedResponseConsumer Consumer with an option to control the size of +% chucks which a response is processed in. +% +% Default MATLAB (R2022b) implementation of method `send` in +% `matlab.net.http.ResponseMeassage` has a size limit of 2^30 bytes. +% Responses of greater sizes cause an error at the stage of decoding. +% +% This consumer allows to mitigate the size limitation by processing +% responses in parts of a specified size. Although built-in GenericConsumer +% seems to be able to handle big responses as well, its processing speed is +% slower since MATLAB built-in consumers process parts of responses when +% they get them. This consumer accumulates data in a buffer of the given +% size and processes the whole accumulated chunk at one pass. 
+ + properties (Access=private) + chunkSize % Size of chunks to process in bytes (int) + responseBuffer % Buffer to accumulate data bytes (cell array) + positionInBuffer % Index of next buffer cell to be filled (int) + accumulatedBytes % Number of bytes accumulated in buffer (int) + end + + methods + function obj = ChunkedResponseConsumer(chunkSize) + %% Initialize a customized GenericConsumer for processing responses in + % chunks + % + % ChunkedResponseConsumer(chunkSize) + % + % - chunkSize: (int) Number of bytes to accumulate in a buffer + % + % Returns: (ChunkedResponseConsumer) Consumer object + + if nargin < 1 + chunkSize = 2^29; % default value + end + obj@matlab.net.http.io.GenericConsumer; + obj.chunkSize = chunkSize; + end + + function [len, stop] = putData(obj, data) + %% Process the next block of data + % + % putData(data) + % + % - data: ([uint8]) Array of bytes to be processed + % + % Returns: + % (int) Number of bytes processed at the pass + % (logical) Indicator of a response end + + stop = false; + len = numel(data); + if ~isempty(data) + if obj.accumulatedBytes > 0 && obj.accumulatedBytes + len > obj.chunkSize + % process an accumulated chunk; skipped while the buffer is empty because cell2mat({}) is [] and an empty PutMethod call would end the response early + chunk = cell2mat(obj.responseBuffer); + obj.PutMethod(chunk); + obj.responseBuffer = {}; + obj.accumulatedBytes = 0; + obj.positionInBuffer = 1; + end + % store a data block in responseBuffer + % vertical structure of responseBuffer is important for + % stacking cells with cell2mat() into a vertical vector + obj.responseBuffer{obj.positionInBuffer, 1} = data; + obj.accumulatedBytes = obj.accumulatedBytes + len; + obj.positionInBuffer = obj.positionInBuffer + 1; + else + if ~isempty(obj.responseBuffer) + % process the rest of data stored in responseBuffer + chunk = cell2mat(obj.responseBuffer); + obj.PutMethod(chunk); + end + % extract response data to CurrentDelegate + obj.PutMethod(uint8.empty); + obj.CurrentLength = length(obj.CurrentDelegate.Response.Body.Data); + obj.PutMethod = []; + % transfer data from CurrentDelegate to 
obj.Response + putData@matlab.net.http.io.GenericConsumer(obj, []); + stop = true; + end + end + end + + methods (Access = protected) + function bufsize = start(obj) + %% Call when the response starts + obj.responseBuffer = {}; + obj.CurrentLength = 0; + obj.accumulatedBytes = 0; + obj.positionInBuffer = 1; + bufsize = start@matlab.net.http.io.GenericConsumer(obj); + end + end +end diff --git a/onc/+util/do_request.m b/onc/+util/do_request.m index ba02eb3..0a9222a 100644 --- a/onc/+util/do_request.m +++ b/onc/+util/do_request.m @@ -25,7 +25,8 @@ % run and time request if showInfo, fprintf('\nRequesting URL:\n %s\n', fullUrl); end tic - response = request.send(uri,options); + consumer = onc.ChunkedResponseConsumer; + response = request.send(uri, options, consumer); duration = toc; From 42d4a98af795784bdaed7f14e3299919184d7be3 Mon Sep 17 00:00:00 2001 From: Vlad Govor Date: Mon, 10 Feb 2025 11:04:54 -0800 Subject: [PATCH 2/3] issue-29: Fix typos and add input checks --- onc/+onc/ChunkedResponseConsumer.m | 19 +++++++++++++++---- onc/+util/do_request.m | 3 ++- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/onc/+onc/ChunkedResponseConsumer.m b/onc/+onc/ChunkedResponseConsumer.m index c4ba277..7eaa57a 100644 --- a/onc/+onc/ChunkedResponseConsumer.m +++ b/onc/+onc/ChunkedResponseConsumer.m @@ -1,6 +1,6 @@ classdef ChunkedResponseConsumer < matlab.net.http.io.GenericConsumer %% ChunkedResponseConsumer Consumer with an option to control the size of -% chucks which a response is processed in. +% chunks which a response is processed in. % % Default MATLAB (R2022b) implementation of method `send` in % `matlab.net.http.ResponseMeassage` has a size limit of 2^30 bytes. @@ -11,7 +11,7 @@ % seems to be able to handle big responses as well, its processing speed is % slower since MATLAB built-in consumers process parts of responses when % they get them. 
This consumer accumulates data in a buffer of the given -% size and processes the whole accumulated chunk at one pass. +% size and processes the whole accumulated chunk in one pass. properties (Access=private) chunkSize % Size of chunks to process in bytes (int) @@ -33,6 +33,17 @@ if nargin < 1 chunkSize = 2^29; % default value + else + if ~isnumeric(chunkSize) || ~isscalar(chunkSize) + error('MatlabAPI:ChunkedResponseConsumer:BadInputType', ... + 'chunkSize is expected to be a number.') + end + if chunkSize > 2^30 + error('MatlabAPI:ChunkedResponseConsumer:BadInputValue', ... + ['Provided chunkSize is too big and will cause an ' ... + 'error when a response is decoded if its size ' ... + 'exceeds 2^30 bytes. Provided value: ' num2str(chunkSize)]) + end end obj@matlab.net.http.io.GenericConsumer; obj.chunkSize = chunkSize; @@ -84,13 +95,13 @@ end methods (Access = protected) - function bufsize = start(obj) + function buffsize = start(obj) %% Call when the response starts obj.responseBuffer = {}; obj.CurrentLength = 0; obj.accumulatedBytes = 0; obj.positionInBuffer = 1; - bufsize = start@matlab.net.http.io.GenericConsumer(obj); + buffsize = start@matlab.net.http.io.GenericConsumer(obj); end end end diff --git a/onc/+util/do_request.m b/onc/+util/do_request.m index 0a9222a..6717789 100644 --- a/onc/+util/do_request.m +++ b/onc/+util/do_request.m @@ -25,7 +25,8 @@ % run and time request if showInfo, fprintf('\nRequesting URL:\n %s\n', fullUrl); end tic - consumer = onc.ChunkedResponseConsumer; + chunckSize = 2^29; % half of the max response size that `send` can handle + consumer = onc.ChunkedResponseConsumer(chunckSize); response = request.send(uri, options, consumer); duration = toc; From 5fc8a21f34d8512117e55601b472c87c29212fcd Mon Sep 17 00:00:00 2001 From: Vlad Govor Date: Tue, 11 Feb 2025 09:41:17 -0800 Subject: [PATCH 3/3] issue-29: Fix typo --- onc/+util/do_request.m | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onc/+util/do_request.m 
b/onc/+util/do_request.m index 6717789..e1eeca8 100644 --- a/onc/+util/do_request.m +++ b/onc/+util/do_request.m @@ -25,8 +25,8 @@ % run and time request if showInfo, fprintf('\nRequesting URL:\n %s\n', fullUrl); end tic - chunckSize = 2^29; % half of the max response size that `send` can handle - consumer = onc.ChunkedResponseConsumer(chunckSize); + chunkSize = 2^29; % half of the max response size that `send` can handle + consumer = onc.ChunkedResponseConsumer(chunkSize); response = request.send(uri, options, consumer); duration = toc;