diff --git a/src/ar_block.erl b/src/ar_block.erl new file mode 100644 index 000000000..c36229b9c --- /dev/null +++ b/src/ar_block.erl @@ -0,0 +1,59 @@ +%%% @doc Copied and adapted from the arweave codebase. +%%% Should track: https://github.com/ArweaveTeam/arweave/blob/master/apps/arweave/src/ar_block.erl +-module(ar_block). + +-export([strict_data_split_threshold/0, get_chunk_padded_offset/1, generate_size_tagged_list_from_txs/2]). + +-include("include/ar.hrl"). + +%%%=================================================================== +%%% Public interface. +%%%=================================================================== + +strict_data_split_threshold() -> ?STRICT_DATA_SPLIT_THRESHOLD. + +%% @doc Return Offset if it is smaller than or equal to ar_block:strict_data_split_threshold(). +%% Otherwise, return the offset of the last byte of the chunk + the size of the padding. +-spec get_chunk_padded_offset(Offset :: non_neg_integer()) -> non_neg_integer(). +get_chunk_padded_offset(Offset) -> + case Offset > ar_block:strict_data_split_threshold() of + true -> + ar_poa:get_padded_offset(Offset, ar_block:strict_data_split_threshold()); + false -> + Offset + end. + +generate_size_tagged_list_from_txs(TXs, Height) -> + lists:reverse( + element(2, + lists:foldl( + fun(TX, {Pos, List}) -> + DataSize = TX#tx.data_size, + End = Pos + DataSize, + case Height >= ar_fork:height_2_5() of + true -> + Padding = ar_tx:get_weave_size_increase(DataSize, Height) + - DataSize, + %% Encode the padding information in the Merkle tree. + case Padding > 0 of + true -> + PaddingRoot = <<>>, + {End + Padding, [{{padding, PaddingRoot}, End + Padding}, + {{TX, get_tx_data_root(TX)}, End} | List]}; + false -> + {End, [{{TX, get_tx_data_root(TX)}, End} | List]} + end; + false -> + {End, [{{TX, get_tx_data_root(TX)}, End} | List]} + end + end, + {0, []}, + lists:sort(TXs) + ) + ) + ). + +get_tx_data_root(#tx{ format = 2, data_root = DataRoot }) -> + DataRoot; +get_tx_data_root(TX) -> + (ar_tx:generate_chunk_tree(TX))#tx.data_root. \ No newline at end of file diff --git a/src/ar_bundles.erl b/src/ar_bundles.erl index 3f070b54a..9b72115ef 100644 --- a/src/ar_bundles.erl +++ b/src/ar_bundles.erl @@ -5,6 +5,7 @@ -export([encode_tags/1, decode_tags/1]). -export([serialize/1, deserialize/1, serialize_bundle/3]). -export([data_item_signature_data/1]). +-export([bundle_header_size/1, decode_bundle_header/1]). -include("include/hb.hrl"). -include_lib("eunit/include/eunit.hrl"). @@ -430,14 +431,14 @@ maybe_unbundle(Item) -> unbundle_list(Item) -> case unbundle(Item#tx.data) of - detached -> Item#tx{data = detached}; + ?DEFAULT_DATA -> Item#tx{data = ?DEFAULT_DATA}; Items -> Item#tx{data = hb_util:list_to_numbered_message(Items)} end. unbundle_map(Item) -> MapTXID = dev_arweave_common:tagfind(<<"bundle-map">>, Item#tx.tags, <<>>), case unbundle(Item#tx.data) of - detached -> Item#tx{data = detached}; + ?DEFAULT_DATA -> Item#tx{data = ?DEFAULT_DATA}; Items -> MapItem = find_single_layer(hb_util:decode(MapTXID), Items), Map = hb_json:decode(MapItem#tx.data), @@ -469,7 +470,7 @@ find_single_layer(UnsignedID, Items) -> unbundle(<<Count:256/little, Content/binary>>) -> {ItemsBin, Items} = decode_bundle_header(Count, Content), decode_bundle_items(Items, ItemsBin); -unbundle(<<>>) -> detached. +unbundle(?DEFAULT_DATA) -> ?DEFAULT_DATA. decode_bundle_items([], <<>>) -> []; decode_bundle_items([{_ID, Size} | RestItems], ItemsBin) -> ) ]. +bundle_header_size(<<Count:256/little, _Rest/binary>>) -> + % Each item in the bundle header index consumes 64 bytes + 32 + (Count * 64).
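As context for `bundle_header_size/1` above: an ANS-104 bundle starts with a 32-byte little-endian item count, followed by one 64-byte index entry per item (a 32-byte little-endian size, then the 32-byte item ID), and only then the concatenated item binaries. A minimal sketch of that layout; `encode_bundle_header/1` is a hypothetical helper used for illustration, not part of this change:

%% Hypothetical helper illustrating the header layout that
%% bundle_header_size/1 measures: 32 bytes of count + 64 bytes per entry.
encode_bundle_header(Entries) ->
    Count = length(Entries),
    Index = << <<Size:256/little, ID:32/binary>> || {ID, Size} <- Entries >>,
    <<Count:256/little, Index/binary>>.

%% Usage sketch: with ID1 = ID2 = crypto:strong_rand_bytes(32), a two-item
%% header is 32 + 2 * 64 = 160 bytes, matching bundle_header_size/1:
%% Header = encode_bundle_header([{ID1, 5}, {ID2, 7}]),
%% 160 = byte_size(Header),
%% 160 = ar_bundles:bundle_header_size(Header).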
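To make the padding rule behind ar_block:get_chunk_padded_offset/1 and the ar_poa:get_padded_offset/2 call above concrete: offsets past the strict data split threshold are rounded up to the next 256 KiB boundary, measured from the threshold. A worked sketch, assuming ?DATA_CHUNK_SIZE is 262144 bytes (256 KiB):

%% Illustration only; assumes ?DATA_CHUNK_SIZE = 262144 bytes.
padded_offset_example() ->
    T = ar_block:strict_data_split_threshold(),
    %% 1000 bytes past the threshold pad out to one full chunk...
    T + 262144 = ar_poa:get_padded_offset(T + 1000, T),
    %% ...while an offset already on a chunk boundary is unchanged.
    T + 262144 = ar_poa:get_padded_offset(T + 262144, T),
    %% At or below the threshold, get_chunk_padded_offset/1 returns the
    %% offset as-is.
    T = ar_block:get_chunk_padded_offset(T),
    ok.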
+ +decode_bundle_header(<>) -> + decode_bundle_header(Count, Content); +decode_bundle_header(<<>>) -> + {<<>>, []}. + decode_bundle_header(Count, Bin) -> decode_bundle_header(Count, Bin, []). decode_bundle_header(0, ItemsBin, Header) -> {ItemsBin, lists:reverse(Header)}; -decode_bundle_header(Count, <>, Header) -> +decode_bundle_header( + Count, + <>, + Header +) -> decode_bundle_header(Count - 1, Rest, [{ID, Size} | Header]). %% @doc Decode the signature from a binary format. Only RSA 4096 is currently supported. @@ -643,6 +657,38 @@ with_zero_length_tag_test() -> Deserialized = deserialize(Serialized), ?assertEqual(Item, Deserialized). +decode_bundle_header_test() -> + ?assertEqual({<<>>, []}, decode_bundle_header(<<>>)), + Tail = <<"tail">>, + ?assertEqual( + {Tail, []}, + decode_bundle_header(<<0:256/little, Tail/binary>>) + ), + ID1 = crypto:strong_rand_bytes(32), + Items1 = <<"abcde">>, + ?assertEqual( + {Items1, [{ID1, 5}]}, + decode_bundle_header(<<1:256/little, 5:256/little, ID1:32/binary, Items1/binary>>) + ), + ID2 = crypto:strong_rand_bytes(32), + ID3 = crypto:strong_rand_bytes(32), + Items2 = <<"payload">>, + ?assertEqual( + {Items2, [{ID2, 4}, {ID3, 2}]}, + decode_bundle_header( + << + 2:256/little, + 4:256/little, ID2:32/binary, + 2:256/little, ID3:32/binary, + Items2/binary + >> + ) + ), + ?assertEqual( + {<<>>, [{ID1, 6}]}, + decode_bundle_header(<<1:256/little, 6:256/little, ID1:32/binary>>) + ). + unsigned_data_item_id_test() -> Item1 = deserialize( serialize( diff --git a/src/ar_fork.erl b/src/ar_fork.erl new file mode 100644 index 000000000..0209cae57 --- /dev/null +++ b/src/ar_fork.erl @@ -0,0 +1,173 @@ +%%% +%%% @doc The module defines Arweave hard forks' heights. +%%% + +-module(ar_fork). + +-export([height_1_6/0, height_1_7/0, height_1_8/0, height_1_9/0, height_2_0/0, height_2_2/0, + height_2_3/0, height_2_4/0, height_2_5/0, height_2_6/0, height_2_6_8/0, + height_2_7/0, height_2_7_1/0, height_2_7_2/0, + height_2_8/0, height_2_9/0]). + +-ifdef(FORKS_RESET). +height_1_6() -> + 0. +-else. +height_1_6() -> + 95000. +-endif. + +-ifdef(FORKS_RESET). +height_1_7() -> + 0. +-else. +height_1_7() -> + 235200. % Targeting 2019-07-08 UTC +-endif. + +-ifdef(FORKS_RESET). +height_1_8() -> + 0. +-else. +height_1_8() -> + 269510. % Targeting 2019-08-29 UTC +-endif. + +-ifdef(FORKS_RESET). +height_1_9() -> + 0. +-else. +height_1_9() -> + 315700. % Targeting 2019-11-04 UTC +-endif. + +-ifdef(FORKS_RESET). +height_2_0() -> + 0. +-else. +height_2_0() -> + 422250. % Targeting 2020-04-09 10:00 UTC +-endif. + +-ifdef(FORKS_RESET). +height_2_2() -> + 0. +-else. +height_2_2() -> + 552180. % Targeting 2020-10-21 13:00 UTC +-endif. + +-ifdef(FORKS_RESET). +height_2_3() -> + 0. +-else. +height_2_3() -> + 591140. % Targeting 2020-12-21 11:00 UTC +-endif. + +-ifdef(FORKS_RESET). +height_2_4() -> + 0. +-else. +height_2_4() -> + 633720. % Targeting 2021-02-24 11:50 UTC +-endif. + +-ifdef(FORKS_RESET). +height_2_5() -> + 0. +-else. +height_2_5() -> + 812970. +-endif. + +-ifdef(FORK_2_6_HEIGHT). +height_2_6() -> + ?FORK_2_6_HEIGHT. +-else. + -ifdef(FORKS_RESET). + height_2_6() -> + 0. + -else. + height_2_6() -> + 1132210. % Targeting 2023-03-06 14:00 UTC + -endif. +-endif. + +-ifdef(FORK_2_6_8_HEIGHT). +height_2_6_8() -> + ?FORK_2_6_8_HEIGHT. +-else. + -ifdef(FORKS_RESET). + height_2_6_8() -> + 0. + -else. + height_2_6_8() -> + 1189560. % Targeting 2023-05-30 16:00 UTC + -endif. +-endif. + +-ifdef(FORK_2_7_HEIGHT). +height_2_7() -> + ?FORK_2_7_HEIGHT. +-else. + -ifdef(FORKS_RESET). 
+ height_2_7() -> + 0. + -else. + height_2_7() -> + 1275480. % Targeting 2023-10-04 14:00 UTC + -endif. +-endif. + +-ifdef(FORK_2_7_1_HEIGHT). +height_2_7_1() -> + ?FORK_2_7_1_HEIGHT. +-else. + -ifdef(FORKS_RESET). + height_2_7_1() -> + 0. + -else. + height_2_7_1() -> + 1316410. % Targeting 2023-12-05 14:00 UTC + -endif. +-endif. + +-ifdef(FORK_2_7_2_HEIGHT). +height_2_7_2() -> + ?FORK_2_7_2_HEIGHT. +-else. + -ifdef(FORKS_RESET). + height_2_7_2() -> + 0. + -else. + height_2_7_2() -> + 1391330. % Targeting 2024-03-26 14:00 UTC + -endif. +-endif. + +-ifdef(FORK_2_8_HEIGHT). +height_2_8() -> + ?FORK_2_8_HEIGHT. +-else. + -ifdef(FORKS_RESET). + height_2_8() -> + 0. + -else. + height_2_8() -> + 1547120. % Targeting 2024-11-13 14:00 UTC + -endif. +-endif. + +-ifdef(FORK_2_9_HEIGHT). +height_2_9() -> + ?FORK_2_9_HEIGHT. +-else. + -ifdef(FORKS_RESET). + height_2_9() -> + 0. + -else. + height_2_9() -> + 1602350. % Targeting 2025-02-03 14:00 UTC + -endif. +-endif. diff --git a/src/ar_format.erl b/src/ar_format.erl index 8ee18d8bb..c08a99c91 100644 --- a/src/ar_format.erl +++ b/src/ar_format.erl @@ -18,7 +18,7 @@ format(TX, Indent, Opts) when is_list(TX); is_map(TX) -> format(TX, Indent, Opts) when is_record(TX, tx) -> MustVerify = hb_opts:get(debug_ids, true, Opts), Valid = - if MustVerify -> verify(TX); + if MustVerify -> verify(dev_arweave_common:normalize(TX)); true -> true end, UnsignedID = @@ -65,7 +65,7 @@ format(TX, Indent, Opts) when is_record(TX, tx) -> [hb_util:safe_encode(ar_bundles:signer(TX))], Indent + 1), format_line("Signature: ~s", - [hb_format:binary(TX#tx.signature)], + [hb_format:binary(TX#tx.signature, Opts)], Indent + 1); false -> [] end ++ @@ -125,6 +125,7 @@ format_fields(TX, Indent) -> format_anchor(TX, Indent) ++ format_quantity(TX, Indent) ++ format_reward(TX, Indent) ++ + format_data_size(TX, Indent) ++ format_data_root(TX, Indent). format_format(TX, Indent) -> @@ -152,6 +153,9 @@ format_quantity(TX, Indent) -> format_reward(TX, Indent) -> format_line("Reward: ~p", [TX#tx.reward], Indent + 1). +format_data_size(TX, Indent) -> + format_line("Data Size: ~p", [TX#tx.data_size], Indent + 1). + format_data_root(TX, Indent) -> format_line("Data Root: ~s", [ case TX#tx.data_root of diff --git a/src/ar_poa.erl b/src/ar_poa.erl new file mode 100644 index 000000000..8a88d8e90 --- /dev/null +++ b/src/ar_poa.erl @@ -0,0 +1,18 @@ +%%% @doc This module implements all mechanisms required to validate a proof of access +%%% for a chunk of data received from the network. +-module(ar_poa). + +-export([get_padded_offset/1, get_padded_offset/2]). + +-include("include/ar.hrl"). + +%% @doc Return the smallest multiple of 256 KiB >= Offset +%% counting from ar_block:strict_data_split_threshold(). +get_padded_offset(Offset) -> + get_padded_offset(Offset, ar_block:strict_data_split_threshold()). + +%% @doc Return the smallest multiple of 256 KiB >= Offset +%% counting from StrictDataSplitThreshold. +get_padded_offset(Offset, StrictDataSplitThreshold) -> + Diff = Offset - StrictDataSplitThreshold, + StrictDataSplitThreshold + ((Diff - 1) div (?DATA_CHUNK_SIZE) + 1) * (?DATA_CHUNK_SIZE). diff --git a/src/ar_tx.erl b/src/ar_tx.erl index 205bb85a8..5fc7b03b0 100644 --- a/src/ar_tx.erl +++ b/src/ar_tx.erl @@ -2,14 +2,20 @@ -module(ar_tx). -export([sign/2, verify/1, verify_tx_id/2]). --export([id/1, id/2, get_owner_address/1, data_root/1]). +-export([id/1, id/2, get_owner_address/1, data_root/1, data_root/2]). -export([generate_signature_data_segment/1, generate_chunk_id/1]). 
-export([json_struct_to_tx/1, tx_to_json_struct/1]). --export([chunk_binary/2, chunks_to_size_tagged_chunks/1, sized_chunks_to_sized_chunk_ids/1]). +-export([generate_chunk_tree/1, generate_chunk_tree/2]). +-export([chunk_binary/2, chunk_binary/3, chunking_mode/1]). +-export([chunks_to_size_tagged_chunks/1, sized_chunks_to_sized_chunk_ids/1]). +-export([get_weave_size_increase/2]). -include("include/hb.hrl"). -include_lib("eunit/include/eunit.hrl"). +%% Minimum chunk size targeted by the arweave-js chunking algorithm. +-define(MIN_CHUNK_SIZE, (32 * 1024)). + %%%=================================================================== %%% Public interface. %%%=================================================================== @@ -88,7 +94,10 @@ get_owner_address(#tx{ owner_address = OwnerAddress }) -> OwnerAddress. data_root(Bin) -> - Chunks = chunk_binary(?DATA_CHUNK_SIZE, Bin), + data_root(arweavejs, Bin). + +data_root(Mode, Bin) -> + Chunks = chunk_binary(Mode, ?DATA_CHUNK_SIZE, Bin), SizeTaggedChunks = chunks_to_size_tagged_chunks(Chunks), SizeTaggedChunkIDs = sized_chunks_to_sized_chunk_ids(SizeTaggedChunks), {Root, _} = ar_merkle:generate_tree(SizeTaggedChunkIDs), @@ -206,13 +215,13 @@ verify_hash(#tx{ id = ID } = TX) -> ID == dev_arweave_common:generate_id(TX, signed). %% @doc On Arweave we don't have data on format=2 transactions, and so -%% traditionally just verify the transcation based on data_rot and data_size. +%% traditionally just verify the transaction based on data_root and data_size. %% However in HyperBEAM we will often populate the data field. Adding this %% check to verify that `data_root`, `data_size`, and `data` are consistent. verify_v2_data(#tx{ format = 2, data = ?DEFAULT_DATA }) -> true; -verify_v2_data(#tx{ - format = 2, data_root = DataRoot, +verify_v2_data(#tx{ + format = 2, data_root = DataRoot, data_size = DataSize, data = Data }) -> (DataSize == byte_size(Data)) andalso (DataRoot == data_root(Data)); verify_v2_data(_) -> @@ -359,10 +368,11 @@ tx_to_json_struct( %% Used to compute the Merkle roots of v1 transactions' data and to compute %% Merkle proofs for v2 transactions when their data is uploaded without proofs. generate_chunk_tree(TX) -> + Mode = chunking_mode(TX#tx.format), generate_chunk_tree(TX, sized_chunks_to_sized_chunk_ids( chunks_to_size_tagged_chunks( - chunk_binary(?DATA_CHUNK_SIZE, TX#tx.data) + chunk_binary(Mode, ?DATA_CHUNK_SIZE, TX#tx.data) ) ) ). @@ -378,11 +388,43 @@ generate_chunk_id(Chunk) -> %% @doc Split the binary into chunks. Used for computing the Merkle roots of %% v1 transactions' data and computing Merkle proofs for v2 transactions' when %% their data is uploaded without proofs. -chunk_binary(ChunkSize, Bin) when byte_size(Bin) < ChunkSize -> - [Bin]; chunk_binary(ChunkSize, Bin) -> + chunk_binary(arweavejs, ChunkSize, Bin). + +chunking_mode(1) -> + legacy; +chunking_mode(2) -> + arweavejs; +chunking_mode(_) -> + legacy. + +%% @doc Split the binary into chunks using the requested mode. +%% legacy: fixed-size chunking with a smaller final chunk. +%% arweavejs: size-balanced chunking where the last two chunks may be small. +%% This is the chunking logic used by the arweave-js library. +%% Adapted from: https://github.com/ArweaveTeam/arweave-js/blob/39d8ef2799a2c555e6f9b0cc6adabd7cbc411bc8/src/common/lib/merkle.ts#L43 +chunk_binary(legacy, ChunkSize, Bin) when byte_size(Bin) < ChunkSize -> + [Bin]; +chunk_binary(legacy, ChunkSize, Bin) -> <<ChunkBin:ChunkSize/binary, Rest/binary>> = Bin, - [ChunkBin | chunk_binary(ChunkSize, Rest)].
+ [ChunkBin | chunk_binary(legacy, ChunkSize, Rest)]; +chunk_binary(arweavejs, ChunkSize, Bin) -> + chunk_binary_arweavejs(arweavejs, ChunkSize, Bin, []). + +chunk_binary_arweavejs(arweavejs, ChunkSize, Bin, Acc) + when byte_size(Bin) >= ChunkSize -> + BinSize = byte_size(Bin), + NextChunkSize = BinSize - ChunkSize, + ChunkSize2 = + case NextChunkSize > 0 andalso NextChunkSize < ?MIN_CHUNK_SIZE of + true -> + (BinSize + 1) div 2; + false -> ChunkSize + end, + <> = Bin, + chunk_binary_arweavejs(arweavejs, ChunkSize, Rest, [Chunk | Acc]); +chunk_binary_arweavejs(arweavejs, _ChunkSize, Bin, Acc) -> + lists:reverse([Bin | Acc]). %% @doc Assign a byte offset to every chunk in the list. chunks_to_size_tagged_chunks(Chunks) -> @@ -405,6 +447,22 @@ chunks_to_size_tagged_chunks(Chunks) -> sized_chunks_to_sized_chunk_ids(SizedChunks) -> [{generate_chunk_id(Chunk), Size} || {Chunk, Size} <- SizedChunks]. +%% @doc Return the number of bytes the weave is increased by when the given transaction +%% is included. +get_weave_size_increase(#tx{ data_size = DataSize }, Height) -> + get_weave_size_increase(DataSize, Height); + +get_weave_size_increase(0, _Height) -> + 0; +get_weave_size_increase(DataSize, Height) -> + case Height >= ar_fork:height_2_5() of + true -> + %% The smallest multiple of ?DATA_CHUNK_SIZE larger than or equal to data_size. + ar_poa:get_padded_offset(DataSize, 0); + false -> + DataSize + end. + %%%=================================================================== %%% Tests. %%%=================================================================== @@ -420,6 +478,31 @@ new(Data, Reward) -> data_size = byte_size(Data) }. +chunk_binary_legacy_test() -> + ChunkSize = 10, + Data = binary:copy(<<"a">>, 25), + ChunksLegacy = chunk_binary(legacy, ChunkSize, Data), + ?assertEqual([10, 10, 5], [byte_size(Chunk) || Chunk <- ChunksLegacy]), + ?assertEqual(ChunksLegacy, chunk_binary(legacy, ChunkSize, Data)). + +chunk_binary_arweavejs_balanced_test() -> + ChunkSize = ?DATA_CHUNK_SIZE, + MinChunkSize = ?MIN_CHUNK_SIZE, + DataSize = ChunkSize + MinChunkSize - 1, + Data = binary:copy(<<"b">>, DataSize), + Chunks = chunk_binary(arweavejs, ChunkSize, Data), + ExpectedFirst = (DataSize + 1) div 2, + ExpectedSecond = DataSize - ExpectedFirst, + ?assertEqual([ExpectedFirst, ExpectedSecond], [byte_size(Chunk) || Chunk <- Chunks]). + +chunk_binary_arweavejs_standard_test() -> + ChunkSize = ?DATA_CHUNK_SIZE, + MinChunkSize = ?MIN_CHUNK_SIZE, + DataSize = ChunkSize + MinChunkSize, + Data = binary:copy(<<"c">>, DataSize), + Chunks = chunk_binary(arweavejs, ChunkSize, Data), + ?assertEqual([ChunkSize, MinChunkSize], [byte_size(Chunk) || Chunk <- Chunks]). + sign_tx_test_() -> {timeout, 30, fun test_sign_tx/0}. diff --git a/src/dev_arweave.erl b/src/dev_arweave.erl index f40a188d2..0eac81db1 100644 --- a/src/dev_arweave.erl +++ b/src/dev_arweave.erl @@ -4,17 +4,21 @@ %%% The node(s) that are used to query data may be configured by altering the %%% `/arweave` route in the node's configuration message. -module(dev_arweave). --export([tx/3, chunk/3, block/3, current/3, status/3, price/3, tx_anchor/3]). +-export([tx/3, raw/3, chunk/3, block/3, current/3, status/3, price/3, tx_anchor/3]). -export([post_tx/3, post_tx/4, post_binary_ans104/2]). -include("include/hb.hrl"). -include_lib("eunit/include/eunit.hrl"). +-define(IS_BLOCK_ID(X), (is_binary(X) andalso byte_size(X) == 64)). + + %% @doc Proxy the `/info' endpoint from the Arweave node. status(_Base, _Request, Opts) -> request(<<"GET">>, <<"/info">>, Opts). 
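Returning to the chunk_binary/3 change above, the practical difference between the two modes shows up on data just over one full chunk: legacy keeps a full 256 KiB chunk plus the small tail, while arweavejs balances the final two chunks whenever the tail would fall under ?MIN_CHUNK_SIZE. A sketch, assuming ?DATA_CHUNK_SIZE = 262144 and ?MIN_CHUNK_SIZE = 32768:

%% Illustration only; assumes ?DATA_CHUNK_SIZE = 262144 and
%% ?MIN_CHUNK_SIZE = 32768.
chunking_modes_example() ->
    Data = binary:copy(<<0>>, 262144 + 1000),
    %% legacy: one full chunk and a 1000-byte tail.
    [262144, 1000] =
        [byte_size(C) || C <- ar_tx:chunk_binary(legacy, 262144, Data)],
    %% arweavejs: the tail would be under 32 KiB, so the remaining bytes are
    %% split into two balanced chunks of (263144 + 1) div 2 = 131572 each.
    [131572, 131572] =
        [byte_size(C) || C <- ar_tx:chunk_binary(arweavejs, 262144, Data)],
    ok.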
-%% @doc Returns the given transaction, if known to the client node(s), as an -%% AO-Core message. +%% @doc Returns the given transaction as an AO-Core message. By default, this +%% embeds the `/raw` payload. Set `exclude-data` to true to return just the +%% header. tx(Base, Request, Opts) -> case hb_maps:get(<<"method">>, Request, <<"GET">>, Opts) of <<"POST">> -> post_tx(Base, Request, Opts); @@ -98,22 +102,37 @@ post_binary_ans104(SerializedTX, Opts) -> } ). -%% @doc Get a transaction ID from the Arweave node, as indicated by the `tx` key -%% in the request or base message. If the `data' key is present and set to -%% `false', the data is not retrieved and added to the response. If the `data' -%% key is set to `always', transactions for which the header is available but -%% the data is not will lead to an error. Otherwise, just the header will be -%% returned. +%% @doc Get a transaction from the Arweave node, as indicated by the +%% `tx` key in the request or base message. By default, this embeds the data +%% payload. Set `exclude_data` to true to return just the header. get_tx(Base, Request, Opts) -> case find_txid(Base, Request, Opts) of not_found -> {error, not_found}; - TXID -> request(<<"GET">>, <<"/tx/", TXID/binary>>, Opts) + TXID -> + request( + <<"GET">>, + <<"/tx/", TXID/binary>>, + Opts#{ exclude_data => exclude_data(Base, Request, Opts) } + ) + end. + +%% @doc Get raw transaction data from the Arweave node, as indicated by the +%% `tx` key in the request or base message. +raw(Base, Request, Opts) -> + case find_txid(Base, Request, Opts) of + not_found -> {error, not_found}; + TXID -> data(TXID, Opts) end. +%% @doc Retrieve the data of a transaction from Arweave. +data(TXID, Opts) -> + ?event({retrieving_tx_data, {tx, TXID}}), + request(<<"GET">>, <<"/raw/", TXID/binary>>, Opts). + chunk(Base, Request, Opts) -> case hb_maps:get(<<"method">>, Request, <<"GET">>, Opts) of <<"POST">> -> post_chunk(Base, Request, Opts); - <<"GET">> -> {error, not_implemented} + <<"GET">> -> get_chunk_range(Base, Request, Opts) end. post_chunk(_Base, Request, Opts) -> @@ -128,32 +147,53 @@ post_chunk(_Base, Request, Opts) -> Opts ). -add_data(TXID, TXHeader, Opts) -> - case data(TXID, Opts) of - {ok, Data} -> - TX = TXHeader#tx{ data = Data }, - ?event( - {retrieved_tx_with_data, - {id, TXID}, - {data_size, byte_size(Data)}, - {tx, TX} - } - ), - {ok, TX}; +get_chunk_range(_Base, Request, Opts) -> + Offset = hb_util:int(hb_ao:get(<<"offset">>, Request, Opts)), + % Default to 1 to ensure a single chunk is read. + Length = hb_util:int(hb_ao:get(<<"length">>, Request, 1, Opts)), + case get_chunk_range(Offset, Length, Opts, [], 0) of + {ok, Chunks} -> + Data = iolist_to_binary(Chunks), + % When no `length` is specified, we'll read only a single chunk. + % If `length` is specified, we'll read until we've accumulated + % `length` or more bytes, and then truncate down to `length` bytes. + case hb_maps:is_key(<<"length">>, Request, Opts) of + true -> + {ok, binary:part(Data, 0, Length)}; + false -> + {ok, Data} + end; {error, Reason} -> - ?event( - {data_retrieval_failed_after_header, - {id, TXID}, - {error, Reason} - } - ), {error, Reason} end. -%% @doc Retrieve the data of a transaction from Arweave. -data(TXID, Opts) -> - ?event({retrieving_tx_data, {tx, TXID}}), - request(<<"GET">>, <<"/raw/", TXID/binary>>, Opts). 
+get_chunk_range(_Offset, Length, _Opts, Chunks, Size) + when Size >= Length -> + {ok, lists:reverse(Chunks)}; +get_chunk_range(Offset, Length, Opts, Chunks, Size) -> + case get_chunk(Offset, Opts) of + {ok, JSON} -> + Chunk = hb_util:decode(maps:get(<<"chunk">>, JSON)), + ChunkSize = byte_size(Chunk), + AbsoluteEndOffset = hb_util:int(maps:get(<<"absolute_end_offset">>, JSON)), + AbsoluteStartOffset = AbsoluteEndOffset - ChunkSize + 1, + StartGap = Offset - AbsoluteStartOffset, + TruncatedLength = ChunkSize - StartGap, + SlicedChunk = binary:part(Chunk, StartGap, TruncatedLength), + get_chunk_range( + AbsoluteEndOffset + 1, + Length, + Opts, + [SlicedChunk | Chunks], + Size + byte_size(SlicedChunk) + ); + {error, Reason} -> + {error, Reason} + end. + +get_chunk(Offset, Opts) -> + Path = <<"/chunk/", (hb_util:bin(Offset))/binary>>, + request(<<"GET">>, Path, Opts). %% @doc Retrieve (and cache) block information from Arweave. If the `block' key %% is present, it is used to look up the associated block. If it is of Arweave @@ -173,10 +213,10 @@ block(Base, Request, Opts) -> case Block of <<"current">> -> current(Base, Request, Opts); not_found -> current(Base, Request, Opts); - ID when ?IS_ID(ID) -> block({id, ID}, Opts); + ID when ?IS_BLOCK_ID(ID) -> block({id, ID}, Opts); MaybeHeight -> try hb_util:int(MaybeHeight) of - Int -> block({height, Int}, Opts) + Int -> block({height, Int}, Opts) catch _:_ -> { @@ -244,6 +284,18 @@ find_txid(Base, Request, Opts) -> Opts ). +exclude_data(Base, Request, Opts) -> + RawValue = + hb_ao:get_first( + [ + {Request, <<"exclude-data">>}, + {Base, <<"exclude-data">>} + ], + false, + Opts + ), + hb_util:bool(RawValue). + %% @doc Make a request to the Arweave node and parse the response into an %% AO-Core message. Most Arweave API responses are in JSON format, but without %% a `content-type' header. Subsequently, we parse the response manually and @@ -256,7 +308,10 @@ request(Method, Path, Opts) -> <<"path">> => <<"/arweave", Path/binary>>, <<"method">> => Method }, - Opts + Opts#{ + http_retry => 3, + http_retry_response => [error, failure] + } ), to_message(Path, Res, Opts). @@ -277,16 +332,20 @@ to_message(Path = <<"/tx/", TXID/binary>>, {ok, #{ <<"body">> := Body }}, Opts) {tx, TXHeader} } ), - {ok, TX} = add_data(TXID, TXHeader, Opts), - { - ok, - hb_message:convert( - TX, - <<"structured@1.0">>, - <<"tx@1.0">>, - Opts - ) - }; + case hb_opts:get(exclude_data, false, Opts) of + true -> + {ok, hb_message:convert(TXHeader, <<"structured@1.0">>, <<"tx@1.0">>, Opts)}; + false -> + case data(TXID, Opts) of + {ok, RawData} -> + TX = TXHeader#tx{ data = RawData }, + {ok, hb_message:convert(TX, <<"structured@1.0">>, <<"tx@1.0">>, Opts)}; + {error, not_found} -> + {ok, hb_message:convert(TXHeader, <<"structured@1.0">>, <<"tx@1.0">>, Opts)}; + Error -> + Error + end + end; to_message(Path = <<"/raw/", _/binary>>, {ok, #{ <<"body">> := Body }}, _Opts) -> ?event( {arweave_raw_response, @@ -384,9 +443,15 @@ post_ans104_tx_test() -> ok. 
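To make the slicing in get_chunk_range/5 above concrete: each /chunk response reports its absolute_end_offset, from which the chunk's absolute start is derived, and the requested offset only trims bytes off the front of the first chunk. A worked sketch with purely illustrative numbers:

%% Illustrative numbers only: a full 262144-byte chunk whose
%% absolute_end_offset is 1000000, queried from offset 999000.
slice_first_chunk_example() ->
    ChunkSize = 262144,
    Chunk = binary:copy(<<1>>, ChunkSize),
    AbsoluteEndOffset = 1000000,
    AbsoluteStartOffset = AbsoluteEndOffset - ChunkSize + 1, % 737857
    Offset = 999000,
    StartGap = Offset - AbsoluteStartOffset,                 % 261143
    TruncatedLength = ChunkSize - StartGap,                  % 1001
    Sliced = binary:part(Chunk, StartGap, TruncatedLength),
    1001 = byte_size(Sliced),
    %% The next iteration then continues from AbsoluteEndOffset + 1.
    ok.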
get_tx_basic_data_test() -> - Node = hb_http_server:start_node(), - Path = <<"/~arweave@2.9-pre/tx=ptBC0UwDmrUTBQX3MqZ1lB57ex20ygwzkjjCrQjIx3o">>, - {ok, Structured} = hb_http:get(Node, Path, #{}), + {ok, Structured} = hb_ao:resolve( + #{ <<"device">> => <<"arweave@2.9-pre">> }, + #{ + <<"path">> => <<"tx">>, + <<"tx">> => <<"ptBC0UwDmrUTBQX3MqZ1lB57ex20ygwzkjjCrQjIx3o">>, + <<"exclude-data">> => false + }, + #{} + ), ?event(debug_test, {structured_tx, Structured}), ?assert(hb_message:verify(Structured, all, #{})), % Hash the data to make it easier to match @@ -404,18 +469,79 @@ get_tx_basic_data_test() -> ?assert(hb_message:match(ExpectedMsg, StructuredWithHash, only_present)), ok. +%% @doc The data for this transaction ends with two smaller chunks. +get_tx_split_chunk_test() -> + {ok, Structured} = hb_ao:resolve( + #{ <<"device">> => <<"arweave@2.9-pre">> }, + #{ + <<"path">> => <<"tx">>, + <<"tx">> => <<"T2pluNnaavL7-S2GkO_m3pASLUqMH_XQ9IiIhZKfySs">>, + <<"exclude-data">> => false + }, + #{} + ), + ?assert(hb_message:verify(Structured, all, #{})), + ?assertEqual( + <<"T2pluNnaavL7-S2GkO_m3pASLUqMH_XQ9IiIhZKfySs">>, + hb_message:id(Structured, signed)), + ExpectedMsg = #{ + <<"reward">> => <<"6035386935">>, + <<"anchor">> => <<"PX16-598IrIMvLxFkvfNTWLVKXqXSmArOdW3o7X8jWMCH1fiNOjBZ2XjQlw0FOme">>, + <<"Contract">> => <<"KTzTXT_ANmF84fWEKHzWURD1LWd9QaFR9yfYUwH2Lxw">> + }, + ?assert(hb_message:match(ExpectedMsg, Structured, only_present)), + + Child = hb_ao:get(<<"1/2">>, Structured), + ?assert(hb_message:verify(Child, all, #{})), + ?event(debug_test, {child, {explicit, hb_message:id(Child, signed)}}), + ?assertEqual( + <<"8aJrRWtHcJvJ61qsH6agGkemzrtLw3W22xFrpCGAnTM">>, + hb_message:id(Child, signed)), + ok. + +get_tx_basic_data_exclude_data_test() -> + {ok, Structured} = hb_ao:resolve( + #{ <<"device">> => <<"arweave@2.9-pre">> }, + #{ + <<"path">> => <<"tx">>, + <<"tx">> => <<"ptBC0UwDmrUTBQX3MqZ1lB57ex20ygwzkjjCrQjIx3o">>, + <<"exclude-data">> => true + }, + #{} + ), + ?event(debug_test, {structured_tx, Structured}), + ?assert(hb_message:verify(Structured, all, #{})), + ?assertEqual(false, maps:is_key(<<"data">>, Structured)), + ExpectedMsg = #{ + <<"reward">> => <<"482143296">>, + <<"anchor">> => <<"XTzaU2_m_hRYDLiXkcleOC4zf5MVTXIeFWBOsJSRrtEZ8kM6Oz7EKLhZY7fTAvKq">>, + <<"content-type">> => <<"application/json">> + }, + ?assert(hb_message:match(ExpectedMsg, Structured, only_present)), + {ok, Data} = hb_ao:resolve( + #{ <<"device">> => <<"arweave@2.9-pre">> }, + #{ + <<"path">> => <<"raw">>, + <<"tx">> => <<"ptBC0UwDmrUTBQX3MqZ1lB57ex20ygwzkjjCrQjIx3o">> + }, + #{} + ), + StructuredWithData = Structured#{ <<"data">> => Data }, + ?assert(hb_message:verify(StructuredWithData, all, #{})), + DataHash = hb_util:encode(crypto:hash(sha256, Data)), + ?assertEqual(<<"PEShWA1ER2jq7CatAPpOZ30TeLrjOSpaf_Po7_hKPo4">>, DataHash), + ok. 
+ get_tx_rsa_nested_bundle_test() -> Node = hb_http_server:start_node(), Path = <<"/~arweave@2.9-pre/tx=bndIwac23-s0K11TLC1N7z472sLGAkiOdhds87ZywoE">>, {ok, Root} = hb_http:get(Node, Path, #{}), ?event(debug_test, {root, Root}), ?assert(hb_message:verify(Root, all, #{})), - ChildPath = <>, {ok, Child} = hb_http:get(Node, ChildPath, #{}), ?event(debug_test, {child, Child}), ?assert(hb_message:verify(Child, all, #{})), - {ok, ExpectedChild} = hb_ao:resolve( Root, @@ -423,7 +549,6 @@ get_tx_rsa_nested_bundle_test() -> #{} ), ?assert(hb_message:match(ExpectedChild, Child, only_present)), - ManualChild = #{ <<"data">> => <<"{\"totalTickedRewardsDistributed\":0,\"distributedEpochIndexes\":[],\"newDemandFactors\":[],\"newEpochIndexes\":[],\"tickedRewardDistributions\":[],\"newPruneGatewaysResults\":[{\"delegateStakeReturned\":0,\"stakeSlashed\":0,\"gatewayStakeReturned\":0,\"delegateStakeWithdrawing\":0,\"prunedGateways\":[],\"slashedGateways\":[],\"gatewayStakeWithdrawing\":0}]}">>, <<"data-protocol">> => <<"ao">>, @@ -488,3 +613,173 @@ serialize_data_item_test_disabled() -> ?assertEqual(length(DataItem#tx.tags), length(VerifiedItem#tx.tags)), ?assert(ar_bundles:verify_item(VerifiedItem)), ok. + +get_partial_chunk_post_split_test() -> + %% https://arweave.net/tx/QL7_EnmrFtx-0wVgPr2IwaGWQT8vmPcF3R20CKMO3D4/offset + %% + Offset = 378092137521399, + ExpectedLength = 1000, + Opts = #{}, + {ok, Data} = hb_ao:resolve( + #{ <<"device">> => <<"arweave@2.9-pre">> }, + #{ + <<"path">> => <<"chunk">>, + <<"offset">> => Offset, + <<"length">> => ExpectedLength + }, + Opts + ), + ?assertEqual( + <<"G62E7qonT1RBmkC6e3pNJz_thpS9xkVD3qTJAk6o3Uc">>, + hb_util:encode(crypto:hash(sha256, Data)) + ), + ok. + +get_full_chunk_post_split_test() -> + %% https://arweave.net/tx/QL7_EnmrFtx-0wVgPr2IwaGWQT8vmPcF3R20CKMO3D4/offset + %% + Offset = 378092137521399, + ExpectedLength = ?DATA_CHUNK_SIZE, + Opts = #{}, + {ok, Data} = hb_ao:resolve( + #{ <<"device">> => <<"arweave@2.9-pre">> }, + #{ + <<"path">> => <<"chunk">>, + <<"offset">> => Offset, + <<"length">> => ExpectedLength + }, + Opts + ), + ?assertEqual( + <<"LyTBdUe0rNmpqt8C-p7HksdiredXaa0wCBAPt3504W0">>, + hb_util:encode(crypto:hash(sha256, Data)) + ), + ok. + +get_multi_chunk_post_split_test() -> + %% https://arweave.net/tx/QL7_EnmrFtx-0wVgPr2IwaGWQT8vmPcF3R20CKMO3D4/offset + %% + Offset = 378092137521399, + ExpectedLength = ?DATA_CHUNK_SIZE * 3, + Opts = #{}, + {ok, Data} = hb_ao:resolve( + #{ <<"device">> => <<"arweave@2.9-pre">> }, + #{ + <<"path">> => <<"chunk">>, + <<"offset">> => Offset, + <<"length">> => ExpectedLength + }, + Opts + ), + ?assertEqual( + <<"4Cb_N0z0tMDwCiWrUbuzktfn-H6NLHT1btXGDo3CByI">>, + hb_util:encode(crypto:hash(sha256, Data)) + ), + ok. + + +%% @doc Query a chunk range that starts and ends in the middle of a chunk. +get_mid_chunk_post_split_test() -> + %% https://arweave.net/tx/QL7_EnmrFtx-0wVgPr2IwaGWQT8vmPcF3R20CKMO3D4/offset + %% + Offset = 378092137521399 + 200_000, + ExpectedLength = ?DATA_CHUNK_SIZE + 300_000, + Opts = #{}, + {ok, Data} = hb_ao:resolve( + #{ <<"device">> => <<"arweave@2.9-pre">> }, + #{ + <<"path">> => <<"chunk">>, + <<"offset">> => Offset, + <<"length">> => ExpectedLength + }, + Opts + ), + ?assertEqual( + <<"xkEZpGqDiCVuVZfGVyscmfYNZqYmgBLjOrMD2P_SfWs">>, + hb_util:encode(crypto:hash(sha256, Data)) + ), + ok. 
+ +get_partial_chunk_pre_split_test() -> + %% https://arweave.net/tx/v4ophPvV-cNp5gkpkjMuUZ-lf-fBfm1Wk-pB4vJb00E/offset + %% + Offset = 30575701172109, + ExpectedLength = 1000, + Opts = #{}, + {ok, Data} = hb_ao:resolve( + #{ <<"device">> => <<"arweave@2.9-pre">> }, + #{ + <<"path">> => <<"chunk">>, + <<"offset">> => Offset, + <<"length">> => ExpectedLength + }, + Opts + ), + ?assertEqual( + <<"yU5tZyDCTZ4MFcT6lng74tvx1oIbPkpCw1VAJsSqeuo">>, + hb_util:encode(crypto:hash(sha256, Data)) + ), + ok. + +get_full_chunk_pre_split_test() -> + %% https://arweave.net/tx/v4ophPvV-cNp5gkpkjMuUZ-lf-fBfm1Wk-pB4vJb00E/offset + %% + Offset = 30575701172109, + ExpectedLength = ?DATA_CHUNK_SIZE, + Opts = #{}, + {ok, Data} = hb_ao:resolve( + #{ <<"device">> => <<"arweave@2.9-pre">> }, + #{ + <<"path">> => <<"chunk">>, + <<"offset">> => Offset, + <<"length">> => ExpectedLength + }, + Opts + ), + ?assertEqual( + <<"nVCvjEq9T5nxIR6jvglNbX1_CYCg0WifxfQoXhS4gik">>, + hb_util:encode(crypto:hash(sha256, Data)) + ), + ok. + +get_multi_chunk_pre_split_test() -> + %% https://arweave.net/tx/v4ophPvV-cNp5gkpkjMuUZ-lf-fBfm1Wk-pB4vJb00E/offset + %% + Offset = 30575701172109, + ExpectedLength = ?DATA_CHUNK_SIZE * 3, + Opts = #{}, + {ok, Data} = hb_ao:resolve( + #{ <<"device">> => <<"arweave@2.9-pre">> }, + #{ + <<"path">> => <<"chunk">>, + <<"offset">> => Offset, + <<"length">> => ExpectedLength + }, + Opts + ), + ?assertEqual( + <<"DfS3jtLXqG3zO_IFA3P-r55SUBoeJmeIh4Eim2Rldeo">>, + hb_util:encode(crypto:hash(sha256, Data)) + ), + ok. + +get_mid_chunk_pre_split_test() -> + %% https://arweave.net/tx/v4ophPvV-cNp5gkpkjMuUZ-lf-fBfm1Wk-pB4vJb00E/offset + %% + Offset = 30575701172109 + 200_000, + ExpectedLength = ?DATA_CHUNK_SIZE + 300_000, + Opts = #{}, + {ok, Data} = hb_ao:resolve( + #{ <<"device">> => <<"arweave@2.9-pre">> }, + #{ + <<"path">> => <<"chunk">>, + <<"offset">> => Offset, + <<"length">> => ExpectedLength + }, + Opts + ), + ?assertEqual( + <<"mgSfqsNapn_BXpbnIHtdeu3rQyvrjBaS0c7rEbUbtBU">>, + hb_util:encode(crypto:hash(sha256, Data)) + ), + ok. \ No newline at end of file diff --git a/src/dev_arweave_common.erl b/src/dev_arweave_common.erl index 808b41dfc..9f784f839 100644 --- a/src/dev_arweave_common.erl +++ b/src/dev_arweave_common.erl @@ -178,7 +178,8 @@ maybe_add_bundle_tags(BundleType, TX) -> TX#tx{tags = FilteredBundleTags ++ TX#tx.tags }. %% @doc Reset the data size of a data item. Assumes that the data is already normalized. -normalize_data_size(Item = #tx{data = Bin}) when is_binary(Bin) -> +normalize_data_size(Item = #tx{data = Bin}) + when is_binary(Bin) andalso Bin =/= ?DEFAULT_DATA -> Item#tx{data_size = byte_size(Bin)}; normalize_data_size(Item) -> Item. @@ -188,9 +189,27 @@ reset_owner_address(TX) -> TX#tx{owner_address = ar_tx:get_owner_address(TX)}. +normalize_data_root(Item = #tx{data = Bin, format = 1}) + when is_binary(Bin) andalso Bin =/= ?DEFAULT_DATA -> + Item#tx{data_root = ar_tx:data_root(legacy, Bin)}; normalize_data_root(Item = #tx{data = Bin, format = 2}) when is_binary(Bin) andalso Bin =/= ?DEFAULT_DATA -> - Item#tx{data_root = ar_tx:data_root(Bin)}; + Item#tx{data_root = ar_tx:data_root(arweavejs, Bin)}; normalize_data_root(Item) -> Item. +%%%=================================================================== +%%% Tests. 
+%%%=================================================================== + +tagfind_test() -> + Default = <<"default">>, + ?assertEqual( + <<"v1">>, + tagfind(<<"Foo">>, [{<<"fOo">>, <<"v1">>}], Default) + ), + ?assertEqual( + Default, + tagfind(<<"Missing">>, [{<<"foo">>, <<"v">>}], Default) + ). + diff --git a/src/dev_bundler_dispatch.erl b/src/dev_bundler_dispatch.erl index 078067827..d1f290566 100644 --- a/src/dev_bundler_dispatch.erl +++ b/src/dev_bundler_dispatch.erl @@ -453,7 +453,8 @@ execute_task(#task{type = build_proofs, data = CommittedTX, opts = Opts} = Task) Data = TX#tx.data, DataRoot = TX#tx.data_root, DataSize = TX#tx.data_size, - Chunks = ar_tx:chunk_binary(?DATA_CHUNK_SIZE, Data), + Mode = ar_tx:chunking_mode(TX#tx.format), + Chunks = ar_tx:chunk_binary(Mode, ?DATA_CHUNK_SIZE, Data), SizeTaggedChunks = ar_tx:chunks_to_size_tagged_chunks(Chunks), SizeTaggedChunkIDs = ar_tx:sized_chunks_to_sized_chunk_ids(SizeTaggedChunks), {_Root, DataTree} = ar_merkle:generate_tree(SizeTaggedChunkIDs), diff --git a/src/dev_codec_tx.erl b/src/dev_codec_tx.erl index 3363969d2..1cc85a760 100644 --- a/src/dev_codec_tx.erl +++ b/src/dev_codec_tx.erl @@ -6,7 +6,8 @@ -include_lib("eunit/include/eunit.hrl"). -define(BASE_FIELDS, [ - <<"anchor">>, <<"format">>, <<"quantity">>, <<"reward">>, <<"target">> ]). + <<"anchor">>, <<"format">>, <<"quantity">>, <<"reward">>, <<"target">>, + <<"data_root">>, <<"data_size">> ]). %% @doc Sign a message using the `priv_wallet' key in the options. Supports both %% the `hmac-sha256' and `rsa-pss-sha256' algorithms, offering unsigned and @@ -169,10 +170,8 @@ to(Other, _Req, _Opts) -> %% those are checked as well (e.g. format is 1 or 2). %% 3. Unsupported fields are set to their default values. %% -%% Of note: for now we require that the `data` field be set on an L1 TX if -%% there is data. In other words we do not allow `data_root` and `data_size` to -%% be set if `data` is *not* set. This differs from the Arweave protocol which -%% explicitly allows TX headers to be validated in the absence of data. +%% Of note: `data_root`/`data_size` are optional for value transfers and should +%% be preserved when present on the header, even if `data` is missing. %% %% When support is added for new fields (e.g. when we add support for ECDSA signatures), %% this function will have to be updated. @@ -198,7 +197,7 @@ enforce_valid_tx(TX) -> {invalid_field, anchor, TX#tx.anchor} ), hb_util:ok_or_throw(TX, - hb_util:check_size(TX#tx.owner, [byte_size(?DEFAULT_OWNER)]), + hb_util:check_type(TX#tx.owner, binary), {invalid_field, owner, TX#tx.owner} ), hb_util:ok_or_throw(TX, @@ -218,7 +217,7 @@ enforce_valid_tx(TX) -> {invalid_field, data_root, TX#tx.data_root} ), hb_util:ok_or_throw(TX, - hb_util:check_size(TX#tx.signature, [65, byte_size(?DEFAULT_SIG)]), + hb_util:check_type(TX#tx.signature, binary), {invalid_field, signature, TX#tx.signature} ), hb_util:ok_or_throw(TX, @@ -262,28 +261,7 @@ enforce_valid_tx(TX) -> throw({invalid_field, tag, InvalidTagForm}) end, TX#tx.tags - ), - enforce_valid_tx_data(TX). - -%% @doc For now we require that the `data` field be set on an L1 TX if -%% there is data. In other words we do not allow `data_root` and `data_size` to -%% be set if `data` is *not* set. This differs from the Arweave protocol which -%% explicitly allows TX headers to be validated in the absence of data. 
-enforce_valid_tx_data(TX) when TX#tx.data == ?DEFAULT_DATA -> - case TX#tx.data_root =/= ?DEFAULT_DATA_ROOT of - true -> - throw({invalid_field, data_root, TX#tx.data_root}); - false -> - ok - end, - case TX#tx.data_size > 0 of - true -> - throw({invalid_field, data_size, TX#tx.data_size}); - false -> - ok - end; -enforce_valid_tx_data(TX) -> - ok. + ). %%%=================================================================== %%% Tests. @@ -293,7 +271,6 @@ enforce_valid_tx_test() -> BaseTX = #tx{ format = 2 }, InvalidUnsignedID = crypto:strong_rand_bytes(1), - GoodID = crypto:strong_rand_bytes(32), BadID31 = crypto:strong_rand_bytes(31), BadID33 = crypto:strong_rand_bytes(33), BadOwnerSize = crypto:strong_rand_bytes(byte_size(?DEFAULT_OWNER) - 1), @@ -317,20 +294,14 @@ enforce_valid_tx_test() -> {unsigned_id_invalid_val, BaseTX#tx{unsigned_id = InvalidUnsignedID}, {invalid_field, unsigned_id, InvalidUnsignedID}}, {anchor_too_short_31, BaseTX#tx{anchor = BadID31}, {invalid_field, anchor, BadID31}}, {anchor_too_long_33, BaseTX#tx{anchor = BadID33}, {invalid_field, anchor, BadID33}}, - {owner_wrong_size, BaseTX#tx{owner = BadOwnerSize}, {invalid_field, owner, BadOwnerSize}}, - {owner_empty, BaseTX#tx{owner = <<>>}, {invalid_field, owner, <<>>}}, + {owner_wrong_type, BaseTX#tx{owner = "hello"}, {invalid_field, owner, "hello"}}, {target_too_short_31, BaseTX#tx{target = BadID31}, {invalid_field, target, BadID31}}, {target_too_long_33, BaseTX#tx{target = BadID33}, {invalid_field, target, BadID33}}, {quantity_not_integer, BaseTX#tx{quantity = <<"100">>}, {invalid_field, quantity, <<"100">>}}, {data_size_not_integer, BaseTX#tx{data_size = an_atom}, {invalid_field, data_size, an_atom}}, {data_root_too_short_31, BaseTX#tx{data_root = BadID31}, {invalid_field, data_root, BadID31}}, {data_root_too_long_33, BaseTX#tx{data_root = BadID33}, {invalid_field, data_root, BadID33}}, - {signature_invalid_size_1, BaseTX#tx{signature = SigInvalidSize1}, {invalid_field, signature, SigInvalidSize1}}, - {signature_invalid_size_64, BaseTX#tx{signature = SigInvalidSize64}, {invalid_field, signature, SigInvalidSize64}}, - {signature_invalid_size_66, BaseTX#tx{signature = SigInvalidSize66}, {invalid_field, signature, SigInvalidSize66}}, - {signature_invalid_size_511, BaseTX#tx{signature = SigInvalidSize511}, {invalid_field, signature, SigInvalidSize511}}, - {signature_too_long_513, BaseTX#tx{signature = SigTooLong513}, {invalid_field, signature, SigTooLong513}}, - {signature_empty, BaseTX#tx{signature = <<>>}, {invalid_field, signature, <<>>}}, + {signature_invalid_type, BaseTX#tx{signature = "hello"}, {invalid_field, signature, "hello"}}, {reward_not_integer, BaseTX#tx{reward = 1.0}, {invalid_field, reward, 1.0}}, {denomination_not_zero, BaseTX#tx{denomination = 1}, {invalid_field, denomination, 1}}, {signature_type_not_rsa, BaseTX#tx{signature_type = ?ECDSA_KEY_TYPE}, {invalid_field, signature_type, ?ECDSA_KEY_TYPE}}, @@ -340,9 +311,7 @@ enforce_valid_tx_test() -> {tag_value_not_binary, BaseTX#tx{tags = [{<<"key">>, not_binary}]}, {invalid_field, tag_value, not_binary}}, {tag_value_too_long, BaseTX#tx{tags = [{<<"key">>, TooLongTagValue}]}, {invalid_field, tag_value, TooLongTagValue}}, {invalid_tag_form_atom, BaseTX#tx{tags = [not_a_tuple]}, {invalid_field, tag, not_a_tuple}}, - {invalid_tag_form_list, BaseTX#tx{tags = [[<<"name">>, <<"value">>]]}, {invalid_field, tag, [<<"name">>, <<"value">>]} }, - {data_root_without_data, BaseTX#tx{data_root = GoodID}, {invalid_field, data_root, GoodID}}, - {data_size_without_data, 
BaseTX#tx{data_size = 1}, {invalid_field, data_size, 1}} + {invalid_tag_form_list, BaseTX#tx{tags = [[<<"name">>, <<"value">>]]}, {invalid_field, tag, [<<"name">>, <<"value">>]} } ], lists:foreach( @@ -368,7 +337,7 @@ happy_tx_test() -> quantity = 1000, data = Data, data_size = byte_size(Data), - data_root = ar_tx:data_root(Data), + data_root = ar_tx:data_root(arweavejs, Data), reward = 2000 }, UnsignedTABM = #{ @@ -396,6 +365,57 @@ happy_tx_test() -> }, do_tx_roundtrips(TX, UnsignedTABM, SignedCommitment). +data_header_but_no_data_test() -> + Anchor = crypto:strong_rand_bytes(32), + Target = crypto:strong_rand_bytes(32), + Data = <<"test-data">>, + DataRoot = ar_tx:data_root(arweavejs, Data), + DataSize = byte_size(Data), + UnsignedTX = #tx{ + format = 2, + anchor = Anchor, + tags = [ + {<<"tag1">>, <<"value1">>} + ], + target = Target, + quantity = 1000, + data_size = DataSize, + data_root = DataRoot, + reward = 2000 + }, + UnsignedTABM = #{ + <<"anchor">> => hb_util:encode(Anchor), + <<"target">> => hb_util:encode(Target), + <<"quantity">> => <<"1000">>, + <<"reward">> => <<"2000">>, + <<"data_root">> => hb_util:encode(DataRoot), + <<"data_size">> => integer_to_binary(DataSize), + <<"tag1">> => <<"value1">> + }, + SignedCommitment = #{ + <<"commitment-device">> => <<"tx@1.0">>, + <<"committed">> => [ + <<"tag1">>, <<"anchor">>, <<"quantity">>, <<"reward">>, + <<"target">>, <<"data_root">>, <<"data_size">>], + <<"type">> => <<"rsa-pss-sha256">>, + <<"bundle">> => <<"false">>, + <<"field-target">> => hb_util:encode(Target), + <<"field-anchor">> => hb_util:encode(Anchor), + <<"field-quantity">> => <<"1000">>, + <<"field-reward">> => <<"2000">>, + <<"field-data_root">> => hb_util:encode(DataRoot), + <<"field-data_size">> => integer_to_binary(DataSize) + }, + do_tx_roundtrips( + UnsignedTX, + UnsignedTABM, + SignedCommitment, + #{ + <<"bundle">> => false, + <<"exclude-data">> => true + } + ). + tag_name_case_test() -> TX = #tx{ format = 2, @@ -530,7 +550,7 @@ ao_data_key_test() -> ], data = Data, data_size = byte_size(Data), - data_root = ar_tx:data_root(Data) + data_root = ar_tx:data_root(arweavejs, Data) }, SignedCommitment = #{ <<"commitment-device">> => <<"tx@1.0">>, @@ -760,6 +780,12 @@ real_ecdsa_single_item_bundle_tx_test_disabled() -> [] ). +real_2048_bit_rsa_tx_test() -> + do_real_tx_verify( + <<"tj76flZk936u0S2owyEzUFBvBAYle9Al5LH8zJ7icNc">>, + [<<"tj76flZk936u0S2owyEzUFBvBAYle9Al5LH8zJ7icNc">>] + ). + real_no_data_tx_test() -> do_real_tx_verify( <<"N1Cyu67lQtmZMQlIZVFpNfy3xz6k9wEZ8LLeDbOebbk">>, @@ -799,24 +825,46 @@ do_real_tx_verify(TXID, ExpectedIDs) -> end, ?event(debug_test, {tx, {explicit, TX}}), ?assert(ar_tx:verify(TX)), - + StructuredTX = hb_message:convert( + TX, + <<"structured@1.0">>, + <<"tx@1.0">>, + Opts + ), + ?assert(hb_message:verify(StructuredTX, all, #{})), Deserialized = ar_bundles:deserialize(TX), ?event(debug_test, {deserialized}), + verify_items(Deserialized, ExpectedIDs, Opts). - verify_items(Deserialized, ExpectedIDs). 
- -verify_items(RootItem, ExpectedIDs) -> +verify_items(RootItem, ExpectedIDs, Opts) -> AllItems = flatten_items(RootItem), ?assertEqual(length(ExpectedIDs), length(AllItems)), [RootItem | NestedItems] = AllItems, [RootID | NestedIDs] = ExpectedIDs, + NormalizedRootItem = dev_arweave_common:normalize(RootItem), ?assert( - ar_tx:verify(dev_arweave_common:normalize(RootItem)), + ar_tx:verify(NormalizedRootItem), + hb_util:encode(RootItem#tx.id)), + StructuredRootItem = hb_message:convert( + NormalizedRootItem, + <<"structured@1.0">>, + <<"tx@1.0">>, + Opts + ), + ?assert(hb_message:verify(StructuredRootItem, all, Opts), hb_util:encode(RootItem#tx.id)), ?assertEqual(RootID, hb_util:encode(RootItem#tx.id)), lists:zipwith( fun(Item, ExpectedID) -> ?assert(ar_bundles:verify_item(Item), hb_util:encode(Item#tx.id)), + StructuredItem = hb_message:convert( + Item, + <<"structured@1.0">>, + <<"ans104@1.0">>, + Opts + ), + ?assert(hb_message:verify(StructuredItem, all, Opts), + hb_util:encode(Item#tx.id)), ?assertEqual(ExpectedID, hb_util:encode(Item#tx.id)) end, NestedItems, @@ -844,10 +892,13 @@ flatten_items(_) -> do_tx_roundtrips(UnsignedTX, UnsignedTABM, Commitment) -> % For tests which don't care about bundling, just use false. do_tx_roundtrips(UnsignedTX, UnsignedTABM, Commitment, false). -do_tx_roundtrips(UnsignedTX, UnsignedTABM, Commitment, Bundle) -> - Req = #{ <<"bundle">> => Bundle }, +do_tx_roundtrips(UnsignedTX, UnsignedTABM, Commitment, Req) when is_map(Req) -> do_unsigned_tx_roundtrip(UnsignedTX, UnsignedTABM, Req), - do_signed_tx_roundtrip(UnsignedTX, UnsignedTABM, Commitment, Req). + do_signed_tx_roundtrip(UnsignedTX, UnsignedTABM, Commitment, Req); +do_tx_roundtrips(UnsignedTX, UnsignedTABM, Commitment, Bundle) + when is_boolean(Bundle) -> + Req = #{ <<"bundle">> => Bundle }, + do_tx_roundtrips(UnsignedTX, UnsignedTABM, Commitment, Req). do_unsigned_tx_roundtrip(UnsignedTX, UnsignedTABM, Req) -> % Serialize -> Deserialize diff --git a/src/dev_codec_tx_from.erl b/src/dev_codec_tx_from.erl index f21664217..fbe817f2e 100644 --- a/src/dev_codec_tx_from.erl +++ b/src/dev_codec_tx_from.erl @@ -14,7 +14,9 @@ fields(TX, Prefix, Opts) -> target_field(TX, Prefix, Opts), anchor_field(TX, Prefix, Opts), quantity_field(TX, Prefix, Opts), - reward_field(TX, Prefix, Opts) + reward_field(TX, Prefix, Opts), + data_root_field(TX, Prefix, Opts), + data_size_field(TX, Prefix, Opts) ] ). @@ -58,3 +60,16 @@ reward_field(TX, Prefix, _Opts) -> } end. +data_root_field(#tx{data = ?DEFAULT_DATA, data_root = ?DEFAULT_DATA_ROOT}, _Prefix, _Opts) -> + #{}; +data_root_field(#tx{data = ?DEFAULT_DATA, data_root = DataRoot}, Prefix, _Opts) -> + #{<> => hb_util:encode(DataRoot)}; +data_root_field(_TX, _Prefix, _Opts) -> + #{}. + +data_size_field(#tx{data = ?DEFAULT_DATA, data_size = ?DEFAULT_DATA_SIZE}, _Prefix, _Opts) -> + #{}; +data_size_field(#tx{data = ?DEFAULT_DATA, data_size = DataSize}, Prefix, _Opts) -> + #{<> => integer_to_binary(DataSize)}; +data_size_field(_TX, _Prefix, _Opts) -> + #{}. 
diff --git a/src/dev_codec_tx_to.erl b/src/dev_codec_tx_to.erl index f0fb6f586..da7094985 100644 --- a/src/dev_codec_tx_to.erl +++ b/src/dev_codec_tx_to.erl @@ -9,7 +9,9 @@ fields_to_tx(TX, Prefix, Map, Opts) -> target = target_field(Prefix, Map, Opts), anchor = anchor_field(Prefix, Map, Opts), quantity = quantity_field(Prefix, Map, Opts), - reward = reward_field(Prefix, Map, Opts) + reward = reward_field(Prefix, Map, Opts), + data_root = data_root_field(Prefix, Map, Opts), + data_size = data_size_field(Prefix, Map, Opts) }. format_field(Prefix, Map, Opts) -> @@ -62,11 +64,43 @@ reward_field(Prefix, Map, Opts) -> error -> ?DEFAULT_REWARD end. +data_root_field(Prefix, Map, Opts) -> + case hb_maps:get(<<"data">>, Map, ?DEFAULT_DATA, Opts) of + ?DEFAULT_DATA -> + case hb_maps:find(<>, Map, Opts) of + {ok, EncodedDataRoot} -> + case hb_util:safe_decode(EncodedDataRoot) of + {ok, DataRoot} when ?IS_ID(DataRoot) -> DataRoot; + _ -> ?DEFAULT_DATA_ROOT + end; + error -> ?DEFAULT_DATA_ROOT + end; + _ -> + ?DEFAULT_DATA_ROOT + end. + +data_size_field(Prefix, Map, Opts) -> + case hb_maps:get(<<"data">>, Map, ?DEFAULT_DATA, Opts) of + ?DEFAULT_DATA -> + case hb_maps:find(<>, Map, Opts) of + {ok, EncodedDataSize} -> + case hb_util:safe_int(EncodedDataSize) of + {ok, DataSize} -> DataSize; + _ -> ?DEFAULT_DATA_SIZE + end; + error -> ?DEFAULT_DATA_SIZE + end; + _ -> + ?DEFAULT_DATA_SIZE + end. + excluded_tags(TX, TABM, Opts) -> exclude_target_tag(TX, TABM, Opts) ++ exclude_anchor_tag(TX, TABM, Opts) ++ exclude_quantity_tag(TX, TABM, Opts) ++ - exclude_reward_tag(TX, TABM, Opts). + exclude_reward_tag(TX, TABM, Opts) ++ + exclude_data_root_tag(TX) ++ + exclude_data_size_tag(TX). exclude_target_tag(TX, TABM, Opts) -> case {TX#tx.target, hb_maps:get(<<"target">>, TABM, undefined, Opts)} of @@ -98,4 +132,16 @@ exclude_reward_tag(TX, TABM, Opts) -> {FieldReward, TagReward} when FieldReward =/= TagReward -> [<<"reward">>]; _ -> [] + end. + +exclude_data_root_tag(TX) -> + case TX#tx.data_root of + ?DEFAULT_DATA_ROOT -> []; + _ -> [<<"data_root">>] + end. + +exclude_data_size_tag(TX) -> + case TX#tx.data_size of + ?DEFAULT_DATA_SIZE -> []; + _ -> [<<"data_size">>] end. \ No newline at end of file diff --git a/src/dev_copycat_arweave.erl b/src/dev_copycat_arweave.erl index 6ff8f822f..4e5b820d7 100644 --- a/src/dev_copycat_arweave.erl +++ b/src/dev_copycat_arweave.erl @@ -10,6 +10,8 @@ -define(ARWEAVE_DEVICE, <<"~arweave@2.9-pre">>). +% GET /~cron@1.0/once&cron-path=~copycat@1.0/arweave + %% @doc Fetch blocks from an Arweave node between a given range, or from the %% latest known block towards the Genesis block. If no range is provided, we %% fetch blocks from the latest known block towards the Genesis block. @@ -23,27 +25,36 @@ parse_range(Request, Opts) -> case hb_maps:find(<<"from">>, Request, Opts) of {ok, Height} -> Height; error -> - {ok, LatestHeight} = - hb_ao:resolve( - <>, - Opts - ), - LatestHeight + case hb_ao:resolve( + <>, + Opts + ) of + {ok, LatestHeight} -> LatestHeight; + {error, _} -> 0 + end end, To = hb_maps:get(<<"to">>, Request, 0, Opts), - {From, To}. + {hb_util:int(From), hb_util:int(To)}. + + %% @doc Fetch blocks from an Arweave node between a given range. 
-fetch_blocks(Req, Current, Current, _Opts) -> +fetch_blocks(Req, Current, To, _Opts) when Current < To -> ?event(copycat_arweave, {arweave_block_indexing_completed, - {reached_target, Current}, + {reached_target, To}, {initial_request, Req} } ), - {ok, Current}; + {ok, To}; fetch_blocks(Req, Current, To, Opts) -> - BlockRes = + observe_event(<<"block_indexed">>, fun() -> + fetch_and_process_block(Current, To, Opts) + end), + fetch_blocks(Req, Current - 1, To, Opts). + +fetch_and_process_block(Current, To, Opts) -> + BlockRes = observe_event(<<"block_header">>, fun() -> hb_ao:resolve( << ?ARWEAVE_DEVICE/binary, @@ -51,27 +62,478 @@ fetch_blocks(Req, Current, To, Opts) -> (hb_util:bin(Current))/binary >>, Opts - ), - process_block(BlockRes, Req, Current, To, Opts), - fetch_blocks(Req, Current - 1, To, Opts). + ) + end), + process_block(BlockRes, Current, To, Opts). %% @doc Process a block. -process_block(BlockRes, _Req, Current, To, _Opts) -> +process_block(BlockRes, Current, To, Opts) -> case BlockRes of - {ok, _} -> + {ok, Block} -> + {ItemsIndexed, TotalTXs, BundleTXs, SkippedTXs} = maybe_index_ids(Block, Opts), ?event( copycat_short, {arweave_block_cached, {height, Current}, + {items_indexed, ItemsIndexed}, + {total_txs, TotalTXs}, + {bundle_txs, BundleTXs}, + {skipped_txs, SkippedTXs}, {target, To} } ); - {error, not_found} -> + {error, _} = Error -> ?event( copycat_short, {arweave_block_not_found, {height, Current}, - {target, To} - } + {target, To}, + {reason, Error}} + ) + end. + +%% @doc Index the IDs of all transactions in the block if configured to do so. +maybe_index_ids(Block, Opts) -> + TotalTXs = length(hb_maps:get(<<"txs">>, Block, [], Opts)), + case hb_opts:get(arweave_index_ids, false, Opts) of + false -> {0, TotalTXs, 0, 0}; + true -> + BlockEndOffset = hb_util:int( + hb_maps:get(<<"weave_size">>, Block, 0, Opts)), + BlockSize = hb_util:int( + hb_maps:get(<<"block_size">>, Block, 0, Opts)), + BlockStartOffset = BlockEndOffset - BlockSize, + {TXs, SkippedFromHeaders} = resolve_tx_headers(hb_maps:get(<<"txs">>, Block, [], Opts), Opts), + Height = hb_maps:get(<<"height">>, Block, 0, Opts), + TXsWithData = ar_block:generate_size_tagged_list_from_txs(TXs, Height), + % Filter out padding entries before processing + ValidTXs = lists:filter( + fun({{padding, _}, _}) -> false; (_) -> true end, + TXsWithData + ), + {ItemsIndexed, BundleTXs, SkippedFromBundles} = + process_txs(ValidTXs, BlockStartOffset, Opts), + SkippedTXs = SkippedFromHeaders + SkippedFromBundles, + {ItemsIndexed, TotalTXs, BundleTXs, SkippedTXs} + end. + +%% @doc Apply Fun to each item in Items with parallel workers. +%% Fun takes an item and returns a result. +%% Returns a list of results in the same order as the input items. +%% Uses arweave_index_workers from Opts to determine max concurrency (default 1 = sequential). +parallel_map(Items, Fun, Opts) -> + MaxWorkers = max(1, hb_opts:get(arweave_index_workers, 1, Opts)), + Parent = self(), + ItemsWithRefs = [{Item, make_ref()} || Item <- Items], + % Spawn initial batch up to MaxWorkers + {ToSpawn, Remaining} = lists:split(min(length(ItemsWithRefs), MaxWorkers), ItemsWithRefs), + ActiveRefs = [spawn_worker(ItemWithRef, Fun, Parent) || ItemWithRef <- ToSpawn], + % Wait for workers to complete and refill pool, collecting results + ResultsMap = parallel_map_wait(ActiveRefs, Remaining, Fun, MaxWorkers, Parent, #{}), + % Return results in order by matching refs (inspired by pmap pattern) + [maps:get(Ref, ResultsMap) || {_Item, Ref} <- ItemsWithRefs]. 
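A small usage sketch of parallel_map/3 above: the concurrency cap comes from the `arweave_index_workers` option, and results are returned in input order regardless of the order in which workers finish.

%% Sketch only: squares computed by up to four concurrent workers.
parallel_map_example() ->
    Opts = #{ arweave_index_workers => 4 },
    [1, 4, 9, 16, 25] =
        parallel_map(lists:seq(1, 5), fun(N) -> N * N end, Opts),
    ok.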
+ +%% @doc Spawn a worker process for a single item. +spawn_worker({Item, Ref}, Fun, Parent) -> + spawn(fun() -> + Result = Fun(Item), + Parent ! {pmap_work, Ref, Result} + end), + Ref. + +%% @doc Wait for workers to complete and refill the pool as slots become available. +parallel_map_wait([], [], _Fun, _MaxWorkers, _Parent, ResultsMap) -> + ResultsMap; +parallel_map_wait(ActiveRefs, Remaining, Fun, MaxWorkers, Parent, ResultsMap) -> + receive + {pmap_work, CompletedRef, Result} -> + % Store result and remove completed ref + NewResultsMap = ResultsMap#{CompletedRef => Result}, + NewActiveRefs = lists:delete(CompletedRef, ActiveRefs), + case Remaining of + [] -> + % No more items, just wait for remaining workers + parallel_map_wait(NewActiveRefs, [], Fun, MaxWorkers, Parent, NewResultsMap); + _ -> + % Spawn replacement worker + [NextItemWithRef | NewRemaining] = Remaining, + NextRef = spawn_worker(NextItemWithRef, Fun, Parent), + parallel_map_wait([NextRef | NewActiveRefs], NewRemaining, Fun, MaxWorkers, Parent, NewResultsMap) + end + end. + +%% @doc Process a single transaction and return its contribution to the counters. +%% Returns a map with keys: items_count, bundle_count, skipped_count +process_tx({{padding, _PaddingRoot}, _EndOffset}, _BlockStartOffset, _Opts) -> + #{items_count => 0, bundle_count => 0, skipped_count => 0}; +process_tx({{TX, _TXDataRoot}, EndOffset}, BlockStartOffset, Opts) -> + case is_bundle_tx(TX, Opts) of + false -> #{items_count => 0, bundle_count => 0, skipped_count => 0}; + true -> + IndexStore = hb_opts:get(arweave_index_store, no_store, Opts), + TXID = hb_util:encode(TX#tx.id), + TXEndOffset = BlockStartOffset + EndOffset, + TXStartOffset = TXEndOffset - TX#tx.data_size, + observe_event(<<"item_indexed">>, fun() -> + hb_store_arweave:write_offset( + IndexStore, + TXID, + true, + TXStartOffset, + TX#tx.data_size + ) + end), + BundleRes = download_bundle_header( + TXEndOffset, TX#tx.data_size, Opts + ), + case BundleRes of + {ok, {BundleIndex, HeaderSize}} -> + % Batch event tracking: measure total time and count for all write_offset calls + {TotalTime, {_, ItemsCount}} = timer:tc(fun() -> + lists:foldl( + fun({ItemID, Size}, {ItemStartOffset, ItemsCountAcc}) -> + hb_store_arweave:write_offset( + IndexStore, + hb_util:encode(ItemID), + false, + ItemStartOffset, + Size + ), + {ItemStartOffset + Size, ItemsCountAcc + 1} + end, + {TXStartOffset + HeaderSize, 0}, + BundleIndex + ) + end), + % Single event increment for the batch + record_event_metrics(<<"item_indexed">>, ItemsCount, TotalTime), + #{items_count => ItemsCount, bundle_count => 1, skipped_count => 0}; + {error, Reason} -> + ?event( + copycat_short, + {arweave_bundle_skipped, + {tx_id, {explicit, TXID}}, + {reason, Reason} + } + ), + #{items_count => 0, bundle_count => 1, skipped_count => 1} + end + end. + +%% @doc Process transactions: spawn workers and manage the worker pool. +%% This function processes transactions in parallel using parallel_map. +%% When arweave_index_workers <= 1, processes sequentially (one worker at a time). +%% When arweave_index_workers > 1, processes in parallel with the specified concurrency limit. +%% Returns {ItemsIndexed, BundleTXs, SkippedTXs}. 
+process_txs(ValidTXs, BlockStartOffset, Opts) -> + Results = parallel_map( + ValidTXs, + fun(TXWithData) -> process_tx(TXWithData, BlockStartOffset, Opts) end, + Opts + ), + Aggregated = lists:foldl( + fun(Result, Acc) -> + #{ + items_count => maps:get(items_count, Result, 0) + maps:get(items_count, Acc, 0), + bundle_count => maps:get(bundle_count, Result, 0) + maps:get(bundle_count, Acc, 0), + skipped_count => maps:get(skipped_count, Result, 0) + maps:get(skipped_count, Acc, 0) + } + end, + #{items_count => 0, bundle_count => 0, skipped_count => 0}, + Results + ), + { + maps:get(items_count, Aggregated, 0), + maps:get(bundle_count, Aggregated, 0), + maps:get(skipped_count, Aggregated, 0) + }. + +is_bundle_tx(TX, _Opts) -> + dev_arweave_common:type(TX) =/= binary. + +download_bundle_header(EndOffset, Size, Opts) -> + observe_event(<<"bundle_header">>, fun() -> + StartOffset = EndOffset - Size + 1, + case hb_ao:resolve( + << + ?ARWEAVE_DEVICE/binary, + "/chunk&offset=", + (hb_util:bin(StartOffset))/binary + >>, + Opts + ) of + {ok, FirstChunk} -> + % Most bundle headers can fit in a single chunk, but those with + % thousands of items might require multiple chunks to fully + % represent the item index. + HeaderSize = ar_bundles:bundle_header_size(FirstChunk), + case header_chunk(HeaderSize, FirstChunk, StartOffset, Opts) of + {ok, BundleHeader} -> + {_ItemsBin, BundleIndex} = + ar_bundles:decode_bundle_header(BundleHeader), + {ok, {BundleIndex, HeaderSize}}; + Error -> + Error + end; + Error -> + Error + end + end). + +header_chunk(HeaderSize, FirstChunk, _StartOffset, _Opts) + when HeaderSize =< byte_size(FirstChunk) -> + {ok, FirstChunk}; +header_chunk(HeaderSize, _FirstChunk, StartOffset, Opts) -> + hb_ao:resolve( + << + ?ARWEAVE_DEVICE/binary, + "/chunk&offset=", + (hb_util:bin(StartOffset))/binary, + "&length=", + (hb_util:bin(HeaderSize))/binary + >>, + Opts + ). + +resolve_tx_headers(TXIDs, Opts) -> + Results = parallel_map( + TXIDs, + fun(TXID) -> resolve_tx_header(TXID, Opts) end, + Opts + ), + lists:foldr( + fun(Res, {Acc, SkippedAcc}) -> + case Res of + {ok, TX} -> {[TX | Acc], SkippedAcc}; + skip -> {Acc, SkippedAcc + 1} + end + end, + {[], 0}, + Results + ). + +resolve_tx_header(TXID, Opts) -> + try + ResolveRes = observe_event(<<"tx_header">>, fun() -> + hb_ao:resolve( + << + ?ARWEAVE_DEVICE/binary, + "/tx&tx=", + TXID/binary, + "&exclude-data=true" + >>, + Opts ) - end. \ No newline at end of file + end), + case ResolveRes of + {ok, StructuredTXHeader} -> + {ok, + hb_message:convert( + StructuredTXHeader, + <<"tx@1.0">>, + <<"structured@1.0">>, + Opts)}; + {error, ResolveError} -> + ?event( + copycat_short, + {arweave_tx_skipped, + {tx_id, {explicit, TXID}}, + {reason, ResolveError} + } + ), + skip + end + catch + Class:Reason:_ -> + ?event( + copycat_short, + {arweave_tx_skipped, + {tx_id, {explicit, TXID}}, + {class, Class}, + {reason, Reason} + } + ), + skip + end. + +%% @doc Record event metrics (count and duration) using hb_event:increment. +record_event_metrics(MetricName, Count, Duration) -> + hb_event:increment(<<"arweave_block_count">>, MetricName, #{}, Count), + hb_event:increment(<<"arweave_block_duration">>, MetricName, #{}, Duration). + +%% @doc Track an operation's execution time and count using hb_event:increment. +%% Always tracks both count and duration, regardless of success/failure. +observe_event(MetricName, Fun) -> + {Time, Result} = timer:tc(Fun), + record_event_metrics(MetricName, 1, Time), + Result. 
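For reference, the offset arithmetic used while indexing above: a block's first byte sits at weave_size - block_size, generate_size_tagged_list_from_txs/2 yields each transaction's end offset relative to the block, and a transaction's start (where its bundle header begins) is recovered by subtracting its data size. A worked sketch with illustrative numbers:

%% Illustrative numbers only.
index_offset_example() ->
    WeaveSize = 1000000,
    BlockSize = 2000,
    BlockStartOffset = WeaveSize - BlockSize,       % 998000
    %% Suppose generate_size_tagged_list_from_txs/2 reports a TX ending at
    %% relative offset 1500 with data_size 1200.
    TXEndOffset = BlockStartOffset + 1500,          % 999500
    TXStartOffset = TXEndOffset - 1200,             % 998300
    %% download_bundle_header/3 then requests the chunk covering the first
    %% byte of the bundle, i.e. TXEndOffset - Size + 1.
    998301 = TXEndOffset - 1200 + 1,
    {BlockStartOffset, TXStartOffset}.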
+ +%%% Tests + +index_ids_test() -> + %% Test block: https://viewblock.io/arweave/block/1827942 + %% Note: this block includes a data item with an Ethereum signature. This + %% signature type is not yet (as of Jan 2026) supported by ar_bundles.erl, + %% however we should still be able to index it (we just can't deserialize + %% it). + {_TestStore, StoreOpts, Opts} = setup_index_opts(), + {ok, 1827942} = + hb_ao:resolve( + <<"~copycat@1.0/arweave&from=1827942&to=1827942">>, + Opts + ), + % WbRAQbeyjPHgopBKyi0PLeKWvYZr3rgZvQ7QY3ASJS4 is a bundle signed with + % an Ethereum signature which is not supported by HB as of Jan 2026. + % The bundle should be indexed even though we can't deserialize the + % bundle itself. + ?assertException( + error, + {badmatch, unsupported_tx_format}, + hb_store_arweave:read( + StoreOpts, + <<"WbRAQbeyjPHgopBKyi0PLeKWvYZr3rgZvQ7QY3ASJS4">>) + ), + % These 3 items are within the WbRAQbeyjPHgopBKyi0PLeKWvYZr3rgZvQ7QY3ASJS4 + % bundle. + assert_item_read(Opts, + <<"ATi9pQF_eqb99UK84R5rq8lGfRGpilVQOYyth7rXxh8">>), + assert_item_read(Opts, + <<"4VSfUbhMVZQHW5VfVwQZOmC5fR3W21DZgFCyz8CA-cE">>), + assert_item_read(Opts, + <<"ZQRHZhktk6dAtX9BlhO1teOtVlGHoyaWP25kAlhxrM4">>), + % The T2pluNnaavL7-S2GkO_m3pASLUqMH_XQ9IiIhZKfySs bundle can be deserialized, so + % we'll verify that some of its items were indexed and match the version + % in the deserialized bundle. + assert_bundle_read( + Opts, + <<"T2pluNnaavL7-S2GkO_m3pASLUqMH_XQ9IiIhZKfySs">>, + [ + {<<"54K1ehEIKZxGSusgZzgbGYaHfllwWQ09-S9-eRUJg5Y">>, <<"1">>}, + {<<"MgatoEjlO_YtdbxFi9Q7Hxbs0YQVcChddhSS7FsdeIg">>, <<"19">>}, + {<<"z-oKJfhMq5qoVFrljEfiBKgumaJmCWVxNJaavR5aPE8">>, <<"26">>} + ] + ), + % A non-ANS-104 transaction in the block should not be indexed. + ?assertEqual({error, not_found}, + hb_store_arweave:read(StoreOpts, + <<"bXEgFm4K2b5VD64skBNAlS3I__4qxlM3Sm4Z5IXj3h8">>)), + % Another bundle with an unsupported signature should be indexed even if + % it can't be deserialized. + ?assertException( + error, + {badmatch, unsupported_tx_format}, + hb_store_arweave:read( + StoreOpts, + <<"kK67S13W_8jM9JUw2umVamo0zh9v1DeVxWrru2evNco">>) + ), + assert_bundle_read( + Opts, + <<"c2ATDuTgwKCcHpAFZqSt13NC-tA4hdA7Aa2xBPuOzoE">>, + [ + {<<"OBKr-7UrmjxFD-h-qP-XLuvCgtyuO_IDpBMgIytvusA">>, <<"1">>} + ] + ), + ok. + +bundle_header_index_test() -> + {_TestStore, _StoreOpts, Opts} = setup_index_opts(), + TXID = <<"bnMTI7LglBGSaK5EdV_juh6GNtXLm0cd5lkd2q4nlT0">>, + {ok, #{ <<"body">> := OffsetBody }} = + hb_http:request( + #{ + <<"path">> => <<"/arweave/tx/", TXID/binary, "/offset">>, + <<"method">> => <<"GET">> + }, + Opts + ), + OffsetMsg = hb_json:decode(OffsetBody), + EndOffset = hb_util:int(maps:get(<<"offset">>, OffsetMsg)), + Size = hb_util:int(maps:get(<<"size">>, OffsetMsg)), + {ok, {BundleIndex, _HeaderSize}} = + download_bundle_header(EndOffset, Size, Opts), + ?assertEqual(15000, length(BundleIndex)), + ok. + +index_ids_ecdsa_test() -> + {_TestStore, _StoreOpts, Opts} = setup_index_opts(), + {ok, 1827904} = + hb_ao:resolve( + <<"~copycat@1.0/arweave&from=1827904&to=1827904">>, + Opts + ), + assert_bundle_read( + Opts, + <<"VNhX_pSANk_8j0jZBR5bh_5jr-lkfbHDjtHd8FKqx7U">>, + [ + {<<"3xDKhrCQcPuBtcm1ipZS5C9gAfFYClgHuHOHAXGfchM">>, <<"1">>}, + {<<"JantC8f89VE-RidArHnU9589gY5T37NDXnWpI7H_psc">>, <<"7">>} + ] + ), + ok. + +non_string_tags_test() -> + {_TestStore, _StoreOpts, Opts} = setup_index_opts(), + Res = resolve_tx_header(<<"752P6t4cOjMabYHqzC6hyLhxyo4YKZLblg7va_J21YE">>, Opts), + ?assertEqual(skip, Res), + ok.
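For scale, the 15,000-item bundle exercised by bundle_header_index_test above implies a header far larger than a single chunk, assuming the standard ANS-104 layout of a 32-byte item count followed by one 64-byte index entry per item:

    %% Illustrative arithmetic only (ANS-104 header layout assumption):
    HeaderSize = 32 + 15000 * 64,   % 960,032 bytes
    HeaderSize > 262144.            % true: does not fit in one 256 KiB chunk

This is why header_chunk/4 falls back to a chunk request with an explicit length whenever bundle_header_size/1 reports more bytes than the first chunk contains.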
+ +setup_index_opts() -> + TestStore = hb_test_utils:test_store(), + StoreOpts = #{ <<"index-store">> => [TestStore] }, + Store = [ + TestStore, + #{ + <<"store-module">> => hb_store_fs, + <<"name">> => <<"cache-mainnet">> + }, + #{ + <<"store-module">> => hb_store_arweave, + <<"name">> => <<"cache-arweave">>, + <<"index-store">> => [TestStore], + <<"arweave-node">> => <<"https://arweave.net">> + }, + #{ + <<"store-module">> => hb_store_gateway, + <<"subindex">> => [ + #{ + <<"name">> => <<"Data-Protocol">>, + <<"value">> => <<"ao">> + } + ], + <<"local-store">> => [TestStore] + }, + #{ + <<"store-module">> => hb_store_gateway, + <<"local-store">> => [TestStore] + } + ], + Opts = #{ + store => Store, + arweave_index_ids => true, + arweave_index_store => StoreOpts + }, + {TestStore, StoreOpts, Opts}. + +assert_bundle_read(Opts, BundleID, ExpectedItems) -> + ReadItems = + lists:map( + fun({ItemID, _Index}) -> + assert_item_read(Opts, ItemID) + end, + ExpectedItems + ), + Bundle = assert_item_read(Opts, BundleID), + lists:foreach( + fun({{_ItemID, Index}, Item}) -> + QueriedItem = hb_ao:get(Index, Bundle, Opts), + ?assertEqual(hb_maps:without(?AO_CORE_KEYS, Item), hb_maps:without(?AO_CORE_KEYS, QueriedItem)) + end, + lists:zip(ExpectedItems, ReadItems) + ), + ok. + +assert_item_read(Opts, ItemID) -> + {ok, Item} = hb_ao:resolve(ItemID, Opts), + ?assert(hb_message:verify(Item, all, Opts)), + ?assertEqual(ItemID, hb_message:id(Item, signed)), + Item. diff --git a/src/hb_http.erl b/src/hb_http.erl index 2f20d1764..8f7f3cc2e 100644 --- a/src/hb_http.erl +++ b/src/hb_http.erl @@ -104,23 +104,34 @@ request(Method, Peer, Path, RawMessage, Opts) -> ), StartTime = os:system_time(millisecond), % Perform the HTTP request. - {_ErlStatus, Status, Headers, Body} = hb_http_client:request(Req, Opts), + Response = hb_http_client:request(Req, Opts), % Process the response. EndTime = os:system_time(millisecond), - ?event(http_outbound, - { - http_response, - {req, Req}, - {response, - #{ - status => Status, - headers => Headers, - body => Body - } - } - }, - Opts - ), + Duration = EndTime - StartTime, + case Response of + {_ErlStatus, Status, Headers, Body} -> + ?event(http_outbound, + { + http_response, + {req, Req}, + {response, + #{ + status => Status, + headers => Headers, + body => Body + } + } + }, + Opts + ), + request_response(Method, Peer, Path, Response, Duration, Opts); + Error -> + Error + end. + + +request_response(Method, Peer, Path, Response, Duration, Opts) -> + {_ErlStatus, Status, Headers, Body} = Response, % Convert the set-cookie headers into a cookie message, if they are present. % We do this by extracting the set-cookie headers and converting them into a % cookie message if they are present. @@ -161,7 +172,7 @@ request(Method, Peer, Path, RawMessage, Opts) -> ?event(http_short, {received, {status, Status}, - {duration, EndTime - StartTime}, + {duration, Duration}, {method, Method}, {peer, Peer}, {path, {string, Path}}, @@ -180,7 +191,7 @@ request(Method, Peer, Path, RawMessage, Opts) -> ), case {Key, hb_maps:get(Key, Msg, undefined, Opts)} of {<<"body">>, undefined} -> - {response_status_to_atom(Status), <<>>}; + {hb_http_client:response_status_to_atom(Status), <<>>}; {_, undefined} -> {failure, << @@ -195,7 +206,7 @@ request(Method, Peer, Path, RawMessage, Opts) -> >> }; {_, Value} -> - {response_status_to_atom(Status), Value} + {hb_http_client:response_status_to_atom(Status), Value} end; false -> % Find the codec device from the headers, if set. 
@@ -215,14 +226,6 @@ request(Method, Peer, Path, RawMessage, Opts) -> ) end. -%% @doc Convert a HTTP status code to a status atom. -response_status_to_atom(Status) -> - case Status of - 201 -> created; - X when X < 400 -> ok; - X when X < 500 -> error; - _ -> failure - end. %% @doc Convert an HTTP response to a message. outbound_result_to_message(<<"ans104@1.0">>, Status, Headers, Body, Opts) -> @@ -233,7 +236,7 @@ outbound_result_to_message(<<"ans104@1.0">>, Status, Headers, Body, Opts) -> try ar_bundles:deserialize(Body) of Deserialized -> { - response_status_to_atom(Status), + hb_http_client:response_status_to_atom(Status), hb_message:convert( Deserialized, <<"structured@1.0">>, @@ -259,7 +262,7 @@ outbound_result_to_message(<<"ans104@1.0">>, Status, Headers, Body, Opts) -> outbound_result_to_message(<<"httpsig@1.0">>, Status, Headers, Body, Opts) -> ?event(http_outbound, {result_is_httpsig, {body, Body}}, Opts), { - response_status_to_atom(Status), + hb_http_client:response_status_to_atom(Status), http_response_to_httpsig(Status, Headers, Body, Opts) }. @@ -1254,3 +1257,16 @@ index_request_test() -> #{} ), ?assertEqual(<<"i like dogs!">>, hb_ao:get(<<"body">>, Res, #{})). + +request_error_handling_test() -> + Opts = #{}, + NonExistentDomain = <<"http://nonexistent.invalid:80">>, + Result = hb_http:request( + <<"GET">>, + NonExistentDomain, + <<"/">>, + #{}, + Opts + ), + % The result should be an error tuple, not crash with badmatch + ?assertMatch({error, _}, Result). diff --git a/src/hb_http_client.erl b/src/hb_http_client.erl index 3611cc088..4ee9f008b 100644 --- a/src/hb_http_client.erl +++ b/src/hb_http_client.erl @@ -3,7 +3,7 @@ -module(hb_http_client). -behaviour(gen_server). -include("include/hb.hrl"). --export([start_link/1, request/2]). +-export([start_link/1, response_status_to_atom/1, request/2]). -export([init/1, handle_cast/2, handle_call/3, handle_info/2, terminate/2]). -record(state, { @@ -24,12 +24,28 @@ start_link(Opts) -> gen_server:start_link({local, ?MODULE}, ?MODULE, Opts, []). +%% @doc Convert a HTTP status code to a status atom. +response_status_to_atom(Status) -> + case Status of + 201 -> created; + X when X < 400 -> ok; + X when X < 500 -> error; + _ -> failure + end. + request(Args, Opts) -> request(Args, hb_opts:get(http_retry, ?DEFAULT_RETRIES, Opts), Opts). request(Args, RemainingRetries, Opts) -> - case do_request(Args, Opts) of - {error, Details} -> maybe_retry(RemainingRetries, Args, Details, Opts); - {ok, Status, Headers, Body} -> {ok, Status, Headers, Body} + Response = do_request(Args, Opts), + case Response of + {error, _Details} -> maybe_retry(RemainingRetries, Args, Response, Opts); + {ok, Status, _Headers, _Body} -> + StatusAtom = response_status_to_atom(Status), + RetryResponses = hb_opts:get(http_retry_response, [], Opts), + case lists:member(StatusAtom, RetryResponses) of + true -> maybe_retry(RemainingRetries, Args, Response, Opts); + false -> Response + end end. do_request(Args, Opts) -> @@ -38,8 +54,8 @@ do_request(Args, Opts) -> httpc -> httpc_req(Args, Opts) end. 
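A usage sketch of the retry-on-status behaviour introduced above. The option names (http_retry, http_retry_mode, http_retry_response) come from the code; the peer URL and path below are placeholders. Because response_status_to_atom/1 maps 5xx responses to the failure atom, listing failure asks the client to retry transient server errors while returning 4xx responses immediately:

    %% Sketch only; the peer and path are hypothetical.
    Opts = #{
        http_retry => 3,
        http_retry_mode => backoff,
        http_retry_response => [failure]
    },
    hb_http:request(<<"GET">>, <<"http://peer.example:1984">>, <<"/info">>, #{}, Opts).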
-maybe_retry(0, _, ErrDetails, _) -> {error, ErrDetails}; -maybe_retry(Remaining, Args, ErrDetails, Opts) -> +maybe_retry(0, _, OriginalResponse, _) -> OriginalResponse; +maybe_retry(Remaining, Args, OriginalResponse, Opts) -> RetryBaseTime = hb_opts:get(http_retry_time, ?DEFAULT_RETRY_TIME, Opts), RetryTime = case hb_opts:get(http_retry_mode, backoff, Opts) of @@ -48,6 +64,10 @@ maybe_retry(Remaining, Args, ErrDetails, Opts) -> BaseRetries = hb_opts:get(http_retry, ?DEFAULT_RETRIES, Opts), RetryBaseTime * (1 + (BaseRetries - Remaining)) end, + ErrDetails = case OriginalResponse of + {error, Details} -> Details; + {ok, Status, _, _} -> Status + end, ?event( warning, {retrying_http_request, diff --git a/src/hb_opts.erl b/src/hb_opts.erl index 5536ad523..b334bb7e5 100644 --- a/src/hb_opts.erl +++ b/src/hb_opts.erl @@ -287,6 +287,16 @@ default_message() -> } ] }, + #{ + % Routes for Arweave chunk requests to use a remote gateway. + <<"template">> => <<"/chunk">>, + <<"node">> => + #{ + <<"match">> => <<"^/arweave">>, + <<"with">> => <<"https://neo-arweave.zephyrdev.xyz">>, + <<"opts">> => #{ http_client => httpc, protocol => http2 } + } + }, #{ % Routes for Arweave transaction requests to use a remote gateway. <<"template">> => <<"/arweave">>, @@ -314,6 +324,12 @@ default_message() -> <<"store-module">> => hb_store_fs, <<"name">> => <<"cache-mainnet">> }, + #{ + <<"store-module">> => hb_store_arweave, + <<"name">> => <<"cache-arweave">>, + <<"index-store">> => [?DEFAULT_PRIMARY_STORE], + <<"arweave-node">> => <<"https://arweave.net">> + }, #{ <<"store-module">> => hb_store_gateway, <<"subindex">> => [ diff --git a/src/hb_singleton.erl b/src/hb_singleton.erl index 7b086afc8..a38cf957c 100644 --- a/src/hb_singleton.erl +++ b/src/hb_singleton.erl @@ -335,19 +335,20 @@ do_build(I, [Msg | Rest], ScopedKeys, Opts) -> %% 2. Part subpath resolutions %% 3. Inlined key-value pairs %% 4. Device specifier -parse_part(ID, _Opts) when ?IS_ID(ID) -> ID; parse_part(Part, Opts) -> case maybe_subpath(Part, Opts) of {resolve, Subpath} -> {resolve, Subpath}; Part -> case part([$&, $~, $+, $ , $=], Part) of + {no_match, PartKey, <<>>} when ?IS_ID(PartKey) -> + PartKey; {no_match, PartKey, <<>>} -> #{ <<"path">> => PartKey }; {Sep, PartKey, PartModBin} -> parse_part_mods( << Sep:8/integer, PartModBin/binary >>, #{ <<"path">> => PartKey }, - Opts + Opts ) end end. @@ -772,6 +773,20 @@ inlined_keys_test() -> ?assertEqual(not_found, hb_maps:get(<<"k1">>, Base, not_found)), ?assertEqual(not_found, hb_maps:get(<<"k2">>, Msg2, not_found)). +inlined_keys_long_segment_test() -> + Req = #{ + <<"path">> => + <<"/chunk&offset=377813969707255&length=262144">> + }, + Msgs = from(Req, #{}), + ?assertEqual(2, length(Msgs)), + [Base, Msg] = Msgs, + ?assertEqual(<<"chunk">>, hb_maps:get(<<"path">>, Msg)), + ?assertEqual(<<"377813969707255">>, hb_maps:get(<<"offset">>, Msg)), + ?assertEqual(<<"262144">>, hb_maps:get(<<"length">>, Msg)), + ?assertEqual(not_found, hb_maps:get(<<"offset">>, Base, not_found)), + ?assertEqual(not_found, hb_maps:get(<<"length">>, Base, not_found)). + inlined_quoted_key_test() -> Req = #{ <<"method">> => <<"POST">>, diff --git a/src/hb_store_arweave.erl b/src/hb_store_arweave.erl new file mode 100644 index 000000000..9720d121a --- /dev/null +++ b/src/hb_store_arweave.erl @@ -0,0 +1,152 @@ +%%% @doc A store implementation that relays to an Arweave node, using an +%%% intermediate cache of offsets as an ID->ArweaveLocation mapping. +-module(hb_store_arweave).
+%%% Store API: +-export([type/2, read/2]). +%%% Indexing API: +-export([write_offset/5]). +-include("include/hb.hrl"). +-include_lib("eunit/include/eunit.hrl"). + +-define(ARWEAVE_INDEX_PATH, <<"~arweave@2.9-pre/offset">>). + +%% @doc Get the type of the data at the given key. We potentially cache the +%% result, so that we don't have to read the data from the GraphQL route +%% multiple times. +type(StoreOpts, Key) -> + case read(StoreOpts, Key) of + {error, not_found} -> not_found; + {ok, _Data} -> + % TODO: + % - should this return composite for any indexed L1 bundles? + % - if so, I guess we need to implement list/2? + % - for now we don't index nested bundle children, but once we + % do we may also need to return composite for them. + simple + end. + +read(StoreOpts = #{ <<"index-store">> := IndexStore }, ID) -> + case hb_store:read(IndexStore, path(ID)) of + {ok, Binary} -> + [IsTX, StartOffset, Length] = binary:split(Binary, <<":">>, [global]), + case hb_util:bool(IsTX) of + true -> + load_bundle(ID, + hb_util:int(StartOffset), hb_util:int(Length), StoreOpts); + false -> + load_item( + hb_util:int(StartOffset), hb_util:int(Length), StoreOpts) + end; + not_found -> + {error, not_found} + end. + +load_item(StartOffset, Length, Opts) -> + case read_chunks(StartOffset, Length, Opts) of + {ok, SerializedItem} -> + { + ok, + hb_message:convert( + ar_bundles:deserialize(SerializedItem), + <<"structured@1.0">>, + <<"ans104@1.0">>, + Opts + ) + }; + {error, Reason} -> + {error, Reason} + end. + +load_bundle(ID, StartOffset, Length, Opts) -> + {ok, StructuredTXHeader} = hb_ao:resolve( + #{ <<"device">> => <<"arweave@2.9-pre">> }, + #{ <<"path">> => <<"tx">>, <<"tx">> => ID, <<"exclude-data">> => true }, + Opts + ), + TXHeader = hb_message:convert( + StructuredTXHeader, + <<"tx@1.0">>, + <<"structured@1.0">>, + Opts), + case read_chunks(StartOffset, Length, Opts) of + {ok, SerializedItem} -> + { + ok, + hb_message:convert( + TXHeader#tx{ data = SerializedItem }, + <<"structured@1.0">>, + <<"tx@1.0">>, + Opts + ) + }; + {error, Reason} -> + {error, Reason} + end. + +read_chunks(StartOffset, Length, Opts) -> + hb_ao:resolve( + #{ <<"device">> => <<"arweave@2.9-pre">> }, + #{ + <<"path">> => <<"chunk">>, + <<"offset">> => StartOffset + 1, + <<"length">> => Length + }, + Opts + ). + +write_offset( + #{ <<"index-store">> := IndexStore }, ID, IsTX, StartOffset, Length) -> + IsTxInt = hb_util:bool_int(IsTX), + Value = << + (hb_util:bin(IsTxInt))/binary, + ":", + (hb_util:bin(StartOffset))/binary, + ":", + (hb_util:bin(Length))/binary + >>, + hb_store:write(IndexStore, path(ID), Value). + +path(ID) -> + << + ?ARWEAVE_INDEX_PATH/binary, + "/", + (hb_util:bin(ID))/binary + >>.
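To make the index format concrete: write_offset/5 stores a colon-separated triple of is-tx flag, absolute start offset, and byte length under the ~arweave@2.9-pre/offset/ prefix, and read/2 parses it back with binary:split/3. Using the values from write_read_tx_test below, an entry would look roughly as follows (the physical key layout ultimately depends on the configured index store):

    %% Illustration only; values taken from write_read_tx_test below.
    %% write_offset(#{ <<"index-store">> => Store },
    %%              <<"bndIwac23-s0K11TLC1N7z472sLGAkiOdhds87ZywoE">>,
    %%              true, 363524457275638, 8387)
    %% writes:
    %%   Key   = <<"~arweave@2.9-pre/offset/bndIwac23-s0K11TLC1N7z472sLGAkiOdhds87ZywoE">>
    %%   Value = <<"1:363524457275638:8387">>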
+ + +%%% Tests + +write_read_tx_test() -> + Store = [hb_test_utils:test_store()], + Opts = #{ + <<"index-store">> => Store + }, + ID = <<"bndIwac23-s0K11TLC1N7z472sLGAkiOdhds87ZywoE">>, + EndOffset = 363524457284025, + Size = 8387, + StartOffset = EndOffset - Size, + ok = write_offset(Opts, ID, true, StartOffset, Size), + {ok, Bundle} = read(Opts, ID), + ?assert(hb_message:verify(Bundle, all, #{})), + {ok, Child} = + hb_ao:resolve( + Bundle, + <<"1/2">>, + #{} + ), + ?assert(hb_message:verify(Child, all, #{})), + ExpectedChild = #{ + <<"data">> => <<"{\"totalTickedRewardsDistributed\":0,\"distributedEpochIndexes\":[],\"newDemandFactors\":[],\"newEpochIndexes\":[],\"tickedRewardDistributions\":[],\"newPruneGatewaysResults\":[{\"delegateStakeReturned\":0,\"stakeSlashed\":0,\"gatewayStakeReturned\":0,\"delegateStakeWithdrawing\":0,\"prunedGateways\":[],\"slashedGateways\":[],\"gatewayStakeWithdrawing\":0}]}">>, + <<"data-protocol">> => <<"ao">>, + <<"from-module">> => <<"cbn0KKrBZH7hdNkNokuXLtGryrWM--PjSTBqIzw9Kkk">>, + <<"from-process">> => <<"agYcCFJtrMG6cqMuZfskIkFTGvUPddICmtQSBIoPdiA">>, + <<"anchor">> => <<"MDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAyODAxODg">>, + <<"reference">> => <<"280188">>, + <<"target">> => <<"1R5QEtX53Z_RRQJwzFWf40oXiPW2FibErT_h02pu8MU">>, + <<"type">> => <<"Message">>, + <<"variant">> => <<"ao.TN.1">> + }, + ?assert(hb_message:match(ExpectedChild, Child, only_present)), + ok. + +%% XXX TODO: write/read for data item \ No newline at end of file diff --git a/src/hb_store_lmdb.erl b/src/hb_store_lmdb.erl index 0a970513c..0e72bff66 100644 --- a/src/hb_store_lmdb.erl +++ b/src/hb_store_lmdb.erl @@ -30,7 +30,7 @@ -include("include/hb.hrl"). %% Configuration constants with reasonable defaults --define(DEFAULT_SIZE, 16 * 1024 * 1024 * 1024). % 16GB default database size +-define(DEFAULT_SIZE, 2 * 1024 * 1024 * 1024 * 1024). % 2TB default database size -define(CONNECT_TIMEOUT, 6000). % Timeout for server communication -define(DEFAULT_IDLE_FLUSH_TIME, 5). % Idle server time before auto-flush -define(DEFAULT_MAX_FLUSH_TIME, 50). % Maximum time between flushes diff --git a/src/hb_util.erl b/src/hb_util.erl index 5a3502014..a6669715a 100644 --- a/src/hb_util.erl +++ b/src/hb_util.erl @@ -1,6 +1,6 @@ %% @doc A collection of utility functions for building with HyperBEAM. -module(hb_util). --export([int/1, float/1, atom/1, bin/1, list/1, map/1]). +-export([int/1, float/1, atom/1, bin/1, list/1, map/1, bool/1, bool_int/1]). -export([safe_int/1]). -export([ceil_int/2, floor_int/2]). -export([id/1, id/2, native_id/1, human_id/1, human_int/1, to_hex/1]). @@ -81,6 +81,24 @@ bin(Value) when is_list(Value) -> binary_to_list(Value); bin(Value) when is_binary(Value) -> Value. +%% @doc Coerce a value to a boolean. +bool(Value) -> + case Value of + true -> true; + false -> false; + <<"true">> -> true; + <<"false">> -> false; + <<"1">> -> true; + <<"0">> -> false; + 1 -> true; + 0 -> false; + _ -> false + end. + +%% @doc Coerce a boolean to 1 or 0. +bool_int(true) -> 1; +bool_int(false) -> 0. + %% @doc Coerce a value to a string list. list(Value) when is_binary(Value) -> binary_to_list(Value); diff --git a/src/include/ar.hrl b/src/include/ar.hrl index 751acef95..51c90feb3 100644 --- a/src/include/ar.hrl +++ b/src/include/ar.hrl @@ -14,6 +14,7 @@ -define(DEFAULT_ANCHOR, <<>>). -define(DEFAULT_TARGET, <<>>). -define(DEFAULT_DATA_ROOT, <<>>). +-define(DEFAULT_DATA_SIZE, 0). -define(DEFAULT_QUANTITY, 0). -define(DEFAULT_REWARD, 0).
@@ -54,7 +55,7 @@ data = ?DEFAULT_DATA, manifest = undefined, %% Size in bytes of the transaction data. - data_size = 0, + data_size = ?DEFAULT_DATA_SIZE, %% Deprecated. Not used, not gossiped. data_tree = [], %% The Merkle root of the Merkle tree of data chunks. @@ -112,3 +113,8 @@ -define(BUNDLE_KEYS, [ <<"bundle-format">>, <<"bundle-version">>, <<"bundle-map">>]). + +%% The threshold was determined on the mainnet at the 2.5 fork block. Chunks +%% submitted after the threshold must adhere to stricter validation rules. +%% This offset is about halfway through partition 8. +-define(STRICT_DATA_SPLIT_THRESHOLD, 30_607_159_107_830).
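A quick sanity check on the partition note above, assuming the mainnet partition size of 3.6 TB (3,600,000,000,000 bytes): 30,607,159,107,830 / 3,600,000,000,000 ≈ 8.5, so the threshold does indeed sit roughly halfway through zero-indexed partition 8.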