Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
0968914
wip: Arweave TXID->offset indexing in `copycat@1.0`
samcamwilliams Jan 15, 2026
69fe4cf
fix: only treat path segments as ids if they can't further segmented
JamesPiechota Jan 16, 2026
f6532ed
wip: implement ~arweave@2.9-pre/chunk
JamesPiechota Jan 16, 2026
d767535
test: make dev_arweave tests more reliable
JamesPiechota Jan 17, 2026
bb45022
wip: support L1 TX messages that have data_size/data_root but no data…
JamesPiechota Jan 19, 2026
7d6137b
wip: write and read TX-bundle to hb_store_arweave
JamesPiechota Jan 19, 2026
3438a06
wip: working on getting a single data item from a bundle to load
JamesPiechota Jan 21, 2026
512fd16
fix: chunk up L1 tx data according to the arwave-js logic
JamesPiechota Jan 23, 2026
6ce7623
fix: use neo-arweave.zephyrdev.xyz as the gateway for GET /chunk requ…
JamesPiechota Jan 23, 2026
c629125
fix: use legacy chunking mode for format=1 transactions
JamesPiechota Jan 23, 2026
bfce13b
fix: add full block index test to dev_copycat_arweave
JamesPiechota Jan 25, 2026
6fb2ce0
test: update dev_copycat_arweave test to use the regular device entry…
JamesPiechota Jan 26, 2026
73b4fad
fix: enable hb_ao:resolve to work for indexed arweave items
JamesPiechota Jan 26, 2026
f153d81
fix: hb_store_arweave now uses start offset instead of end offset
JamesPiechota Jan 26, 2026
e5c114d
fix: update how ~copycat@1.0 iterates over the range of blocks to be …
JamesPiechota Jan 26, 2026
a2ed35d
fix: support bundles with large headers
JamesPiechota Jan 26, 2026
13a2eb3
fix: skip L1 TXs that are signed with ECDSA
JamesPiechota Jan 27, 2026
0c71ec6
impr: log count of items indexed in each block
JamesPiechota Jan 27, 2026
93ccc5b
chore: cleanup
JamesPiechota Jan 27, 2026
7ba7275
fix: allow RSA signatures of varying bitsize
JamesPiechota Jan 27, 2026
cb101c8
fix: skip transactions with non-string tags
JamesPiechota Jan 27, 2026
59d9906
impr: allow hb_http_client to retry on HTTP response
JamesPiechota Jan 28, 2026
dbb7460
fix: track and log skipped transactions while indexing
JamesPiechota Jan 28, 2026
1742301
fix: don't set the arweave index options in the default hb_opts
JamesPiechota Jan 29, 2026
62f1071
fix: allow hb_http:request to handle an {error, _} response
JamesPiechota Jan 29, 2026
e94951b
wip: add some performance metrics (may remove later)
JamesPiechota Jan 29, 2026
28e4f24
wip: move dev_copycat_arweave performance tracking over to hb_event
JamesPiechota Jan 30, 2026
1caeaeb
wip: add metric for full block processing
JamesPiechota Jan 30, 2026
a14c38d
impr: parallelize arweave indexing
JamesPiechota Jan 30, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions src/ar_block.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
%%% @doc Copied and adapted from the arweave codebase.
%%% Should track: https://github.com/ArweaveTeam/arweave/blob/master/apps/arweave/src/ar_block.erl
-module(ar_block).

-export([strict_data_split_threshold/0, get_chunk_padded_offset/1, generate_size_tagged_list_from_txs/2]).

-include("include/ar.hrl").

%%%===================================================================
%%% Public interface.
%%%===================================================================

%% @doc Return the value of the ?STRICT_DATA_SPLIT_THRESHOLD macro (defined in
%% include/ar.hrl) so that other modules can read it through a function call.
strict_data_split_threshold() ->
    ?STRICT_DATA_SPLIT_THRESHOLD.

%% @doc Map an offset to its chunk-padded equivalent.
%% Offsets at or below ar_block:strict_data_split_threshold() are returned as is.
%% Larger offsets are handed to ar_poa:get_padded_offset/2 together with the
%% threshold, which rounds them up to the end of the padded chunk.
-spec get_chunk_padded_offset(Offset :: non_neg_integer()) -> non_neg_integer().
get_chunk_padded_offset(Offset) ->
    Threshold = ar_block:strict_data_split_threshold(),
    if
        Offset > Threshold ->
            ar_poa:get_padded_offset(Offset, Threshold);
        true ->
            Offset
    end.

%% @doc Build the size-tagged list for the given transactions.
%% The transactions are sorted with lists:sort/1 and laid out back to back starting
%% at offset 0. Every transaction contributes one `{{TX, DataRoot}, EndOffset}' entry.
%% From ar_fork:height_2_5() onwards, a transaction whose weave size increase (as
%% reported by ar_tx:get_weave_size_increase/2) exceeds its data size is followed by
%% a `{{padding, <<>>}, PaddedEndOffset}' entry, and the next transaction starts
%% at the padded offset.
generate_size_tagged_list_from_txs(TXs, Height) ->
    AppendTX =
        fun(TX, {StartOffset, Acc}) ->
            DataSize = TX#tx.data_size,
            EndOffset = StartOffset + DataSize,
            % Before the 2.5 fork no padding is accounted for.
            PaddedEndOffset =
                case Height >= ar_fork:height_2_5() of
                    true ->
                        StartOffset + ar_tx:get_weave_size_increase(DataSize, Height);
                    false ->
                        EndOffset
                end,
            TXEntry = {{TX, get_tx_data_root(TX)}, EndOffset},
            if
                PaddedEndOffset > EndOffset ->
                    % Encode the padding information in the Merkle tree.
                    PaddingEntry = {{padding, <<>>}, PaddedEndOffset},
                    {PaddedEndOffset, [PaddingEntry, TXEntry | Acc]};
                true ->
                    {EndOffset, [TXEntry | Acc]}
            end
        end,
    {_FinalOffset, ReversedEntries} = lists:foldl(AppendTX, {0, []}, lists:sort(TXs)),
    lists:reverse(ReversedEntries).

%% Return the data root for a transaction. Format-2 transactions carry it in the
%% record already; for anything else it is taken from the record returned by
%% ar_tx:generate_chunk_tree/1.
get_tx_data_root(#tx{ format = 2 } = TX) ->
    TX#tx.data_root;
get_tx_data_root(TX) ->
    WithChunkTree = ar_tx:generate_chunk_tree(TX),
    WithChunkTree#tx.data_root.
54 changes: 50 additions & 4 deletions src/ar_bundles.erl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
-export([encode_tags/1, decode_tags/1]).
-export([serialize/1, deserialize/1, serialize_bundle/3]).
-export([data_item_signature_data/1]).
-export([bundle_header_size/1, decode_bundle_header/1]).
-include("include/hb.hrl").
-include_lib("eunit/include/eunit.hrl").

Expand Down Expand Up @@ -430,14 +431,14 @@ maybe_unbundle(Item) ->

%% Decode the bundle stored in the item's data field and put the result back into
%% that field. The `detached' and ?DEFAULT_DATA results of unbundle/1 are stored
%% unchanged; a decoded list of items is first converted with
%% hb_util:list_to_numbered_message/1.
unbundle_list(Item) ->
    NewData =
        case unbundle(Item#tx.data) of
            detached -> detached;
            ?DEFAULT_DATA -> ?DEFAULT_DATA;
            Items -> hb_util:list_to_numbered_message(Items)
        end,
    Item#tx{data = NewData}.

unbundle_map(Item) ->
MapTXID = dev_arweave_common:tagfind(<<"bundle-map">>, Item#tx.tags, <<>>),
case unbundle(Item#tx.data) of
detached -> Item#tx{data = detached};
?DEFAULT_DATA -> Item#tx{data = ?DEFAULT_DATA};
Items ->
MapItem = find_single_layer(hb_util:decode(MapTXID), Items),
Map = hb_json:decode(MapItem#tx.data),
Expand Down Expand Up @@ -469,7 +470,7 @@ find_single_layer(UnsignedID, Items) ->
%% Decode a serialized bundle into its list of items.
%% The first 32 bytes are read as a little-endian item count; the header index is
%% then decoded with decode_bundle_header/2 and the item bodies with
%% decode_bundle_items/2. Data equal to ?DEFAULT_DATA (no payload present) is
%% returned unchanged so that callers can keep the item's data field as it was.
unbundle(<<Count:256/little-integer, Content/binary>>) ->
    {ItemsBin, Items} = decode_bundle_header(Count, Content),
    decode_bundle_items(Items, ItemsBin);
unbundle(?DEFAULT_DATA) ->
    ?DEFAULT_DATA.

decode_bundle_items([], <<>>) ->
[];
Expand All @@ -487,10 +488,23 @@ decode_bundle_items([{_ID, Size} | RestItems], ItemsBin) ->
)
].

%% @doc Return the size in bytes of a bundle header, computed from the item count
%% stored in the first 32 bytes (little-endian) of the bundle binary.
bundle_header_size(<<ItemCount:256/little-integer, _Rest/binary>>) ->
    % The count field itself occupies 32 bytes.
    CountFieldBytes = 32,
    % Each item in the bundle header index consumes 64 bytes
    % (a 32-byte size followed by a 32-byte ID).
    IndexEntryBytes = 64,
    CountFieldBytes + (ItemCount * IndexEntryBytes).

%% @doc Split a serialized bundle into its header index and the remaining bytes.
%% The binary starts with a 32-byte little-endian item count, followed by one
%% 64-byte index entry per item (32-byte little-endian size, then 32-byte ID).
%% Returns `{ItemsBin, [{ID, Size}]}' where the list is in header order and
%% ItemsBin is whatever follows the index. An empty binary yields `{<<>>, []}'.
decode_bundle_header(<<Count:256/little-integer, Content/binary>>) ->
    decode_bundle_header(Count, Content);
decode_bundle_header(<<>>) ->
    {<<>>, []}.

%% Decode Count index entries from the front of Bin (the count field has already
%% been stripped by the caller).
decode_bundle_header(Count, Bin) -> decode_bundle_header(Count, Bin, []).

%% Accumulate entries in reverse and flip them once all have been read. There is
%% deliberately no clause for a truncated index: it fails with function_clause.
decode_bundle_header(0, ItemsBin, Header) ->
    {ItemsBin, lists:reverse(Header)};
decode_bundle_header(
    Count,
    <<Size:256/little-integer, ID:32/binary, Rest/binary>>,
    Header
) ->
    decode_bundle_header(Count - 1, Rest, [{ID, Size} | Header]).

%% @doc Decode the signature from a binary format. Only RSA 4096 is currently supported.
Expand Down Expand Up @@ -643,6 +657,38 @@ with_zero_length_tag_test() ->
Deserialized = deserialize(Serialized),
?assertEqual(Item, Deserialized).

%% Exercise decode_bundle_header/1 on an empty binary, a zero-item header, one- and
%% two-item headers, and a header whose declared item bytes are absent.
decode_bundle_header_test() ->
    % Empty input decodes to an empty header with no item bytes.
    ?assertEqual({<<>>, []}, decode_bundle_header(<<>>)),
    % A count of zero leaves everything after the count as item bytes.
    Tail = <<"tail">>,
    ZeroItems = <<0:256/little, Tail/binary>>,
    ?assertEqual({Tail, []}, decode_bundle_header(ZeroItems)),
    % Single index entry.
    ID1 = crypto:strong_rand_bytes(32),
    Items1 = <<"abcde">>,
    OneItem = <<1:256/little, 5:256/little, ID1:32/binary, Items1/binary>>,
    ?assertEqual({Items1, [{ID1, 5}]}, decode_bundle_header(OneItem)),
    % Two index entries are returned in header order.
    ID2 = crypto:strong_rand_bytes(32),
    ID3 = crypto:strong_rand_bytes(32),
    Items2 = <<"payload">>,
    Entry2 = <<4:256/little, ID2:32/binary>>,
    Entry3 = <<2:256/little, ID3:32/binary>>,
    TwoItems = <<2:256/little, Entry2/binary, Entry3/binary, Items2/binary>>,
    ?assertEqual({Items2, [{ID2, 4}, {ID3, 2}]}, decode_bundle_header(TwoItems)),
    % The header decodes even when the item bytes it describes are missing.
    HeaderOnly = <<1:256/little, 6:256/little, ID1:32/binary>>,
    ?assertEqual({<<>>, [{ID1, 6}]}, decode_bundle_header(HeaderOnly)).

unsigned_data_item_id_test() ->
Item1 = deserialize(
serialize(
Expand Down
173 changes: 173 additions & 0 deletions src/ar_fork.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
%%%
%%% @doc The module defines Arweave hard forks' heights.
%%%

-module(ar_fork).

-export([height_1_6/0, height_1_7/0, height_1_8/0, height_1_9/0, height_2_0/0, height_2_2/0,
height_2_3/0, height_2_4/0, height_2_5/0, height_2_6/0, height_2_6_8/0,
height_2_7/0, height_2_7_1/0, height_2_7_2/0,
height_2_8/0, height_2_9/0]).

%% Height of the 1.6 fork. Compiling with FORKS_RESET defined moves it to 0.
-ifndef(FORKS_RESET).
height_1_6() -> 95000.
-else.
height_1_6() -> 0.
-endif.

%% Height of the 1.7 fork. Compiling with FORKS_RESET defined moves it to 0.
-ifndef(FORKS_RESET).
height_1_7() -> 235200. % Targeting 2019-07-08 UTC
-else.
height_1_7() -> 0.
-endif.

%% Height of the 1.8 fork. Compiling with FORKS_RESET defined moves it to 0.
-ifndef(FORKS_RESET).
height_1_8() -> 269510. % Targeting 2019-08-29 UTC
-else.
height_1_8() -> 0.
-endif.

%% Height of the 1.9 fork. Compiling with FORKS_RESET defined moves it to 0.
-ifndef(FORKS_RESET).
height_1_9() -> 315700. % Targeting 2019-11-04 UTC
-else.
height_1_9() -> 0.
-endif.

%% Height of the 2.0 fork. Compiling with FORKS_RESET defined moves it to 0.
-ifndef(FORKS_RESET).
height_2_0() -> 422250. % Targeting 2020-04-09 10:00 UTC
-else.
height_2_0() -> 0.
-endif.

%% Height of the 2.2 fork. Compiling with FORKS_RESET defined moves it to 0.
-ifndef(FORKS_RESET).
height_2_2() -> 552180. % Targeting 2020-10-21 13:00 UTC
-else.
height_2_2() -> 0.
-endif.

%% Height of the 2.3 fork. Compiling with FORKS_RESET defined moves it to 0.
-ifndef(FORKS_RESET).
height_2_3() -> 591140. % Targeting 2020-12-21 11:00 UTC
-else.
height_2_3() -> 0.
-endif.

%% Height of the 2.4 fork. Compiling with FORKS_RESET defined moves it to 0.
-ifndef(FORKS_RESET).
height_2_4() -> 633720. % Targeting 2021-02-24 11:50 UTC
-else.
height_2_4() -> 0.
-endif.

%% Height of the 2.5 fork. Compiling with FORKS_RESET defined moves it to 0.
-ifndef(FORKS_RESET).
height_2_5() -> 812970.
-else.
height_2_5() -> 0.
-endif.

%% Height of the 2.6 fork. A compile-time FORK_2_6_HEIGHT definition takes
%% precedence; otherwise FORKS_RESET moves the fork to height 0.
-ifdef(FORK_2_6_HEIGHT).
height_2_6() -> ?FORK_2_6_HEIGHT.
-else.
-ifndef(FORKS_RESET).
height_2_6() -> 1132210. % Targeting 2023-03-06 14:00 UTC
-else.
height_2_6() -> 0.
-endif.
-endif.

%% Height of the 2.6.8 fork. A compile-time FORK_2_6_8_HEIGHT definition takes
%% precedence; otherwise FORKS_RESET moves the fork to height 0.
-ifdef(FORK_2_6_8_HEIGHT).
height_2_6_8() -> ?FORK_2_6_8_HEIGHT.
-else.
-ifndef(FORKS_RESET).
height_2_6_8() -> 1189560. % Targeting 2023-05-30 16:00 UTC
-else.
height_2_6_8() -> 0.
-endif.
-endif.

%% Height of the 2.7 fork. A compile-time FORK_2_7_HEIGHT definition takes
%% precedence; otherwise FORKS_RESET moves the fork to height 0.
-ifdef(FORK_2_7_HEIGHT).
height_2_7() -> ?FORK_2_7_HEIGHT.
-else.
-ifndef(FORKS_RESET).
height_2_7() -> 1275480. % Targeting 2023-10-04 14:00 UTC
-else.
height_2_7() -> 0.
-endif.
-endif.

%% Height of the 2.7.1 fork. A compile-time FORK_2_7_1_HEIGHT definition takes
%% precedence; otherwise FORKS_RESET moves the fork to height 0.
-ifdef(FORK_2_7_1_HEIGHT).
height_2_7_1() -> ?FORK_2_7_1_HEIGHT.
-else.
-ifndef(FORKS_RESET).
height_2_7_1() -> 1316410. % Targeting 2023-12-05 14:00 UTC
-else.
height_2_7_1() -> 0.
-endif.
-endif.

%% Height of the 2.7.2 fork. A compile-time FORK_2_7_2_HEIGHT definition takes
%% precedence; otherwise FORKS_RESET moves the fork to height 0.
-ifdef(FORK_2_7_2_HEIGHT).
height_2_7_2() -> ?FORK_2_7_2_HEIGHT.
-else.
-ifndef(FORKS_RESET).
height_2_7_2() -> 1391330. % Targeting 2024-03-26 14:00 UTC
-else.
height_2_7_2() -> 0.
-endif.
-endif.

%% Height of the 2.8 fork. A compile-time FORK_2_8_HEIGHT definition takes
%% precedence; otherwise FORKS_RESET moves the fork to height 0.
-ifdef(FORK_2_8_HEIGHT).
height_2_8() -> ?FORK_2_8_HEIGHT.
-else.
-ifndef(FORKS_RESET).
height_2_8() -> 1547120. % Targeting 2024-11-13 14:00 UTC
-else.
height_2_8() -> 0.
-endif.
-endif.

%% Height of the 2.9 fork. A compile-time FORK_2_9_HEIGHT definition takes
%% precedence; otherwise FORKS_RESET moves the fork to height 0.
-ifdef(FORK_2_9_HEIGHT).
height_2_9() -> ?FORK_2_9_HEIGHT.
-else.
-ifndef(FORKS_RESET).
height_2_9() -> 1602350. % Targeting 2025-02-03 14:00 UTC
-else.
height_2_9() -> 0.
-endif.
-endif.
8 changes: 6 additions & 2 deletions src/ar_format.erl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ format(TX, Indent, Opts) when is_list(TX); is_map(TX) ->
format(TX, Indent, Opts) when is_record(TX, tx) ->
MustVerify = hb_opts:get(debug_ids, true, Opts),
Valid =
if MustVerify -> verify(TX);
if MustVerify -> verify(dev_arweave_common:normalize(TX));
true -> true
end,
UnsignedID =
Expand Down Expand Up @@ -65,7 +65,7 @@ format(TX, Indent, Opts) when is_record(TX, tx) ->
[hb_util:safe_encode(ar_bundles:signer(TX))],
Indent + 1),
format_line("Signature: ~s",
[hb_format:binary(TX#tx.signature)],
[hb_format:binary(TX#tx.signature, Opts)],
Indent + 1);
false -> []
end ++
Expand Down Expand Up @@ -125,6 +125,7 @@ format_fields(TX, Indent) ->
format_anchor(TX, Indent) ++
format_quantity(TX, Indent) ++
format_reward(TX, Indent) ++
format_data_size(TX, Indent) ++
format_data_root(TX, Indent).

format_format(TX, Indent) ->
Expand Down Expand Up @@ -152,6 +153,9 @@ format_quantity(TX, Indent) ->
%% Format the transaction's reward field as a "Reward: ..." line, passing
%% Indent + 1 to format_line/3.
format_reward(TX, Indent) ->
    Reward = TX#tx.reward,
    format_line("Reward: ~p", [Reward], Indent + 1).

%% Format the transaction's data_size field as a "Data Size: ..." line, passing
%% Indent + 1 to format_line/3.
format_data_size(TX, Indent) ->
    DataSize = TX#tx.data_size,
    format_line("Data Size: ~p", [DataSize], Indent + 1).

format_data_root(TX, Indent) ->
format_line("Data Root: ~s", [
case TX#tx.data_root of
Expand Down
18 changes: 18 additions & 0 deletions src/ar_poa.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
%%% @doc This module implements all mechanisms required to validate a proof of access
%%% for a chunk of data received from the network.
-module(ar_poa).

-export([get_padded_offset/1, get_padded_offset/2]).

-include("include/ar.hrl").

%% @doc Pad Offset up to a chunk boundary, with boundaries counted from
%% ar_block:strict_data_split_threshold(). Delegates to get_padded_offset/2.
get_padded_offset(Offset) ->
    Threshold = ar_block:strict_data_split_threshold(),
    get_padded_offset(Offset, Threshold).

%% @doc Round Offset up to a chunk boundary, where boundaries are spaced
%% ?DATA_CHUNK_SIZE bytes apart (256 KiB per the original documentation),
%% counting from StrictDataSplitThreshold.
%% NOTE(review): `div' truncates toward zero, so when Offset equals
%% StrictDataSplitThreshold the result is the threshold plus one full chunk, not
%% the threshold itself. ar_block:get_chunk_padded_offset/1 only calls this with
%% Offset > threshold; confirm that other callers do the same.
get_padded_offset(Offset, StrictDataSplitThreshold) ->
    BytesPastThreshold = Offset - StrictDataSplitThreshold,
    % Number of whole chunks needed to cover the bytes past the threshold.
    ChunkCount = (BytesPastThreshold - 1) div (?DATA_CHUNK_SIZE) + 1,
    StrictDataSplitThreshold + ChunkCount * (?DATA_CHUNK_SIZE).
Loading