diff --git a/CHANGELOG.md b/CHANGELOG.md index 6cf8d1189..cf97f6305 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,12 +16,15 @@ The following emojis are used to highlight certain changes: ### Added +- `dag/walker`: new package for memory-efficient DAG traversal with deduplication. `VisitedTracker` interface with `BloomTracker` (scalable bloom filter chain, ~4 bytes/CID vs ~75 bytes for a map) and `MapTracker` (exact, for tests). `WalkDAG` provides iterative DFS traversal with integrated dedup, supporting dag-pb, dag-cbor, raw, and other registered codecs. ~2x faster than the legacy go-ipld-prime selector-based traversal. `WalkEntityRoots` emits only entity roots (files, directories, HAMT shards) instead of every block, skipping internal file chunks. [#1124](https://github.com/ipfs/boxo/pull/1124) +- `pinner`: `NewUniquePinnedProvider` and `NewPinnedEntityRootsProvider` log and skip corrupted pin entries instead of aborting the provide cycle, allowing remaining pins to still be provided. [#1124](https://github.com/ipfs/boxo/pull/1124) - `routing/http/client`: `WithProviderInfoFunc` option resolves provider addresses at provide-time instead of client construction time. This only impacts legacy HTTP-only custom routing setups that depend on [IPIP-526](https://github.com/ipfs/specs/pull/526) and were sending unresolved `0.0.0.0` addresses in provider records instead of actual interface addresses. [#1115](https://github.com/ipfs/boxo/pull/1115) - `chunker`: added `Register` function to allow custom chunkers to be registered for use with `FromString`. - `mfs`: added `Directory.Mode()` and `Directory.ModTime()` getters to match the existing `File.Mode()` and `File.ModTime()` API. [#1131](https://github.com/ipfs/boxo/pull/1131) ### Changed +- `provider`: `NewPrioritizedProvider` now continues to the next stream when one fails instead of stopping all streams. `NewConcatProvider` added for pre-deduplicated streams. 
[#1124](https://github.com/ipfs/boxo/pull/1124) - `chunker`: `FromString` now rejects malformed `size-` strings with extra parameters (e.g. `size-123-extra` was previously silently accepted). - `gateway`: compliance with gateway-conformance [v0.13](https://github.com/ipfs/gateway-conformance/releases/tag/v0.13) - upgrade to `go-libp2p` [v0.48.0](https://github.com/libp2p/go-libp2p/releases/tag/v0.48.0) diff --git a/dag/walker/doc.go b/dag/walker/doc.go new file mode 100644 index 000000000..975092eaf --- /dev/null +++ b/dag/walker/doc.go @@ -0,0 +1,21 @@ +// Package walker provides memory-efficient DAG traversal with +// deduplication. Optimized for the IPFS provide system, but useful +// anywhere repeated DAG walks need to skip already-visited subtrees. +// +// The primary entry point is [WalkDAG], which walks a DAG rooted at a +// given CID, emitting each visited CID to a callback. When combined +// with a [VisitedTracker] (e.g. [BloomTracker]), entire subtrees +// already seen are skipped in O(1). +// +// For entity-aware traversal that only emits file/directory/HAMT roots +// instead of every block, see [WalkEntityRoots]. +// +// Blocks are decoded using the codecs registered in the process via +// the global multicodec registry. In a standard kubo build this +// includes dag-pb, dag-cbor, dag-json, cbor, json, and raw. +// +// Use [LinksFetcherFromBlockstore] to create a fetcher backed by a +// local blockstore. For custom link extraction (e.g. a different codec +// registry or non-blockstore storage), pass your own [LinksFetcher] +// function directly to [WalkDAG]. 
+package walker diff --git a/dag/walker/entity.go b/dag/walker/entity.go new file mode 100644 index 000000000..cf8abbddb --- /dev/null +++ b/dag/walker/entity.go @@ -0,0 +1,145 @@ +package walker + +import ( + "context" + + blockstore "github.com/ipfs/boxo/blockstore" + "github.com/ipfs/boxo/ipld/unixfs" + cid "github.com/ipfs/go-cid" + ipld "github.com/ipld/go-ipld-prime" + cidlink "github.com/ipld/go-ipld-prime/linking/cid" + basicnode "github.com/ipld/go-ipld-prime/node/basic" +) + +// EntityType represents the semantic type of a DAG entity. +type EntityType int + +const ( + EntityUnknown EntityType = iota + EntityFile // UnixFS file root (not its chunks) + EntityDirectory // UnixFS flat directory + EntityHAMTShard // UnixFS HAMT sharded directory bucket + EntitySymlink // UnixFS symbolic link +) + +// NodeFetcher returns child link CIDs and entity type for a given CID. +// Used by [WalkEntityRoots] which needs UnixFS type detection to decide +// whether to descend into children (directories, HAMT shards) or stop +// (files, symlinks). +type NodeFetcher func(ctx context.Context, c cid.Cid) (linkCIDs []cid.Cid, entityType EntityType, err error) + +// NodeFetcherFromBlockstore creates a [NodeFetcher] backed by a local +// blockstore. Like [LinksFetcherFromBlockstore], it decodes blocks via +// ipld-prime's global multicodec registry (dag-pb, dag-cbor, raw, etc.) +// and handles identity CIDs transparently via [blockstore.NewIdStore]. 
+// +// Entity type detection: +// - dag-pb with valid UnixFS Data: file, directory, HAMT shard, or symlink +// - dag-pb without valid UnixFS Data: EntityUnknown +// - raw codec: EntityFile (small file stored as a single raw block) +// - all other codecs (dag-cbor, dag-json, etc.): EntityUnknown +func NodeFetcherFromBlockstore(bs blockstore.Blockstore) NodeFetcher { + ls := linkSystemForBlockstore(bs) + + return func(ctx context.Context, c cid.Cid) ([]cid.Cid, EntityType, error) { + lnk := cidlink.Link{Cid: c} + nd, err := ls.Load(ipld.LinkContext{Ctx: ctx}, lnk, basicnode.Prototype.Any) + if err != nil { + return nil, EntityUnknown, err + } + + links := collectLinks(c, nd) + entityType := detectEntityType(c, nd) + return links, entityType, nil + } +} + +// detectEntityType infers the UnixFS entity type from an ipld-prime +// decoded node. For dag-pb nodes, it reads the "Data" field and parses +// it as UnixFS protobuf. For raw codec nodes, it returns EntityFile. +// For everything else, it returns EntityUnknown. 
+func detectEntityType(c cid.Cid, nd ipld.Node) EntityType { + codec := c.Prefix().Codec + + // raw codec: small file stored as a single block + if codec == cid.Raw { + return EntityFile + } + + // only dag-pb has UnixFS semantics; other codecs are unknown + if codec != cid.DagProtobuf { + return EntityUnknown + } + + // dag-pb: try to read the "Data" field for UnixFS type + dataField, err := nd.LookupByString("Data") + if err != nil || dataField.IsAbsent() || dataField.IsNull() { + return EntityUnknown + } + + dataBytes, err := dataField.AsBytes() + if err != nil { + return EntityUnknown + } + + fsn, err := unixfs.FSNodeFromBytes(dataBytes) + if err != nil { + return EntityUnknown + } + + switch fsn.Type() { + case unixfs.TFile, unixfs.TRaw: + return EntityFile + case unixfs.TDirectory: + return EntityDirectory + case unixfs.THAMTShard: + return EntityHAMTShard + case unixfs.TSymlink: + return EntitySymlink + default: + return EntityUnknown + } +} + +// WalkEntityRoots traverses a DAG calling emit for each entity root. +// +// Entity roots are semantic boundaries in the DAG: +// - File/symlink roots: emitted, children (chunks) NOT traversed +// - Directory roots: emitted, children recursed +// - HAMT shard nodes: emitted (needed for directory enumeration), +// children recursed +// - Non-UnixFS nodes (dag-cbor, dag-json, etc.): emitted AND children +// recursed to discover further content. The +entities optimization +// (skip chunks) only applies to UnixFS files; for all other codecs, +// every reachable CID is emitted. +// - Raw leaf nodes: emitted (no children to recurse) +// +// Same traversal order as [WalkDAG]: pre-order DFS with left-to-right +// sibling visiting. Uses the same option types: [WithVisitedTracker] +// for bloom/map dedup across walks, [WithLocality] for MFS locality +// checks. 
+func WalkEntityRoots( + ctx context.Context, + root cid.Cid, + fetch NodeFetcher, + emit func(cid.Cid) bool, + opts ...Option, +) error { + cfg := &walkConfig{} + for _, o := range opts { + o(cfg) + } + return walkLoop(ctx, root, func(ctx context.Context, c cid.Cid) ([]cid.Cid, error) { + children, entityType, err := fetch(ctx, c) + if err != nil { + return nil, err + } + // Only descend into directories, HAMT shards, and unknown + // node types. File and symlink children (chunks) are not + // entity roots, so we stop here. + if entityType == EntityFile || entityType == EntitySymlink { + return nil, nil + } + return children, nil + }, emit, cfg) +} diff --git a/dag/walker/entity_test.go b/dag/walker/entity_test.go new file mode 100644 index 000000000..058a6b77b --- /dev/null +++ b/dag/walker/entity_test.go @@ -0,0 +1,519 @@ +package walker_test + +import ( + "bytes" + "context" + "fmt" + "io" + "testing" + + "github.com/ipfs/boxo/blockservice" + blockstore "github.com/ipfs/boxo/blockstore" + "github.com/ipfs/boxo/dag/walker" + "github.com/ipfs/boxo/exchange/offline" + "github.com/ipfs/boxo/ipld/merkledag" + ft "github.com/ipfs/boxo/ipld/unixfs" + "github.com/ipfs/boxo/ipld/unixfs/hamt" + blocks "github.com/ipfs/go-block-format" + cid "github.com/ipfs/go-cid" + format "github.com/ipfs/go-ipld-format" + ipld "github.com/ipld/go-ipld-prime" + _ "github.com/ipld/go-ipld-prime/codec/dagcbor" + "github.com/ipld/go-ipld-prime/fluent/qp" + cidlink "github.com/ipld/go-ipld-prime/linking/cid" + basicnode "github.com/ipld/go-ipld-prime/node/basic" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func newTestDAGService(bs blockstore.Blockstore) format.DAGService { + bserv := blockservice.New(bs, offline.Exchange(bs)) + return merkledag.NewDAGService(bserv) +} + +func fileNodeWithData(t *testing.T, data []byte) *merkledag.ProtoNode { + t.Helper() + fsn := ft.NewFSNode(ft.TFile) + fsn.SetData(data) + nodeData, err := fsn.GetBytes() + 
require.NoError(t, err) + return merkledag.NodeWithData(nodeData) +} + +// putDagCBOR builds a dag-cbor block {name: string, links: [CID...]} +// and stores it in the blockstore. Returns its CID. +func putDagCBOR(t *testing.T, bs blockstore.Blockstore, name string, linkCIDs ...cid.Cid) cid.Cid { + t.Helper() + ls := cidlink.DefaultLinkSystem() + ls.StorageWriteOpener = func(_ ipld.LinkContext) (io.Writer, ipld.BlockWriteCommitter, error) { + buf := bytes.Buffer{} + return &buf, func(lnk ipld.Link) error { + cl := lnk.(cidlink.Link) + blk, err := blocks.NewBlockWithCid(buf.Bytes(), cl.Cid) + if err != nil { + return err + } + return bs.Put(context.Background(), blk) + }, nil + } + lp := cidlink.LinkPrototype{Prefix: cid.Prefix{ + Version: 1, Codec: cid.DagCBOR, MhType: 0x12, MhLength: 32, + }} + nd, err := qp.BuildMap(basicnode.Prototype.Any, -1, func(ma ipld.MapAssembler) { + qp.MapEntry(ma, "name", qp.String(name)) + qp.MapEntry(ma, "links", qp.List(-1, func(la ipld.ListAssembler) { + for _, c := range linkCIDs { + qp.ListEntry(la, qp.Link(cidlink.Link{Cid: c})) + } + })) + }) + require.NoError(t, err) + lnk, err := ls.Store(ipld.LinkContext{}, lp, nd) + require.NoError(t, err) + return lnk.(cidlink.Link).Cid +} + +func collectEntityWalk(t *testing.T, bs blockstore.Blockstore, root cid.Cid, opts ...walker.Option) []cid.Cid { + t.Helper() + var visited []cid.Cid + fetch := walker.NodeFetcherFromBlockstore(bs) + err := walker.WalkEntityRoots(t.Context(), root, fetch, func(c cid.Cid) bool { + visited = append(visited, c) + return true + }, opts...) + require.NoError(t, err) + return visited +} + +// --- UnixFS entity type detection --- +// +// These tests verify that WalkEntityRoots correctly identifies UnixFS +// entity types and treats them appropriately: files stop traversal +// (chunks not descended), directories and HAMT shards recurse. + +func TestEntityWalk_UnixFSFile(t *testing.T) { + // A single UnixFS file is an entity root. 
WalkEntityRoots should + // emit it and stop -- no children to recurse into. + bs := newTestBlockstore() + dserv := newTestDAGService(bs) + file := fileNodeWithData(t, []byte("content")) + require.NoError(t, dserv.Add(t.Context(), file)) + + visited := collectEntityWalk(t, bs, file.Cid()) + assert.Len(t, visited, 1) + assert.Equal(t, file.Cid(), visited[0]) +} + +func TestEntityWalk_ChunkedFileDoesNotDescend(t *testing.T) { + // A chunked file has child links (chunks). This is the core + // +entities optimization: the file root is emitted but its chunks + // are NOT traversed. Without this, every chunk CID would be + // provided to the DHT, which is wasteful. + bs := newTestBlockstore() + dserv := newTestDAGService(bs) + + chunk1 := merkledag.NewRawNode([]byte("chunk1")) + chunk2 := merkledag.NewRawNode([]byte("chunk2")) + require.NoError(t, dserv.Add(t.Context(), chunk1)) + require.NoError(t, dserv.Add(t.Context(), chunk2)) + + fsn := ft.NewFSNode(ft.TFile) + fsn.AddBlockSize(6) + fsn.AddBlockSize(6) + fileData, err := fsn.GetBytes() + require.NoError(t, err) + fileNode := merkledag.NodeWithData(fileData) + fileNode.AddNodeLink("", chunk1) + fileNode.AddNodeLink("", chunk2) + require.NoError(t, dserv.Add(t.Context(), fileNode)) + + visited := collectEntityWalk(t, bs, fileNode.Cid()) + assert.Len(t, visited, 1, "only file root, NOT chunks") + assert.Equal(t, fileNode.Cid(), visited[0]) + assert.NotContains(t, visited, chunk1.Cid()) + assert.NotContains(t, visited, chunk2.Cid()) +} + +func TestEntityWalk_RawNodeIsFile(t *testing.T) { + // Raw codec blocks (CIDv1 small files) are treated as files. + // They have no children and should be emitted as a single entity. 
+ bs := newTestBlockstore() + raw := merkledag.NewRawNode([]byte("small file")) + require.NoError(t, bs.Put(t.Context(), raw)) + + visited := collectEntityWalk(t, bs, raw.Cid()) + assert.Len(t, visited, 1) + assert.Equal(t, raw.Cid(), visited[0]) +} + +func TestEntityWalk_Symlink(t *testing.T) { + // A UnixFS symlink is a leaf entity, like a file. It is emitted + // but its children (none, by definition) are not descended. + bs := newTestBlockstore() + dserv := newTestDAGService(bs) + + fsn := ft.NewFSNode(ft.TSymlink) + fsn.SetData([]byte("/some/target/path")) + symData, err := fsn.GetBytes() + require.NoError(t, err) + symNode := merkledag.NodeWithData(symData) + require.NoError(t, dserv.Add(t.Context(), symNode)) + + visited := collectEntityWalk(t, bs, symNode.Cid()) + assert.Len(t, visited, 1) + assert.Equal(t, symNode.Cid(), visited[0]) +} + +func TestEntityWalk_DirectoryWithSymlink(t *testing.T) { + // A directory containing a symlink. Both the directory and the + // symlink should be emitted (symlink is a leaf entity). + bs := newTestBlockstore() + dserv := newTestDAGService(bs) + + fsn := ft.NewFSNode(ft.TSymlink) + fsn.SetData([]byte("../other")) + symData, err := fsn.GetBytes() + require.NoError(t, err) + symNode := merkledag.NodeWithData(symData) + require.NoError(t, dserv.Add(t.Context(), symNode)) + + file := fileNodeWithData(t, []byte("real-file")) + require.NoError(t, dserv.Add(t.Context(), file)) + + dir := ft.EmptyDirNode() + dir.AddNodeLink("link.txt", symNode) + dir.AddNodeLink("real.txt", file) + require.NoError(t, dserv.Add(t.Context(), dir)) + + visited := collectEntityWalk(t, bs, dir.Cid()) + assert.Len(t, visited, 3, "dir + symlink + file") + assert.Equal(t, dir.Cid(), visited[0], "directory emitted first") + assert.Contains(t, visited, symNode.Cid()) + assert.Contains(t, visited, file.Cid()) +} + +func TestEntityWalk_Directory(t *testing.T) { + // A UnixFS directory is a container entity. 
It is emitted and its + // children (files) are recursed into. Each file is also emitted. + bs := newTestBlockstore() + dserv := newTestDAGService(bs) + + file1 := fileNodeWithData(t, []byte("file1")) + file2 := fileNodeWithData(t, []byte("file2")) + require.NoError(t, dserv.Add(t.Context(), file1)) + require.NoError(t, dserv.Add(t.Context(), file2)) + + dir := ft.EmptyDirNode() + dir.AddNodeLink("file1", file1) + dir.AddNodeLink("file2", file2) + require.NoError(t, dserv.Add(t.Context(), dir)) + + visited := collectEntityWalk(t, bs, dir.Cid()) + assert.Len(t, visited, 3, "dir + 2 files") + assert.Equal(t, dir.Cid(), visited[0], "directory emitted first") + assert.Contains(t, visited, file1.Cid()) + assert.Contains(t, visited, file2.Cid()) +} + +func TestEntityWalk_SiblingOrder(t *testing.T) { + // Siblings must be visited in left-to-right link order, matching + // the legacy BlockAll traversal and WalkDAG's order. + bs := newTestBlockstore() + dserv := newTestDAGService(bs) + + fileA := fileNodeWithData(t, []byte("aaa")) + fileB := fileNodeWithData(t, []byte("bbb")) + fileC := fileNodeWithData(t, []byte("ccc")) + require.NoError(t, dserv.Add(t.Context(), fileA)) + require.NoError(t, dserv.Add(t.Context(), fileB)) + require.NoError(t, dserv.Add(t.Context(), fileC)) + + dir := ft.EmptyDirNode() + dir.AddNodeLink("a.txt", fileA) + dir.AddNodeLink("b.txt", fileB) + dir.AddNodeLink("c.txt", fileC) + require.NoError(t, dserv.Add(t.Context(), dir)) + + visited := collectEntityWalk(t, bs, dir.Cid()) + require.Len(t, visited, 4) + assert.Equal(t, dir.Cid(), visited[0], "dir first (pre-order)") + assert.Equal(t, fileA.Cid(), visited[1], "first link visited first") + assert.Equal(t, fileB.Cid(), visited[2], "second link visited second") + assert.Equal(t, fileC.Cid(), visited[3], "third link visited third") +} + +func TestEntityWalk_DirectoryWithRawFiles(t *testing.T) { + // Directory containing raw codec files. 
Both the directory and the + // raw files should be emitted (raw = file entity). + bs := newTestBlockstore() + dserv := newTestDAGService(bs) + + raw1 := merkledag.NewRawNode([]byte("raw 1")) + raw2 := merkledag.NewRawNode([]byte("raw 2")) + require.NoError(t, dserv.Add(t.Context(), raw1)) + require.NoError(t, dserv.Add(t.Context(), raw2)) + + dir := ft.EmptyDirNode() + dir.AddNodeLink("r1.bin", raw1) + dir.AddNodeLink("r2.bin", raw2) + require.NoError(t, dserv.Add(t.Context(), dir)) + + visited := collectEntityWalk(t, bs, dir.Cid()) + assert.Len(t, visited, 3, "dir + 2 raw files") + assert.Contains(t, visited, dir.Cid()) + assert.Contains(t, visited, raw1.Cid()) + assert.Contains(t, visited, raw2.Cid()) +} + +// --- HAMT sharded directories --- + +func TestEntityWalk_HAMTDirectory(t *testing.T) { + // HAMT sharded directories store entries across multiple internal + // shard nodes. WalkEntityRoots must emit ALL shard nodes (needed + // for peers to enumerate the directory) AND all file entries, but + // must NOT descend into file chunks. 
+ bs := newTestBlockstore() + dserv := newTestDAGService(bs) + + shard, err := hamt.NewShard(dserv, 256) + require.NoError(t, err) + + numFiles := 100 + fileCIDs := make(map[cid.Cid]struct{}) + for i := range numFiles { + file := fileNodeWithData(t, []byte(fmt.Sprintf("hamt-file-%d", i))) + require.NoError(t, dserv.Add(t.Context(), file)) + require.NoError(t, shard.Set(t.Context(), fmt.Sprintf("file%d.txt", i), file)) + fileCIDs[file.Cid()] = struct{}{} + } + rootNd, err := shard.Node() + require.NoError(t, err) + + tracker := walker.NewMapTracker() + visited := collectEntityWalk(t, bs, rootNd.Cid(), walker.WithVisitedTracker(tracker)) + + // all files emitted + for fc := range fileCIDs { + assert.Contains(t, visited, fc) + } + // root shard emitted + assert.Contains(t, visited, rootNd.Cid()) + // internal shard nodes exist beyond just files + root + shardCount := len(visited) - numFiles + assert.Greater(t, shardCount, 0, + "HAMT with %d entries must have internal shard nodes", numFiles) + t.Logf("HAMT: %d visited (%d files + %d shard nodes)", len(visited), numFiles, shardCount) +} + +// --- non-UnixFS codecs (dag-cbor) --- +// +// The +entities chunk-skip optimization only applies to UnixFS files. +// For all other codecs, every reachable CID is emitted AND its children +// are followed. This ensures that dag-cbor metadata wrapping UnixFS +// content is fully discoverable. + +func TestEntityWalk_DagCBORStandalone(t *testing.T) { + // A single dag-cbor block with no links. Should be emitted as an + // opaque entity root (non-UnixFS). + bs := newTestBlockstore() + c := putDagCBOR(t, bs, "standalone") + visited := collectEntityWalk(t, bs, c) + assert.Len(t, visited, 1) + assert.Equal(t, c, visited[0]) +} + +func TestEntityWalk_DagCBORChain(t *testing.T) { + // dag-cbor A -> B -> C. All three are non-UnixFS, so all must be + // emitted. The walk follows links in non-UnixFS nodes to discover + // further content. 
+ bs := newTestBlockstore() + cC := putDagCBOR(t, bs, "C") + cB := putDagCBOR(t, bs, "B", cC) + cA := putDagCBOR(t, bs, "A", cB) + + visited := collectEntityWalk(t, bs, cA) + assert.Len(t, visited, 3, "all dag-cbor nodes emitted") + assert.Contains(t, visited, cA) + assert.Contains(t, visited, cB) + assert.Contains(t, visited, cC) +} + +func TestEntityWalk_DagCBORLinkingToUnixFS(t *testing.T) { + // dag-cbor root linking to a chunked UnixFS file. The dag-cbor + // root and the file root are emitted, but the file's chunks are + // NOT (entities optimization applies to the UnixFS file). + bs := newTestBlockstore() + dserv := newTestDAGService(bs) + + chunk := merkledag.NewRawNode([]byte("chunk")) + require.NoError(t, dserv.Add(t.Context(), chunk)) + fsn := ft.NewFSNode(ft.TFile) + fsn.AddBlockSize(5) + fileData, err := fsn.GetBytes() + require.NoError(t, err) + fileNode := merkledag.NodeWithData(fileData) + fileNode.AddNodeLink("", chunk) + require.NoError(t, dserv.Add(t.Context(), fileNode)) + + cborRoot := putDagCBOR(t, bs, "metadata", fileNode.Cid()) + + visited := collectEntityWalk(t, bs, cborRoot) + assert.Contains(t, visited, cborRoot, "dag-cbor root emitted") + assert.Contains(t, visited, fileNode.Cid(), "UnixFS file root emitted") + assert.NotContains(t, visited, chunk.Cid(), + "file chunks NOT emitted (entities optimization)") + assert.Len(t, visited, 2) +} + +// --- mixed codec DAG --- + +func TestEntityWalk_MixedCodecs(t *testing.T) { + // dag-cbor root -> UnixFS directory -> {file, raw leaf} + // All entity roots emitted, file chunks skipped. 
+ bs := newTestBlockstore() + dserv := newTestDAGService(bs) + + raw := merkledag.NewRawNode([]byte("raw leaf")) + require.NoError(t, dserv.Add(t.Context(), raw)) + + file := fileNodeWithData(t, []byte("file content")) + require.NoError(t, dserv.Add(t.Context(), file)) + + dir := ft.EmptyDirNode() + dir.AddNodeLink("raw.bin", raw) + dir.AddNodeLink("file.txt", file) + require.NoError(t, dserv.Add(t.Context(), dir)) + + cborRoot := putDagCBOR(t, bs, "wrapper", dir.Cid()) + + visited := collectEntityWalk(t, bs, cborRoot) + assert.Len(t, visited, 4, "cbor root + dir + file + raw") + assert.Contains(t, visited, cborRoot) + assert.Contains(t, visited, dir.Cid()) + assert.Contains(t, visited, file.Cid()) + assert.Contains(t, visited, raw.Cid()) +} + +// --- dedup across walks --- + +func TestEntityWalk_SharedTrackerDedup(t *testing.T) { + // Two directories sharing a file. With a shared VisitedTracker, + // the shared file is emitted only once across both walks. This is + // the cross-pin dedup mechanism for the reprovide cycle. 
+ bs := newTestBlockstore() + dserv := newTestDAGService(bs) + + shared := fileNodeWithData(t, []byte("shared")) + unique1 := fileNodeWithData(t, []byte("unique1")) + unique2 := fileNodeWithData(t, []byte("unique2")) + require.NoError(t, dserv.Add(t.Context(), shared)) + require.NoError(t, dserv.Add(t.Context(), unique1)) + require.NoError(t, dserv.Add(t.Context(), unique2)) + + dir1 := ft.EmptyDirNode() + dir1.AddNodeLink("shared", shared) + dir1.AddNodeLink("unique", unique1) + require.NoError(t, dserv.Add(t.Context(), dir1)) + + dir2 := ft.EmptyDirNode() + dir2.AddNodeLink("shared", shared) + dir2.AddNodeLink("unique", unique2) + require.NoError(t, dserv.Add(t.Context(), dir2)) + + tracker := walker.NewMapTracker() + fetch := walker.NodeFetcherFromBlockstore(bs) + var all []cid.Cid + + walker.WalkEntityRoots(t.Context(), dir1.Cid(), fetch, func(c cid.Cid) bool { + all = append(all, c) + return true + }, walker.WithVisitedTracker(tracker)) + + walker.WalkEntityRoots(t.Context(), dir2.Cid(), fetch, func(c cid.Cid) bool { + all = append(all, c) + return true + }, walker.WithVisitedTracker(tracker)) + + // dir1 + shared + unique1 + dir2 + unique2 = 5 + assert.Len(t, all, 5) + sharedCount := 0 + for _, c := range all { + if c == shared.Cid() { + sharedCount++ + } + } + assert.Equal(t, 1, sharedCount, "shared file emitted only once") +} + +// --- stop conditions --- + +func TestEntityWalk_EmitFalseStops(t *testing.T) { + // Returning false from emit must stop the walk immediately. + // Important for callers that want to limit results. 
+ bs := newTestBlockstore() + dserv := newTestDAGService(bs) + + dir := ft.EmptyDirNode() + for i := range 5 { + f := fileNodeWithData(t, []byte(fmt.Sprintf("f%d", i))) + require.NoError(t, dserv.Add(t.Context(), f)) + dir.AddNodeLink(fmt.Sprintf("f%d", i), f) + } + require.NoError(t, dserv.Add(t.Context(), dir)) + + count := 0 + fetch := walker.NodeFetcherFromBlockstore(bs) + walker.WalkEntityRoots(t.Context(), dir.Cid(), fetch, func(c cid.Cid) bool { + count++ + return count < 3 + }) + assert.Equal(t, 3, count) +} + +func TestEntityWalk_ContextCancellation(t *testing.T) { + // Walk must respect context cancellation and return the error. + bs := newTestBlockstore() + dserv := newTestDAGService(bs) + + file := fileNodeWithData(t, []byte("file")) + require.NoError(t, dserv.Add(t.Context(), file)) + dir := ft.EmptyDirNode() + dir.AddNodeLink("file", file) + require.NoError(t, dserv.Add(t.Context(), dir)) + + ctx, cancel := context.WithCancel(t.Context()) + cancel() // cancel before walk starts + + fetch := walker.NodeFetcherFromBlockstore(bs) + err := walker.WalkEntityRoots(ctx, dir.Cid(), fetch, func(c cid.Cid) bool { + return true + }) + assert.ErrorIs(t, err, context.Canceled) +} + +// --- error handling --- + +func TestEntityWalk_FetchErrorSkips(t *testing.T) { + // Missing child blocks are skipped gracefully (best-effort). + // The walk continues with other branches. This prevents a single + // corrupt block from breaking the entire provide cycle. 
+ bs := newTestBlockstore() + dserv := newTestDAGService(bs) + + exists := fileNodeWithData(t, []byte("exists")) + require.NoError(t, dserv.Add(t.Context(), exists)) + + missing := fileNodeWithData(t, []byte("missing")) + // intentionally NOT added to blockstore + + dir := ft.EmptyDirNode() + dir.AddNodeLink("exists", exists) + dir.AddNodeLink("missing", missing) + require.NoError(t, dserv.Add(t.Context(), dir)) + + visited := collectEntityWalk(t, bs, dir.Cid()) + assert.Contains(t, visited, dir.Cid()) + assert.Contains(t, visited, exists.Cid()) + assert.NotContains(t, visited, missing.Cid(), + "missing block should be skipped, not crash") +} diff --git a/dag/walker/identity_test.go b/dag/walker/identity_test.go new file mode 100644 index 000000000..7924d152d --- /dev/null +++ b/dag/walker/identity_test.go @@ -0,0 +1,156 @@ +package walker_test + +// Tests verifying that identity CIDs (multihash 0x00) are handled +// correctly across all walker and provider paths. Identity CIDs embed +// data inline, so providing them to the DHT is wasteful. The walker +// traverses through them (following links) but never emits them. +// +// This covers: +// - WalkDAG: identity root, identity child, identity dag-pb dir +// - WalkEntityRoots: identity file, identity dir, mixed normal+identity +// - IsIdentityCID: predicate correctness + +import ( + "testing" + + "github.com/ipfs/boxo/ipld/merkledag" + mdtest "github.com/ipfs/boxo/ipld/merkledag/test" + ft "github.com/ipfs/boxo/ipld/unixfs" + cid "github.com/ipfs/go-cid" + format "github.com/ipfs/go-ipld-format" + mh "github.com/multiformats/go-multihash" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// makeIdentityCID creates a CIDv1 with an identity multihash (data +// inline in the CID). codec determines the CID codec prefix. 
+func makeIdentityCID(t *testing.T, data []byte, codec uint64) cid.Cid { + t.Helper() + hash, err := mh.Encode(data, mh.IDENTITY) + require.NoError(t, err) + return cid.NewCidV1(codec, hash) +} + +// --- WalkDAG identity tests --- + +func TestWalkDAG_IdentityCID(t *testing.T) { + t.Run("identity raw CID as root is not emitted", func(t *testing.T) { + bs := newTestBlockstore() + idCid := makeIdentityCID(t, []byte("inline"), cid.Raw) + + visited := collectWalk(t, bs, idCid) + assert.Empty(t, visited, + "identity CID root must not be emitted") + }) + + t.Run("dag-pb linking to identity child skips identity", func(t *testing.T) { + bs := newTestBlockstore() + dserv := merkledag.NewDAGService(mdtest.Bserv()) + + idChild := makeIdentityCID(t, []byte("inline-child"), cid.Raw) + + root := merkledag.NodeWithData([]byte("root")) + require.NoError(t, root.AddRawLink("inline", &format.Link{Cid: idChild})) + require.NoError(t, dserv.Add(t.Context(), root)) + require.NoError(t, bs.Put(t.Context(), root)) + + visited := collectWalk(t, bs, root.Cid()) + assert.Len(t, visited, 1, "only the non-identity root") + assert.Equal(t, root.Cid(), visited[0]) + assert.NotContains(t, visited, idChild, + "identity child must not be emitted") + }) + + t.Run("identity dag-pb directory with normal raw child", func(t *testing.T) { + // simulates `ipfs add --inline` producing a small dag-pb + // directory with identity multihash, linking to a normal + // raw block. The identity directory must not be emitted, + // but its normal child must be. 
+ bs := newTestBlockstore() + + normalChild := putRawBlock(t, bs, []byte("normal-data")) + + // build a dag-pb directory node + dir := ft.EmptyDirNode() + require.NoError(t, dir.AddRawLink("child.bin", &format.Link{Cid: normalChild})) + + // re-encode the directory with an identity multihash to + // simulate what ipfs add --inline produces for small dirs + dirData := dir.RawData() + idHash, err := mh.Encode(dirData, mh.IDENTITY) + require.NoError(t, err) + idDirCid := cid.NewCidV1(cid.DagProtobuf, idHash) + // NOT stored in blockstore -- NewIdStore decodes from CID + + visited := collectWalk(t, bs, idDirCid) + assert.Len(t, visited, 1, "only the normal raw child") + assert.Equal(t, normalChild, visited[0], + "normal child reachable through identity dir must be emitted") + assert.NotContains(t, visited, idDirCid, + "identity directory must not be emitted") + }) +} + +// --- WalkEntityRoots identity tests --- + +func TestEntityWalk_IdentityFileNotEmitted(t *testing.T) { + bs := newTestBlockstore() + idFile := makeIdentityCID(t, []byte("tiny"), cid.Raw) + + visited := collectEntityWalk(t, bs, idFile) + assert.Empty(t, visited, "identity file must not be emitted") +} + +func TestEntityWalk_IdentityDirWithNormalChildren(t *testing.T) { + // An identity dag-pb directory (like `ipfs add --inline` produces + // for small dirs) linking to normal files. The identity directory + // must not be emitted, but its normal children must be. 
+ bs := newTestBlockstore() + dserv := newTestDAGService(bs) + + file1 := fileNodeWithData(t, []byte("file1")) + file2 := fileNodeWithData(t, []byte("file2")) + require.NoError(t, dserv.Add(t.Context(), file1)) + require.NoError(t, dserv.Add(t.Context(), file2)) + + dir := ft.EmptyDirNode() + dir.AddNodeLink("f1.txt", file1) + dir.AddNodeLink("f2.txt", file2) + + // re-encode with identity multihash + dirData := dir.RawData() + idHash, err := mh.Encode(dirData, mh.IDENTITY) + require.NoError(t, err) + idDirCid := cid.NewCidV1(cid.DagProtobuf, idHash) + + visited := collectEntityWalk(t, bs, idDirCid) + assert.Len(t, visited, 2, "both normal files emitted") + assert.Contains(t, visited, file1.Cid()) + assert.Contains(t, visited, file2.Cid()) + assert.NotContains(t, visited, idDirCid, + "identity directory must not be emitted") +} + +func TestEntityWalk_NormalDirWithIdentityChild(t *testing.T) { + // A normal directory containing an identity CID child. The + // directory is emitted, the identity child is not. 
+ bs := newTestBlockstore() + dserv := newTestDAGService(bs) + + idChild := makeIdentityCID(t, []byte("inline-file"), cid.Raw) + + normalFile := fileNodeWithData(t, []byte("normal")) + require.NoError(t, dserv.Add(t.Context(), normalFile)) + + dir := ft.EmptyDirNode() + require.NoError(t, dir.AddRawLink("inline.bin", &format.Link{Cid: idChild})) + dir.AddNodeLink("normal.txt", normalFile) + require.NoError(t, dserv.Add(t.Context(), dir)) + + visited := collectEntityWalk(t, bs, dir.Cid()) + assert.Contains(t, visited, dir.Cid(), "normal directory emitted") + assert.Contains(t, visited, normalFile.Cid(), "normal file emitted") + assert.NotContains(t, visited, idChild, + "identity child must not be emitted") +} diff --git a/dag/walker/visited.go b/dag/walker/visited.go new file mode 100644 index 000000000..600b20bc9 --- /dev/null +++ b/dag/walker/visited.go @@ -0,0 +1,360 @@ +package walker + +import ( + "crypto/rand" + "encoding/binary" + "errors" + "fmt" + "math" + + bbloom "github.com/ipfs/bbloom" + cid "github.com/ipfs/go-cid" +) + +// Default bloom filter parameters. +// +// See [NewBloomTracker] for creating a tracker with a specific FP rate. +const ( + // DefaultBloomFPRate is the target false positive rate, expressed as + // 1/N (one false positive per N lookups). At the default value of + // ~1 in 4.75 million (~0.00002%), each CID costs ~4 bytes (32 bits) + // before ipfs/bbloom's power-of-two rounding. + // + // This is low enough for most IPFS deployments. IPFS content + // typically has multiple providers, so a single node's false + // positive has no impact on content availability. Any CID skipped + // by a false positive is caught in the next reprovide cycle. + // + // Actual memory depends on how [BloomTracker] chains blooms; see + // the scaling table in its documentation. As a rough guide, a + // single bloom sized for N items uses N*32 bits rounded up to the + // next power of two (e.g. 2M CIDs -> ~8 MB, 10M CIDs -> ~64 MB). 
+ // + // Lowering this value (e.g. 1_000_000) uses less memory per CID + // but skips more CIDs. Raising it (e.g. 10_000_000) uses more + // memory but skips fewer. + DefaultBloomFPRate = 4_750_000 + + // DefaultBloomInitialCapacity is the number of expected items for + // the first bloom filter when no persisted count from a previous + // cycle exists. 2M items produces an ~8 MB bloom at the default FP + // rate, covering repos up to ~2M CIDs without chain growth. + DefaultBloomInitialCapacity = 2_000_000 + + // BloomGrowthMargin is multiplied with the persisted CID count from + // the previous reprovide cycle to size the initial bloom. The 1.5x + // margin provides headroom for repo growth between cycles so that + // a stable repo does not trigger chain growth on every cycle. + BloomGrowthMargin = 1.5 + + // BloomGrowthFactor determines how much larger each new bloom in the + // chain is compared to the previous one. 4x keeps the chain short + // (fewer blooms = less Has() overhead) while converging quickly to + // the actual repo size. + BloomGrowthFactor = 4 + + // MinBloomCapacity is the smallest expectedItems value accepted by + // [NewBloomTracker]. ipfs/bbloom derives k probe positions from a + // single SipHash via double hashing (h + i*l mod size). For small + // bitsets the stride patterns overlap, pushing the actual FP rate + // far above the designed target. Empirically, at capacity=1000 + // (32K-bit bitset) with k=22 the FP rate is ~50x worse than + // theory; at 10000 (512K bits) it matches. 10000 uses ~64 KB of + // memory while ensuring the actual FP rate matches the design. + MinBloomCapacity = 10_000 +) + +// BloomParams derives ipfs/bbloom parameters (bits per element, hash +// location count) from a target false positive rate expressed as 1/N. +// +// The number of hash functions k is derived as round(log2(N)), and +// bits per element as k / ln(2). 
Because k must be a positive integer, +// not every FP rate is exactly achievable -- the actual rate will be +// equal to or better than the target. Additionally, ipfs/bbloom rounds +// the total bitset to the next power of two, which further improves +// the actual rate. +func BloomParams(fpRate uint) (bitsPerElem uint, hashLocs uint) { + k := math.Round(math.Log2(float64(fpRate))) + if k < 1 { + k = 1 + } + bpe := k / math.Ln2 + return uint(math.Ceil(bpe)), uint(k) +} + +// VisitedTracker tracks which CIDs have been seen during DAG traversal. +// Implementations use c.Hash() (multihash bytes) as the key, so CIDv0 +// and CIDv1 of the same content are treated as the same entry. +// +// Implementations may be exact ([MapTracker]) or probabilistic +// ([BloomTracker]). Probabilistic implementations must keep the false +// positive rate negligible for the expected dataset size, or allow +// callers to adjust it (see [NewBloomTracker]). +// +// NOT safe for concurrent use. The provide pipeline runs on a single +// goroutine per reprovide cycle. Adding parallelism requires switching +// to thread-safe variants (bbloom AddTS/HasTS) or external +// synchronization. +type VisitedTracker interface { + // Visit marks a CID as visited. Returns true if it was NOT + // previously visited (first visit). + Visit(c cid.Cid) bool + // Has returns true if the CID was previously visited. + Has(c cid.Cid) bool +} + +var ( + _ VisitedTracker = (*cid.Set)(nil) + _ VisitedTracker = (*BloomTracker)(nil) + _ VisitedTracker = (*MapTracker)(nil) +) + +// MapTracker tracks visited CIDs using an in-memory map. Zero false +// positives. Useful for tests and small datasets. +// +// NOT safe for concurrent use. +type MapTracker struct { + set map[string]struct{} + deduplicated uint64 +} + +// NewMapTracker creates a new map-based visited tracker. 
+func NewMapTracker() *MapTracker { + return &MapTracker{set: make(map[string]struct{})} +} + +func (m *MapTracker) Visit(c cid.Cid) bool { + key := string(c.Hash()) + if _, ok := m.set[key]; ok { + m.deduplicated++ + return false + } + m.set[key] = struct{}{} + return true +} + +func (m *MapTracker) Has(c cid.Cid) bool { + _, ok := m.set[string(c.Hash())] + return ok +} + +// Deduplicated returns the number of Visit calls that returned false +// (CID already seen). Useful for logging how much dedup occurred. +func (m *MapTracker) Deduplicated() uint64 { return m.deduplicated } + +// BloomTracker tracks visited CIDs using a chain of bloom filters that +// grows automatically when the current filter becomes saturated. +// +// # Why it exists +// +// When the reprovide system walks pinned DAGs, many pins share the same +// sub-DAGs (e.g. append-only datasets where each version differs by a +// small delta). Without deduplication, the walker re-traverses every +// shared subtree for each pin -- O(pins * total_blocks) I/O. The bloom +// filter lets the walker skip already-visited subtrees in O(1), +// reducing work to O(unique_blocks). +// +// A single fixed-size bloom filter requires knowing the number of CIDs +// upfront. On the very first cycle (or after significant repo growth) +// this count is unknown. BloomTracker solves this by starting with a +// small bloom and automatically appending larger ones when the insert +// count reaches the current filter's designed capacity. +// +// # How it works +// +// BloomTracker maintains an ordered chain of bloom filters [b0, b1, ...]. +// Each filter's parameters (bits per element, hash count) are derived +// from the target false positive rate via [BloomParams]. +// +// - Has(c) checks ALL filters in the chain. If any filter reports the +// CID as present, it returns true. 
False positives are independent +// across filters because each uses unique random SipHash keys via +// [bbloom.NewWithKeys] (generated from crypto/rand). This also means +// different processes in a cluster hit different false positives, so +// a CID skipped by one node is still provided by others. +// - Visit(c) checks all filters first (like Has). If the CID is not +// found, it adds it to the latest filter and increments the insert +// counter. When inserts exceed the current filter's capacity, a new +// filter at BloomGrowthFactor times the capacity is appended. +// Saturation is detected via a simple integer comparison on every +// insert (O(1)). +// +// # Scaling behavior (at default FP rate) +// +// With DefaultBloomInitialCapacity = 2M and BloomGrowthFactor = 4x. +// Memory includes ipfs/bbloom's power-of-two rounding of each bitset. +// +// 2M CIDs: 1 bloom (~8 MB) +// 10M CIDs: 2 blooms (~42 MB) +// 40M CIDs: 3 blooms (~176 MB) +// 100M CIDs: 4 blooms (~713 MB) +// +// On subsequent cycles, the persisted count from the previous cycle +// sizes the initial bloom correctly (with BloomGrowthMargin headroom), +// so the chain typically stays at 1 bloom. +// +// # Concurrency +// +// NOT safe for concurrent use. See [VisitedTracker] for the +// single-goroutine invariant. +type BloomTracker struct { + chain []*bbloom.Bloom // oldest to newest + lastCap uint64 // designed capacity of the latest bloom + curInserts uint64 // inserts into current (latest) bloom + totalInserts uint64 // inserts across all blooms in chain + deduplicated uint64 // Visit calls that returned false + bitsPerElem uint // bits per element (derived from FP rate) + hashLocs uint // hash function count (derived from FP rate) +} + +// NewBloomTracker creates a bloom filter tracker sized for expectedItems +// at the given false positive rate (expressed as 1/N via fpRate). +// +// The bloom parameters (bits per element, hash count) are derived from +// fpRate via [BloomParams]. 
Because the hash count must be a positive +// integer, the actual FP rate may be slightly better than the target. +// ipfs/bbloom also rounds the bitset to the next power of two, further +// improving the actual rate. +// +// Returns an error if expectedItems is below [MinBloomCapacity], or +// fpRate is zero. +// +// When inserts exceed the current filter's capacity, a new filter at +// BloomGrowthFactor times the capacity is appended automatically. +// +// NOT safe for concurrent use. See [VisitedTracker] for the +// single-goroutine invariant. +func NewBloomTracker(expectedItems uint, fpRate uint) (*BloomTracker, error) { + if expectedItems < MinBloomCapacity { + return nil, fmt.Errorf("bloom tracker: expectedItems must be >= %d (got %d); "+ + "small blooms cause FP rates far above the design target "+ + "because ipfs/bbloom's double-hashing needs a large bitset", + MinBloomCapacity, expectedItems) + } + if fpRate == 0 { + return nil, errors.New("bloom tracker: fpRate must be > 0") + } + bpe, hlocs := BloomParams(fpRate) + b, err := newBloom(uint64(expectedItems), bpe, hlocs) + if err != nil { + return nil, fmt.Errorf("bloom tracker: %w", err) + } + log.Infow("bloom tracker created", + "capacity", expectedItems, + "fpRate", fmt.Sprintf("1 in %d (~%.6f%%)", fpRate, 100.0/float64(fpRate)), + "bitsPerElem", bpe, + "hashFunctions", hlocs) + return &BloomTracker{ + chain: []*bbloom.Bloom{b}, + lastCap: uint64(expectedItems), + bitsPerElem: bpe, + hashLocs: hlocs, + }, nil +} + +func (bt *BloomTracker) Has(c cid.Cid) bool { + key := []byte(c.Hash()) + // Iterate oldest to newest: frequently-repeated CIDs (e.g. shared + // sub-DAGs across many pins) land in the earliest filter, so + // checking old-first finds them with fewer probes. The alternative + // (newest-first) would help if duplicates cluster near each other + // in traversal order, but real DAG walks revisit globally popular + // subtrees more often than recent ones. 
+	for _, b := range bt.chain {
+		if b.Has(key) {
+			return true
+		}
+	}
+	return false
+}
+
+func (bt *BloomTracker) Visit(c cid.Cid) bool {
+	key := []byte(c.Hash())
+
+	// Check earlier blooms for the CID (oldest to newest, same
+	// rationale as Has). If any reports it as present (true positive
+	// from a prior growth epoch, or rare cross-bloom false positive),
+	// skip it.
+	earlier := bt.chain[:len(bt.chain)-1]
+	for _, b := range earlier {
+		if b.Has(key) {
+			bt.deduplicated++
+			return false
+		}
+	}
+
+	// Use AddIfNotHas on the current bloom: it atomically hashes,
+	// checks, and sets the bits in a single pass. This avoids the
+	// false-positive window that exists when Has() and Add() are
+	// called separately (a genuinely new CID could match already-set
+	// bits from other inserts, causing Has to return true and the CID
+	// to be silently skipped).
+	cur := bt.chain[len(bt.chain)-1]
+	if !cur.AddIfNotHas(key) {
+		bt.deduplicated++
+		return false
+	}
+	bt.curInserts++
+	bt.totalInserts++
+	// O(1) saturation check: once inserts exceed the current filter's
+	// designed capacity, append a larger bloom so the actual FP rate
+	// stays near the design target.
+	if bt.curInserts > bt.lastCap {
+		bt.grow()
+	}
+	return true
+}
+
+// Count returns the total number of unique CIDs added across all blooms.
+// Used to persist the cycle count for sizing the next cycle's bloom.
+func (bt *BloomTracker) Count() uint64 { return bt.totalInserts }
+
+// Deduplicated returns the number of Visit calls that returned false
+// (CID already seen or bloom false positive). Useful for logging how
+// much dedup occurred in a reprovide cycle.
+func (bt *BloomTracker) Deduplicated() uint64 { return bt.deduplicated }
+
+// grow appends a new bloom filter to the chain at BloomGrowthFactor
+// times the previous capacity with fresh random SipHash keys.
+//
+// The grown capacity is always >= BloomGrowthFactor * MinBloomCapacity
+// because NewBloomTracker enforces expectedItems >= MinBloomCapacity,
+// so the double-hashing FP rate issue with small bitsets cannot occur. 
+func (bt *BloomTracker) grow() { + newCap := bt.lastCap * BloomGrowthFactor + b, err := newBloom(newCap, bt.bitsPerElem, bt.hashLocs) + if err != nil { + // bitsPerElem and hashLocs are validated at construction time, + // so this is unreachable unless something is deeply wrong. + panic(fmt.Sprintf("bloom grow: %v", err)) + } + log.Infow("bloom tracker autoscaled", + "prevCapacity", bt.lastCap, + "newCapacity", newCap, + "totalInserts", bt.totalInserts, + "chainLength", len(bt.chain)+1) + bt.chain = append(bt.chain, b) + bt.lastCap = newCap + bt.curInserts = 0 +} + +// newBloom creates a single bbloom filter with random SipHash keys. +func newBloom(capacity uint64, bitsPerElem, hashLocs uint) (*bbloom.Bloom, error) { + k0, k1 := randomSipHashKeys() + return bbloom.NewWithKeys(k0, k1, + float64(capacity*uint64(bitsPerElem)), + float64(hashLocs)) +} + +// randomSipHashKeys generates fresh random SipHash keys via crypto/rand. +// Every bloom instance (across chain growth AND across process restarts) +// gets unique keys. 
This ensures: +// - false positives are uncorrelated across blooms in the chain +// - in a cluster running multiple kubo instances, each node's bloom +// hits different false positives, so a CID skipped by one node is +// still provided by others +func randomSipHashKeys() (uint64, uint64) { + var buf [16]byte + if _, err := rand.Read(buf[:]); err != nil { + panic(fmt.Sprintf("bloom: crypto/rand failed: %v", err)) + } + return binary.LittleEndian.Uint64(buf[:8]), + binary.LittleEndian.Uint64(buf[8:]) +} diff --git a/dag/walker/visited_test.go b/dag/walker/visited_test.go new file mode 100644 index 000000000..f93c88ab6 --- /dev/null +++ b/dag/walker/visited_test.go @@ -0,0 +1,285 @@ +package walker + +import ( + "crypto/sha256" + "encoding/binary" + "testing" + + cid "github.com/ipfs/go-cid" + mh "github.com/multiformats/go-multihash" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// makeCID creates a deterministic CIDv1 raw from an integer seed. +func makeCID(i int) cid.Cid { + var buf [4]byte + binary.LittleEndian.PutUint32(buf[:], uint32(i)) + h := sha256.Sum256(buf[:]) + hash, _ := mh.Encode(h[:], mh.SHA2_256) + return cid.NewCidV1(cid.Raw, hash) +} + +func makeCIDs(n int) []cid.Cid { + cids := make([]cid.Cid, n) + for i := range n { + cids[i] = makeCID(i) + } + return cids +} + +func TestBloomParams(t *testing.T) { + t.Run("default FP rate", func(t *testing.T) { + bpe, k := BloomParams(DefaultBloomFPRate) + // 1 in 4.75M -> k=round(log2(4750000))=22, bpe=ceil(22/ln2)=32 + assert.Equal(t, uint(32), bpe) + assert.Equal(t, uint(22), k) + }) + + t.Run("lower FP rate uses less memory", func(t *testing.T) { + bpeLow, _ := BloomParams(1_000_000) // 1 in 1M + bpeHigh, _ := BloomParams(DefaultBloomFPRate) + assert.Less(t, bpeLow, bpeHigh) + }) + + t.Run("higher FP rate uses more memory", func(t *testing.T) { + bpeDefault, _ := BloomParams(DefaultBloomFPRate) + bpeHigher, _ := BloomParams(10_000_000) + assert.Greater(t, bpeHigher, 
bpeDefault) + }) + + t.Run("fpRate=1 gives minimum params", func(t *testing.T) { + bpe, k := BloomParams(1) + assert.Equal(t, uint(1), k) + assert.GreaterOrEqual(t, bpe, uint(1)) + }) +} + +func TestMapTracker(t *testing.T) { + t.Run("visit and has", func(t *testing.T) { + m := NewMapTracker() + c := makeCID(0) + + assert.False(t, m.Has(c)) + assert.True(t, m.Visit(c)) // first visit + assert.True(t, m.Has(c)) + assert.False(t, m.Visit(c)) // second visit + }) + + t.Run("distinct CIDs are independent", func(t *testing.T) { + m := NewMapTracker() + c1 := makeCID(0) + c2 := makeCID(1) + + m.Visit(c1) + assert.True(t, m.Has(c1)) + assert.False(t, m.Has(c2)) + }) +} + +func TestBloomTracker(t *testing.T) { + t.Run("visit and has", func(t *testing.T) { + bt, err := NewBloomTracker(MinBloomCapacity, DefaultBloomFPRate) + require.NoError(t, err) + + c := makeCID(0) + + assert.False(t, bt.Has(c)) + assert.True(t, bt.Visit(c)) // first visit + assert.True(t, bt.Has(c)) + assert.False(t, bt.Visit(c)) // second visit + }) + + t.Run("count tracks unique inserts", func(t *testing.T) { + bt, err := NewBloomTracker(MinBloomCapacity, DefaultBloomFPRate) + require.NoError(t, err) + + cids := makeCIDs(100) + for _, c := range cids { + bt.Visit(c) + } + assert.Equal(t, uint64(100), bt.Count()) + + // revisiting doesn't increase count + for _, c := range cids { + bt.Visit(c) + } + assert.Equal(t, uint64(100), bt.Count()) + }) + + t.Run("chain growth on saturation", func(t *testing.T) { + bt, err := NewBloomTracker(MinBloomCapacity, DefaultBloomFPRate) + require.NoError(t, err) + assert.Len(t, bt.chain, 1) + + cids := makeCIDs(5 * MinBloomCapacity) + for _, c := range cids { + bt.Visit(c) + } + assert.Greater(t, len(bt.chain), 1, "chain should grow") + + // all inserted CIDs should still be found across the chain + for _, c := range cids { + assert.True(t, bt.Has(c), "CID should be found after chain growth") + } + }) + + t.Run("count survives chain growth", func(t *testing.T) { + // 
Insert just past capacity to trigger exactly one grow(). + // Minimal items in bloom2 keeps FP exposure negligible. + const total = MinBloomCapacity + 2 + bt, err := NewBloomTracker(MinBloomCapacity, DefaultBloomFPRate) + require.NoError(t, err) + + cids := makeCIDs(total) + for _, c := range cids { + bt.Visit(c) + } + assert.Greater(t, len(bt.chain), 1, "chain should have grown") + assert.Equal(t, uint64(total), bt.Count()) + }) + + t.Run("below MinBloomCapacity returns error", func(t *testing.T) { + _, err := NewBloomTracker(MinBloomCapacity-1, DefaultBloomFPRate) + require.Error(t, err) + assert.Contains(t, err.Error(), "expectedItems must be") + }) + + t.Run("zero fpRate returns error", func(t *testing.T) { + _, err := NewBloomTracker(MinBloomCapacity, 0) + require.Error(t, err) + assert.Contains(t, err.Error(), "fpRate must be > 0") + }) + + t.Run("FP regression at measurable rate", func(t *testing.T) { + // Use fpRate=1000 (1 in 1000) so we get enough FPs in 100K + // probes to be statistically measurable. + // + // Catches regressions in BloomParams derivation, bbloom + // behavior, or parameter coupling bugs. We allow 5x tolerance + // because bbloom's power-of-two rounding makes the actual rate + // better than target. 
+ const ( + fpTarget = 1000 + n = 50_000 + probes = 100_000 + ) + bt, err := NewBloomTracker(uint(n), fpTarget) + require.NoError(t, err) + + for _, c := range makeCIDs(n) { + bt.Visit(c) + } + + fpCount := 0 + for i := n; i < n+probes; i++ { + if bt.Has(makeCID(i)) { + fpCount++ + } + } + observedRate := float64(fpCount) / float64(probes) + expectedRate := 1.0 / float64(fpTarget) + + t.Logf("FP regression: %d / %d = %.4f%% (target: %.4f%%, 1 in %d)", + fpCount, probes, observedRate*100, expectedRate*100, fpTarget) + + assert.Less(t, observedRate, expectedRate*5, + "FP rate %.4f%% is >5x worse than target %.4f%%", observedRate*100, expectedRate*100) + }) + + t.Run("FP regression at default rate", func(t *testing.T) { + // DefaultBloomFPRate is ~1 in 4.75M. With 100K probes the + // expected FP count is 100K/4.75M = ~0.02, so we should see + // exactly 0. Any non-zero result indicates a regression. + const n = 100_000 + bt, err := NewBloomTracker(uint(n), DefaultBloomFPRate) + require.NoError(t, err) + + for _, c := range makeCIDs(n) { + bt.Visit(c) + } + + fpCount := 0 + for i := n; i < 2*n; i++ { + if bt.Has(makeCID(i)) { + fpCount++ + } + } + t.Logf("default rate FPs: %d / %d (expected: 0 at ~1 in %d)", + fpCount, n, DefaultBloomFPRate) + assert.Equal(t, 0, fpCount, + "at ~1 in 4.75M FP rate, 100K probes should produce 0 FPs") + }) +} + +func TestBloomAndMapEquivalence(t *testing.T) { + bt, err := NewBloomTracker(MinBloomCapacity, DefaultBloomFPRate) + require.NoError(t, err) + mt := NewMapTracker() + + cids := makeCIDs(500) + + for _, c := range cids { + bv := bt.Visit(c) + mv := mt.Visit(c) + assert.Equal(t, mv, bv, "Visit mismatch for %s", c) + } + + for _, c := range cids { + bh := bt.Has(c) + mh := mt.Has(c) + assert.Equal(t, mh, bh, "Has mismatch for %s", c) + } +} + +func TestCidSetSatisfiesInterface(t *testing.T) { + var tracker VisitedTracker = cid.NewSet() + + c := makeCID(42) + assert.False(t, tracker.Has(c)) + assert.True(t, tracker.Visit(c)) + 
assert.True(t, tracker.Has(c)) + assert.False(t, tracker.Visit(c)) +} + +func TestBloomTrackerUniqueKeys(t *testing.T) { + bt1, err := NewBloomTracker(MinBloomCapacity, DefaultBloomFPRate) + require.NoError(t, err) + bt2, err := NewBloomTracker(MinBloomCapacity, DefaultBloomFPRate) + require.NoError(t, err) + + cids := makeCIDs(500) + for _, c := range cids { + bt1.Visit(c) + bt2.Visit(c) + } + + for _, c := range cids { + assert.True(t, bt1.Has(c)) + assert.True(t, bt2.Has(c)) + } + + // Check 10K non-inserted CIDs: false positives should differ + // between trackers since each uses independent random SipHash keys. + // With shared keys, fpBoth == fp1 == fp2 (correlated). + // With independent keys, P(both FP on same CID) is negligible. + var fp1, fp2, fpBoth int + for i := 500; i < 10500; i++ { + c := makeCID(i) + h1 := bt1.Has(c) + h2 := bt2.Has(c) + if h1 { + fp1++ + } + if h2 { + fp2++ + } + if h1 && h2 { + fpBoth++ + } + } + t.Logf("independent key FPs: bt1=%d, bt2=%d, both=%d (out of 10000 probes)", fp1, fp2, fpBoth) + if fp1 > 0 && fp2 > 0 { + assert.Less(t, fpBoth, fp1, "FPs should be uncorrelated between instances") + } +} diff --git a/dag/walker/walker.go b/dag/walker/walker.go new file mode 100644 index 000000000..d71f27f6d --- /dev/null +++ b/dag/walker/walker.go @@ -0,0 +1,248 @@ +package walker + +import ( + "bytes" + "context" + "fmt" + "io" + "slices" + + blockstore "github.com/ipfs/boxo/blockstore" + cid "github.com/ipfs/go-cid" + logging "github.com/ipfs/go-log/v2" + ipld "github.com/ipld/go-ipld-prime" + cidlink "github.com/ipld/go-ipld-prime/linking/cid" + basicnode "github.com/ipld/go-ipld-prime/node/basic" + mh "github.com/multiformats/go-multihash" +) + +var log = logging.Logger("dagwalker") + +// LinksFetcher returns child link CIDs for a given CID. +// Used by [WalkDAG] which doesn't need entity type information. 
+type LinksFetcher func(ctx context.Context, c cid.Cid) ([]cid.Cid, error) + +// Option configures [WalkDAG] and [WalkEntityRoots]. +type Option func(*walkConfig) + +type walkConfig struct { + tracker VisitedTracker + locality func(context.Context, cid.Cid) (bool, error) +} + +// WithVisitedTracker sets the tracker used for cross-walk +// deduplication. When set, CIDs already visited (by this walk or a +// previous walk sharing the same tracker) are skipped along with +// their entire subtree. +func WithVisitedTracker(t VisitedTracker) Option { + return func(c *walkConfig) { c.tracker = t } +} + +// WithLocality sets a check function that determines whether a CID is +// locally available. When set, the walker only emits and descends into +// CIDs for which check returns true. Used by MFS providers to skip +// blocks not in the local blockstore (pass blockstore.Has directly). +// +// The locality check runs after the [VisitedTracker] check (which is +// a cheap in-memory operation), so already-visited CIDs never pay the +// locality I/O cost. +func WithLocality(check func(context.Context, cid.Cid) (bool, error)) Option { + return func(c *walkConfig) { c.locality = check } +} + +// WalkDAG performs an iterative depth-first walk of the DAG rooted at +// root, calling emit for each visited CID. Returns when the DAG is +// fully walked, emit returns false, or ctx is cancelled. +// +// The walk uses an explicit stack (not recursion) to avoid stack +// overflow on deep DAGs. For each CID: +// +// 1. [VisitedTracker].Visit -- if already visited, skip entire subtree. +// The CID is marked visited immediately (before fetch). If fetch +// later fails, the CID stays in the tracker and won't be retried +// this cycle, but is caught in the next reprovide cycle. This +// avoids a double bloom scan per CID. +// 2. If [WithLocality] is set, check locality -- if not local, skip. +// 3. Fetch block via fetch -- on error, log and skip. +// 4. 
Push child link CIDs to stack (deduped when popped at step 1). +// 5. Call emit(c) -- return false to stop the walk. +// +// # Traversal order +// +// Pre-order DFS with left-to-right sibling visiting: the root CID is +// always emitted first, and children are visited in the order they +// appear in the block's link list. This matches the legacy +// fetcherhelpers.BlockAll selector traversal and the conventional DFS +// order described in IPIP-0412. +func WalkDAG( + ctx context.Context, + root cid.Cid, + fetch LinksFetcher, + emit func(cid.Cid) bool, + opts ...Option, +) error { + cfg := &walkConfig{} + for _, o := range opts { + o(cfg) + } + return walkLoop(ctx, root, func(ctx context.Context, c cid.Cid) ([]cid.Cid, error) { + return fetch(ctx, c) + }, emit, cfg) +} + +// walkLoop is the shared iterative DFS implementation used by both +// [WalkDAG] and [WalkEntityRoots]. The fetch callback returns the +// children to push onto the stack; returning nil means "do not descend" +// (the CID is still emitted unless it is an identity CID). +func walkLoop( + ctx context.Context, + root cid.Cid, + fetch func(context.Context, cid.Cid) ([]cid.Cid, error), + emit func(cid.Cid) bool, + cfg *walkConfig, +) error { + stack := []cid.Cid{root} + + for len(stack) > 0 { + if ctx.Err() != nil { + return ctx.Err() + } + + // pop + c := stack[len(stack)-1] + stack = stack[:len(stack)-1] + + // visit (mark + dedup in one call). If the CID was already + // visited (by this walk or a prior walk sharing the tracker), + // skip it and its entire subtree. 
+		if cfg.tracker != nil && !cfg.tracker.Visit(c) {
+			continue
+		}
+
+		// locality check
+		if cfg.locality != nil {
+			local, err := cfg.locality(ctx, c)
+			if err != nil {
+				log.Errorf("walk: locality check %s: %s", c, err)
+				continue
+			}
+			if !local {
+				continue
+			}
+		}
+
+		// fetch and get children to descend into
+		children, err := fetch(ctx, c)
+		if err != nil {
+			log.Errorf("walk: fetch %s: %s", c, err)
+			continue
+		}
+
+		// push children in reverse order so the first link is on top
+		// of the stack and gets popped next (left-to-right sibling
+		// visit order, matching IPIP-0412 pre-order DFS).
+		//
+		// NOTE(review): slices.Reverse mutates the slice returned by
+		// fetch in place, so fetchers must hand over ownership of the
+		// returned slice. LinksFetcherFromBlockstore builds a fresh
+		// slice per call; confirm this holds for custom fetchers.
+		slices.Reverse(children)
+		stack = append(stack, children...)
+
+		// skip identity CIDs: content is inline, no need to provide.
+		// children are still pushed (above) so an inlined dag-pb
+		// directory's normal children get walked.
+		if c.Prefix().MhType == mh.IDENTITY {
+			continue
+		}
+		if !emit(c) {
+			return nil
+		}
+	}
+
+	return nil
+}
+
+// linkSystemForBlockstore creates an ipld.LinkSystem backed by a
+// blockstore, used by both [LinksFetcherFromBlockstore] and
+// [NodeFetcherFromBlockstore]. The blockstore is wrapped with
+// [blockstore.NewIdStore] so identity CIDs (multihash code 0x00,
+// data inline in the CID) are decoded transparently without
+// requiring a datastore lookup.
+func linkSystemForBlockstore(bs blockstore.Blockstore) ipld.LinkSystem {
+	idBS := blockstore.NewIdStore(bs)
+	ls := cidlink.DefaultLinkSystem()
+	ls.TrustedStorage = true
+	ls.StorageReadOpener = func(lctx ipld.LinkContext, lnk ipld.Link) (io.Reader, error) {
+		cl, ok := lnk.(cidlink.Link)
+		if !ok {
+			return nil, fmt.Errorf("unsupported link type: %T", lnk)
+		}
+		blk, err := idBS.Get(lctx.Ctx, cl.Cid)
+		if err != nil {
+			return nil, err
+		}
+		return bytes.NewReader(blk.RawData()), nil
+	}
+	return ls
+}
+
+// LinksFetcherFromBlockstore creates a [LinksFetcher] backed by a
+// local blockstore. 
Blocks are decoded using the codecs registered in +// the global multicodec registry (via ipld-prime's +// [cidlink.DefaultLinkSystem]). Identity CIDs are handled +// transparently via [blockstore.NewIdStore]. +// +// For custom link extraction, pass your own [LinksFetcher] to +// [WalkDAG] directly. +func LinksFetcherFromBlockstore(bs blockstore.Blockstore) LinksFetcher { + ls := linkSystemForBlockstore(bs) + + return func(ctx context.Context, c cid.Cid) ([]cid.Cid, error) { + lnk := cidlink.Link{Cid: c} + nd, err := ls.Load(ipld.LinkContext{Ctx: ctx}, lnk, basicnode.Prototype.Any) + if err != nil { + return nil, err + } + return collectLinks(c, nd), nil + } +} + +// collectLinks extracts all link CIDs from an ipld-prime node by +// recursively traversing maps, lists, and scalar link values. Only map +// values are inspected, not keys (no known IPLD codec uses link-typed +// map keys). parent is used for debug logging only. +func collectLinks(parent cid.Cid, nd ipld.Node) []cid.Cid { + var links []cid.Cid + collectLinksRecursive(parent, nd, &links) + return links +} + +func collectLinksRecursive(parent cid.Cid, nd ipld.Node, out *[]cid.Cid) { + switch nd.Kind() { + case ipld.Kind_Link: + lnk, err := nd.AsLink() + if err != nil { + log.Debugw("walk: link extraction failed", "cid", parent, "error", err) + return + } + if cl, ok := lnk.(cidlink.Link); ok { + *out = append(*out, cl.Cid) + } + case ipld.Kind_Map: + itr := nd.MapIterator() + for !itr.Done() { + _, v, err := itr.Next() + if err != nil { + log.Debugw("walk: map iteration failed", "cid", parent, "error", err) + break + } + collectLinksRecursive(parent, v, out) + } + case ipld.Kind_List: + itr := nd.ListIterator() + for !itr.Done() { + _, v, err := itr.Next() + if err != nil { + log.Debugw("walk: list iteration failed", "cid", parent, "error", err) + break + } + collectLinksRecursive(parent, v, out) + } + } +} diff --git a/dag/walker/walker_bench_test.go b/dag/walker/walker_bench_test.go new file mode 
100644 index 000000000..2e689b835 --- /dev/null +++ b/dag/walker/walker_bench_test.go @@ -0,0 +1,381 @@ +package walker_test + +// Benchmarks comparing boxo/dag/walker against the legacy ipld-prime +// selector-based traversal (fetcherhelpers.BlockAll). +// +// Legacy path (kubo Provide.Strategy=pinned today): +// +// bsfetcher with dagpb.AddSupportToChooser -> BlockAll selector traversal +// (uses OfflineIPLDFetcher -- NO unixfsnode.Reify, that's MFS-only) +// +// New path: +// +// WalkDAG + LinksFetcherFromBlockstore -> iterative DFS with VisitedTracker +// (uses cidlink.DefaultLinkSystem from the blockstore) +// +// # Why the new walker is faster (even without dedup) +// +// The WalkerNoTracker variant has no bloom/map overhead yet is still +// ~2x faster than BlockAll. The speedup comes from architectural +// differences, not deduplication: +// +// - No selector overhead: BlockAll constructs and interprets an +// ipld-prime selector (ExploreRecursive + ExploreAll) for every +// traversal. This involves building selector nodes, matching them +// against the data model at each step, and maintaining selector +// state across recursion levels. WalkDAG uses a plain stack-based +// DFS with zero selector machinery. +// - Simpler node decoding: BlockAll goes through bsfetcher which +// wraps blockservice, creates per-session fetchers, and resolves +// nodes via the full ipld-prime linking/loading pipeline with +// prototype choosers. WalkDAG uses cidlink.DefaultLinkSystem +// directly against the blockstore, skipping the blockservice and +// session layers entirely. +// - Fewer allocations: the selector path allocates FetchResult +// structs, selector state, and intermediate node wrappers per +// visited node. WalkDAG allocates only a CID slice per node +// (the child links) and reuses the stack. This shows in the +// benchmarks as ~30-40% fewer allocs/op. 
+// +// Dedup (bloom/map) adds a small overhead (~8-15%) on single walks +// but pays off across multiple walks of overlapping DAGs, which is +// the primary use case in reprovide cycles. +// +// # DAG types benchmarked +// +// - dag-pb: standard UnixFS DAGs (most common pinned content) +// - dag-cbor: IPLD-native DAGs (supported by pinned strategy, not UnixFS) +// - mixed: dag-cbor root linking to dag-pb subtrees (realistic for +// applications that wrap UnixFS content in dag-cbor metadata) +// +// # Variants per DAG type +// +// - BlockAll: legacy ipld-prime selector traversal (baseline) +// - WalkerNoTracker: new walker without dedup (pure walk cost) +// - WalkerMapTracker: new walker with exact map-based dedup +// - WalkerBloomTracker: new walker with bloom filter dedup +// - BloomSecondWalk: bloom Has() skip speed when all CIDs already +// in bloom (simulates cross-pin shared subtree skipping) + +import ( + "bytes" + "context" + "fmt" + "io" + "testing" + + "github.com/ipfs/boxo/blockservice" + blockstore "github.com/ipfs/boxo/blockstore" + "github.com/ipfs/boxo/dag/walker" + "github.com/ipfs/boxo/exchange/offline" + "github.com/ipfs/boxo/fetcher" + fetcherhelpers "github.com/ipfs/boxo/fetcher/helpers" + bsfetcher "github.com/ipfs/boxo/fetcher/impl/blockservice" + "github.com/ipfs/boxo/ipld/merkledag" + mdtest "github.com/ipfs/boxo/ipld/merkledag/test" + blocks "github.com/ipfs/go-block-format" + cid "github.com/ipfs/go-cid" + ds "github.com/ipfs/go-datastore" + dssync "github.com/ipfs/go-datastore/sync" + dagpb "github.com/ipld/go-codec-dagpb" + ipld "github.com/ipld/go-ipld-prime" + _ "github.com/ipld/go-ipld-prime/codec/dagcbor" + "github.com/ipld/go-ipld-prime/fluent/qp" + cidlink "github.com/ipld/go-ipld-prime/linking/cid" + basicnode "github.com/ipld/go-ipld-prime/node/basic" +) + +type benchDAG struct { + bs blockstore.Blockstore + bserv blockservice.BlockService + root cid.Cid + numNodes int + fetchFac fetcher.Factory // matches kubo's 
OfflineIPLDFetcher +} + +// makeBenchDAG_DagPB creates a pure dag-pb tree. +func makeBenchDAG_DagPB(b *testing.B, fanout, depth uint) *benchDAG { + b.Helper() + store := blockstore.NewBlockstore(dssync.MutexWrap(ds.NewMapDatastore())) + bserv := blockservice.New(store, offline.Exchange(store)) + dserv := merkledag.NewDAGService(bserv) + gen := mdtest.NewDAGGenerator() + root, allCids, err := gen.MakeDagNode(dserv.Add, fanout, depth) + if err != nil { + b.Fatal(err) + } + return &benchDAG{ + bs: store, + bserv: bserv, + root: root, + numNodes: len(allCids), + fetchFac: makeIPLDFetcher(bserv), + } +} + +// makeBenchDAG_DagCBOR creates a pure dag-cbor tree where each node +// is a map with "data" (bytes) and "links" (list of CID links). +func makeBenchDAG_DagCBOR(b *testing.B, fanout, depth int) *benchDAG { + b.Helper() + store := blockstore.NewBlockstore(dssync.MutexWrap(ds.NewMapDatastore())) + bserv := blockservice.New(store, offline.Exchange(store)) + + ls := cidlink.DefaultLinkSystem() + ls.StorageWriteOpener = func(lctx ipld.LinkContext) (io.Writer, ipld.BlockWriteCommitter, error) { + buf := bytes.Buffer{} + return &buf, func(lnk ipld.Link) error { + cl := lnk.(cidlink.Link) + blk, err := blocks.NewBlockWithCid(buf.Bytes(), cl.Cid) + if err != nil { + return err + } + return store.Put(context.Background(), blk) + }, nil + } + + lp := cidlink.LinkPrototype{Prefix: cid.Prefix{ + Version: 1, + Codec: cid.DagCBOR, + MhType: 0x12, // sha2-256 + MhLength: 32, + }} + + count := 0 + var build func(d int) ipld.Link + build = func(d int) ipld.Link { + var childLinks []ipld.Link + if d < depth { + for range fanout { + childLinks = append(childLinks, build(d+1)) + } + } + nd, err := qp.BuildMap(basicnode.Prototype.Any, -1, func(ma ipld.MapAssembler) { + qp.MapEntry(ma, "data", qp.Bytes(fmt.Appendf(nil, "node-%d", count))) + qp.MapEntry(ma, "links", qp.List(-1, func(la ipld.ListAssembler) { + for _, cl := range childLinks { + qp.ListEntry(la, qp.Link(cl)) + } + })) + }) + 
if err != nil { + b.Fatal(err) + } + lnk, err := ls.Store(ipld.LinkContext{}, lp, nd) + if err != nil { + b.Fatal(err) + } + count++ + return lnk + } + + rootLink := build(0) + rootCid := rootLink.(cidlink.Link).Cid + + return &benchDAG{ + bs: store, + bserv: bserv, + root: rootCid, + numNodes: count, + fetchFac: makeIPLDFetcher(bserv), + } +} + +// makeBenchDAG_Mixed creates a dag-cbor root that links to multiple +// dag-pb subtrees (realistic: dag-cbor metadata wrapping UnixFS content). +func makeBenchDAG_Mixed(b *testing.B, dagPBFanout, dagPBDepth uint, numPBSubtrees int) *benchDAG { + b.Helper() + store := blockstore.NewBlockstore(dssync.MutexWrap(ds.NewMapDatastore())) + bserv := blockservice.New(store, offline.Exchange(store)) + dserv := merkledag.NewDAGService(bserv) + + // build dag-pb subtrees + gen := mdtest.NewDAGGenerator() + var subtreeRoots []cid.Cid + totalNodes := 0 + for range numPBSubtrees { + root, allCids, err := gen.MakeDagNode(dserv.Add, dagPBFanout, dagPBDepth) + if err != nil { + b.Fatal(err) + } + subtreeRoots = append(subtreeRoots, root) + totalNodes += len(allCids) + } + + // build dag-cbor root linking to all dag-pb subtrees + ls := cidlink.DefaultLinkSystem() + ls.StorageWriteOpener = func(lctx ipld.LinkContext) (io.Writer, ipld.BlockWriteCommitter, error) { + buf := bytes.Buffer{} + return &buf, func(lnk ipld.Link) error { + cl := lnk.(cidlink.Link) + blk, err := blocks.NewBlockWithCid(buf.Bytes(), cl.Cid) + if err != nil { + return err + } + return store.Put(context.Background(), blk) + }, nil + } + lp := cidlink.LinkPrototype{Prefix: cid.Prefix{ + Version: 1, + Codec: cid.DagCBOR, + MhType: 0x12, + MhLength: 32, + }} + + nd, err := qp.BuildMap(basicnode.Prototype.Any, -1, func(ma ipld.MapAssembler) { + qp.MapEntry(ma, "type", qp.String("metadata")) + qp.MapEntry(ma, "subtrees", qp.List(-1, func(la ipld.ListAssembler) { + for _, r := range subtreeRoots { + qp.ListEntry(la, qp.Link(cidlink.Link{Cid: r})) + } + })) + }) + if err != nil 
{ + b.Fatal(err) + } + rootLink, err := ls.Store(ipld.LinkContext{}, lp, nd) + if err != nil { + b.Fatal(err) + } + totalNodes++ // the dag-cbor root itself + + return &benchDAG{ + bs: store, + bserv: bserv, + root: rootLink.(cidlink.Link).Cid, + numNodes: totalNodes, + fetchFac: makeIPLDFetcher(bserv), + } +} + +// makeIPLDFetcher matches kubo's OfflineIPLDFetcher setup: +// dagpb prototype chooser, SkipNotFound, NO unixfsnode.Reify. +func makeIPLDFetcher(bserv blockservice.BlockService) fetcher.Factory { + f := bsfetcher.NewFetcherConfig(bserv) + f.SkipNotFound = true + f.PrototypeChooser = dagpb.AddSupportToChooser(bsfetcher.DefaultPrototypeChooser) + return f +} + +// --- walk functions --- + +func benchBlockAll(b *testing.B, dag *benchDAG) { + b.Helper() + ctx := context.Background() + b.ResetTimer() + for range b.N { + session := dag.fetchFac.NewSession(ctx) + err := fetcherhelpers.BlockAll(ctx, session, cidlink.Link{Cid: dag.root}, func(res fetcher.FetchResult) error { + return nil + }) + if err != nil { + b.Fatal(err) + } + } +} + +func benchWalkerNoTracker(b *testing.B, dag *benchDAG) { + b.Helper() + ctx := context.Background() + fetch := walker.LinksFetcherFromBlockstore(dag.bs) + b.ResetTimer() + for range b.N { + walker.WalkDAG(ctx, dag.root, fetch, func(c cid.Cid) bool { return true }) + } +} + +func benchWalkerMapTracker(b *testing.B, dag *benchDAG) { + b.Helper() + ctx := context.Background() + fetch := walker.LinksFetcherFromBlockstore(dag.bs) + b.ResetTimer() + for range b.N { + t := walker.NewMapTracker() + walker.WalkDAG(ctx, dag.root, fetch, func(c cid.Cid) bool { return true }, walker.WithVisitedTracker(t)) + } +} + +func benchWalkerBloomTracker(b *testing.B, dag *benchDAG) { + b.Helper() + ctx := context.Background() + fetch := walker.LinksFetcherFromBlockstore(dag.bs) + b.ResetTimer() + for range b.N { + t, _ := walker.NewBloomTracker(max(uint(dag.numNodes), walker.MinBloomCapacity), walker.DefaultBloomFPRate) + walker.WalkDAG(ctx, dag.root, 
fetch, func(c cid.Cid) bool { return true }, walker.WithVisitedTracker(t)) + } +} + +func benchWalkerBloomSecondWalk(b *testing.B, dag *benchDAG) { + b.Helper() + ctx := context.Background() + fetch := walker.LinksFetcherFromBlockstore(dag.bs) + b.ResetTimer() + for range b.N { + t, _ := walker.NewBloomTracker(max(uint(dag.numNodes), walker.MinBloomCapacity), walker.DefaultBloomFPRate) + walker.WalkDAG(ctx, dag.root, fetch, func(c cid.Cid) bool { return true }, walker.WithVisitedTracker(t)) + walker.WalkDAG(ctx, dag.root, fetch, func(c cid.Cid) bool { + b.Fatal("unexpected CID in second walk") + return false + }, walker.WithVisitedTracker(t)) + } +} + +// --- dag-pb benchmarks (fanout=10, depth=3 -> ~1111 nodes) --- + +func BenchmarkDagPB_BlockAll(b *testing.B) { benchBlockAll(b, makeBenchDAG_DagPB(b, 10, 3)) } + +func BenchmarkDagPB_WalkerNoTracker(b *testing.B) { + benchWalkerNoTracker(b, makeBenchDAG_DagPB(b, 10, 3)) +} + +func BenchmarkDagPB_WalkerMapTracker(b *testing.B) { + benchWalkerMapTracker(b, makeBenchDAG_DagPB(b, 10, 3)) +} + +func BenchmarkDagPB_WalkerBloomTracker(b *testing.B) { + benchWalkerBloomTracker(b, makeBenchDAG_DagPB(b, 10, 3)) +} + +func BenchmarkDagPB_BloomSecondWalk(b *testing.B) { + benchWalkerBloomSecondWalk(b, makeBenchDAG_DagPB(b, 10, 3)) +} + +// --- dag-cbor benchmarks (fanout=10, depth=3 -> 1111 nodes) --- + +func BenchmarkDagCBOR_BlockAll(b *testing.B) { benchBlockAll(b, makeBenchDAG_DagCBOR(b, 10, 3)) } + +func BenchmarkDagCBOR_WalkerNoTracker(b *testing.B) { + benchWalkerNoTracker(b, makeBenchDAG_DagCBOR(b, 10, 3)) +} + +func BenchmarkDagCBOR_WalkerMapTracker(b *testing.B) { + benchWalkerMapTracker(b, makeBenchDAG_DagCBOR(b, 10, 3)) +} + +func BenchmarkDagCBOR_WalkerBloomTracker(b *testing.B) { + benchWalkerBloomTracker(b, makeBenchDAG_DagCBOR(b, 10, 3)) +} + +func BenchmarkDagCBOR_BloomSecondWalk(b *testing.B) { + benchWalkerBloomSecondWalk(b, makeBenchDAG_DagCBOR(b, 10, 3)) +} + +// --- mixed benchmarks (dag-cbor root -> 5 
dag-pb subtrees, each fanout=5 depth=2 -> ~156 nodes) --- + +func BenchmarkMixed_BlockAll(b *testing.B) { benchBlockAll(b, makeBenchDAG_Mixed(b, 5, 2, 5)) } + +func BenchmarkMixed_WalkerNoTracker(b *testing.B) { + benchWalkerNoTracker(b, makeBenchDAG_Mixed(b, 5, 2, 5)) +} + +func BenchmarkMixed_WalkerMapTracker(b *testing.B) { + benchWalkerMapTracker(b, makeBenchDAG_Mixed(b, 5, 2, 5)) +} + +func BenchmarkMixed_WalkerBloomTracker(b *testing.B) { + benchWalkerBloomTracker(b, makeBenchDAG_Mixed(b, 5, 2, 5)) +} + +func BenchmarkMixed_BloomSecondWalk(b *testing.B) { + benchWalkerBloomSecondWalk(b, makeBenchDAG_Mixed(b, 5, 2, 5)) +} diff --git a/dag/walker/walker_test.go b/dag/walker/walker_test.go new file mode 100644 index 000000000..67f6ba996 --- /dev/null +++ b/dag/walker/walker_test.go @@ -0,0 +1,490 @@ +package walker_test + +import ( + "context" + "fmt" + "testing" + + blockstore "github.com/ipfs/boxo/blockstore" + "github.com/ipfs/boxo/dag/walker" + "github.com/ipfs/boxo/ipld/merkledag" + mdtest "github.com/ipfs/boxo/ipld/merkledag/test" + ft "github.com/ipfs/boxo/ipld/unixfs" + "github.com/ipfs/boxo/ipld/unixfs/hamt" + blocks "github.com/ipfs/go-block-format" + cid "github.com/ipfs/go-cid" + ds "github.com/ipfs/go-datastore" + dssync "github.com/ipfs/go-datastore/sync" + mh "github.com/multiformats/go-multihash" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func newTestBlockstore() blockstore.Blockstore { + return blockstore.NewBlockstore(dssync.MutexWrap(ds.NewMapDatastore())) +} + +func buildDAG(t *testing.T, bs blockstore.Blockstore, fanout, depth uint) (cid.Cid, []cid.Cid) { + t.Helper() + dserv := merkledag.NewDAGService(mdtest.Bserv()) + gen := mdtest.NewDAGGenerator() + root, allCids, err := gen.MakeDagNode(dserv.Add, fanout, depth) + require.NoError(t, err) + for _, c := range allCids { + nd, err := dserv.Get(t.Context(), c) + require.NoError(t, err) + require.NoError(t, bs.Put(t.Context(), nd)) + } + return root, 
allCids +} + +func putRawBlock(t *testing.T, bs blockstore.Blockstore, data []byte) cid.Cid { + t.Helper() + hash, _ := mh.Sum(data, mh.SHA2_256, -1) + c := cid.NewCidV1(cid.Raw, hash) + blk, err := blocks.NewBlockWithCid(data, c) + require.NoError(t, err) + require.NoError(t, bs.Put(t.Context(), blk)) + return c +} + +func collectWalk(t *testing.T, bs blockstore.Blockstore, root cid.Cid, opts ...walker.Option) []cid.Cid { + t.Helper() + var visited []cid.Cid + fetch := walker.LinksFetcherFromBlockstore(bs) + err := walker.WalkDAG(t.Context(), root, fetch, func(c cid.Cid) bool { + visited = append(visited, c) + return true + }, opts...) + require.NoError(t, err) + return visited +} + +func TestWalkDAG_Traversal(t *testing.T) { + // Verify the walker visits every node in a DAG exactly once + // and in DFS pre-order (root first). + t.Run("visits all nodes in a multi-level DAG", func(t *testing.T) { + bs := newTestBlockstore() + root, allCids := buildDAG(t, bs, 3, 2) + + visited := collectWalk(t, bs, root) + assert.Len(t, visited, len(allCids), + "should visit every node in the DAG") + + visitedSet := make(map[cid.Cid]struct{}) + for _, c := range visited { + visitedSet[c] = struct{}{} + } + for _, c := range allCids { + assert.Contains(t, visitedSet, c, + "every CID in the DAG should be visited") + } + }) + + // The root CID must always be the first emitted CID, which is + // critical for ExecuteFastProvideRoot (root must be announced + // before any other block). + t.Run("root is always the first CID emitted", func(t *testing.T) { + bs := newTestBlockstore() + root, _ := buildDAG(t, bs, 3, 2) + + visited := collectWalk(t, bs, root) + require.NotEmpty(t, visited) + assert.Equal(t, root, visited[0]) + }) + + // Siblings must be visited in left-to-right link order, matching + // the legacy fetcherhelpers.BlockAll selector traversal and the + // conventional DFS order from IPIP-0412. 
+ t.Run("siblings visited in left-to-right link order", func(t *testing.T) { + bs := newTestBlockstore() + dserv := merkledag.NewDAGService(mdtest.Bserv()) + + // three distinct leaves with deterministic link order + leafA := merkledag.NodeWithData([]byte("leaf-A")) + leafB := merkledag.NodeWithData([]byte("leaf-B")) + leafC := merkledag.NodeWithData([]byte("leaf-C")) + require.NoError(t, dserv.Add(t.Context(), leafA)) + require.NoError(t, dserv.Add(t.Context(), leafB)) + require.NoError(t, dserv.Add(t.Context(), leafC)) + + root := merkledag.NodeWithData([]byte("root")) + root.AddNodeLink("a", leafA) + root.AddNodeLink("b", leafB) + root.AddNodeLink("c", leafC) + require.NoError(t, dserv.Add(t.Context(), root)) + + for _, nd := range []merkledag.ProtoNode{*root, *leafA, *leafB, *leafC} { + require.NoError(t, bs.Put(t.Context(), &nd)) + } + + visited := collectWalk(t, bs, root.Cid()) + require.Len(t, visited, 4) + assert.Equal(t, root.Cid(), visited[0], "root first (pre-order)") + assert.Equal(t, leafA.Cid(), visited[1], "first link visited first") + assert.Equal(t, leafB.Cid(), visited[2], "second link visited second") + assert.Equal(t, leafC.Cid(), visited[3], "third link visited third") + }) + + // A single raw block with no links should be walked as a + // single-node DAG (common case: small files stored as raw leaves). + t.Run("single leaf node with no children", func(t *testing.T) { + bs := newTestBlockstore() + leaf := putRawBlock(t, bs, []byte("leaf data")) + + visited := collectWalk(t, bs, leaf) + assert.Len(t, visited, 1) + assert.Equal(t, leaf, visited[0]) + }) + + // When no VisitedTracker is provided, the walker should still + // visit every node (no dedup, no crash). 
+ t.Run("works without any tracker", func(t *testing.T) { + bs := newTestBlockstore() + root, allCids := buildDAG(t, bs, 2, 2) + + visited := collectWalk(t, bs, root) // no WithVisitedTracker + assert.Len(t, visited, len(allCids)) + }) + + // DAG diamond: root -> {A, B}, A -> {C}, B -> {C}. + // With a tracker, C must be visited exactly once even though two + // paths lead to it. Without a tracker, C would be visited twice. + t.Run("DAG diamond: shared child visited once with tracker", func(t *testing.T) { + bs := newTestBlockstore() + dserv := merkledag.NewDAGService(mdtest.Bserv()) + + leafC := merkledag.NodeWithData([]byte("shared-leaf-C")) + require.NoError(t, dserv.Add(t.Context(), leafC)) + + nodeA := merkledag.NodeWithData([]byte("node-A")) + nodeA.AddNodeLink("c", leafC) + require.NoError(t, dserv.Add(t.Context(), nodeA)) + + nodeB := merkledag.NodeWithData([]byte("node-B")) + nodeB.AddNodeLink("c", leafC) + require.NoError(t, dserv.Add(t.Context(), nodeB)) + + root := merkledag.NodeWithData([]byte("root")) + root.AddNodeLink("a", nodeA) + root.AddNodeLink("b", nodeB) + require.NoError(t, dserv.Add(t.Context(), root)) + + for _, nd := range []merkledag.ProtoNode{*root, *nodeA, *nodeB, *leafC} { + require.NoError(t, bs.Put(t.Context(), &nd)) + } + + tracker := walker.NewMapTracker() + visited := collectWalk(t, bs, root.Cid(), walker.WithVisitedTracker(tracker)) + + cCount := 0 + for _, v := range visited { + if v == leafC.Cid() { + cCount++ + } + } + assert.Equal(t, 1, cCount, + "shared child C must be visited exactly once") + assert.Len(t, visited, 4, // root, A, B, C + "diamond DAG has 4 unique nodes") + }) + + // HAMT sharded directories are multi-level dag-pb structures where + // internal shard buckets are separate blocks. WalkDAG must visit + // every internal shard node and every leaf entry node. This is + // critical for provide: all HAMT layers must be announced so peers + // can enumerate the directory. 
+ t.Run("HAMT sharded directory: all internal shard nodes visited", func(t *testing.T) { + bs := newTestBlockstore() + dserv := merkledag.NewDAGService(mdtest.Bserv()) + + // build a HAMT with 500 entries to force multiple shard levels. + // half are empty dirs (all share the same CID -- tests dedup + // across repeated leaves), half are unique files (distinct CIDs). + const nEntries = 500 + shard, err := hamt.NewShard(dserv, 256) + require.NoError(t, err) + leafCids := make(map[cid.Cid]struct{}) + emptyDir := ft.EmptyDirNode() + require.NoError(t, dserv.Add(t.Context(), emptyDir)) + for i := range nEntries { + name := fmt.Sprintf("entry-%04d", i) + if i%2 == 0 { + // empty dir (shared CID across all even entries) + require.NoError(t, shard.Set(t.Context(), name, emptyDir)) + leafCids[emptyDir.Cid()] = struct{}{} + } else { + // unique file + leaf := merkledag.NodeWithData([]byte(fmt.Sprintf("file-%04d", i))) + require.NoError(t, dserv.Add(t.Context(), leaf)) + require.NoError(t, shard.Set(t.Context(), name, leaf)) + leafCids[leaf.Cid()] = struct{}{} + } + } + + // serialize the HAMT (writes all shard nodes to dserv) + rootNd, err := shard.Node() + require.NoError(t, err) + rootCid := rootNd.Cid() + + // collect all CIDs reachable from root via dserv (ground truth) + allCids := make(map[cid.Cid]struct{}) + var enumerate func(c cid.Cid) + enumerate = func(c cid.Cid) { + if _, ok := allCids[c]; ok { + return + } + allCids[c] = struct{}{} + nd, err := dserv.Get(t.Context(), c) + if err != nil { + return + } + for _, lnk := range nd.Links() { + enumerate(lnk.Cid) + } + } + enumerate(rootCid) + + // copy all blocks to test blockstore + for c := range allCids { + nd, err := dserv.Get(t.Context(), c) + require.NoError(t, err) + require.NoError(t, bs.Put(t.Context(), nd)) + } + + // walk with tracker to dedup (HAMT leaf nodes are unique but + // the walker without tracker would revisit them via each shard) + tracker := walker.NewMapTracker() + visited := collectWalk(t, bs, 
rootCid, walker.WithVisitedTracker(tracker)) + visitedSet := make(map[cid.Cid]struct{}) + for _, c := range visited { + visitedSet[c] = struct{}{} + } + + assert.Len(t, visitedSet, len(allCids), + "WalkDAG must visit every unique block in the HAMT (internal shards + leaf entries)") + for c := range allCids { + assert.Contains(t, visitedSet, c, + "CID %s reachable from HAMT root must be visited", c) + } + + // verify internal shard nodes exist (not just leaves) + internalCount := len(allCids) - len(leafCids) + assert.Greater(t, internalCount, 1, + "HAMT with 500 entries must have multiple internal shard nodes") + t.Logf("HAMT: %d total blocks (%d internal shards, %d leaf entries)", + len(allCids), internalCount, len(leafCids)) + }) +} + +func TestWalkDAG_Dedup(t *testing.T) { + // MapTracker across two walks: CIDs from the first walk are + // skipped in the second walk. This is the core mechanism for + // cross-pin dedup in the reprovide cycle. + t.Run("shared MapTracker skips already-visited subtrees", func(t *testing.T) { + bs := newTestBlockstore() + root1, cids1 := buildDAG(t, bs, 2, 2) + root2, _ := buildDAG(t, bs, 2, 2) + + tracker := walker.NewMapTracker() + + visited1 := collectWalk(t, bs, root1, walker.WithVisitedTracker(tracker)) + assert.Len(t, visited1, len(cids1)) + + // second walk: independent root, but if any CID overlapped + // it would be skipped + visited2 := collectWalk(t, bs, root2, walker.WithVisitedTracker(tracker)) + for _, c := range visited2 { + for _, c1 := range visited1 { + assert.NotEqual(t, c, c1, + "CID from first walk must not appear in second walk") + } + } + }) + + // BloomTracker: walk the same root twice. Second walk should + // produce zero CIDs because everything is already in the bloom. 
+ t.Run("shared BloomTracker dedup across walks of same root", func(t *testing.T) { + bs := newTestBlockstore() + root, allCids := buildDAG(t, bs, 3, 2) + + tracker, err := walker.NewBloomTracker(walker.MinBloomCapacity, walker.DefaultBloomFPRate) + require.NoError(t, err) + + visited1 := collectWalk(t, bs, root, walker.WithVisitedTracker(tracker)) + assert.Len(t, visited1, len(allCids)) + + visited2 := collectWalk(t, bs, root, walker.WithVisitedTracker(tracker)) + assert.Empty(t, visited2, + "second walk of same root must produce zero CIDs") + }) +} + +func TestWalkDAG_Locality(t *testing.T) { + // WithLocality filters CIDs that are not locally available. + // Used by MFS providers to skip blocks not in the local blockstore. + t.Run("only local CIDs are visited", func(t *testing.T) { + bs := newTestBlockstore() + root, _ := buildDAG(t, bs, 2, 1) + + locality := func(_ context.Context, c cid.Cid) (bool, error) { + return c == root, nil // only root is "local" + } + + visited := collectWalk(t, bs, root, walker.WithLocality(locality)) + assert.Len(t, visited, 1, + "only the local root should be visited") + assert.Equal(t, root, visited[0]) + }) + + // Locality errors should skip the CID (best-effort), not crash + // the walk. + t.Run("locality error skips CID gracefully", func(t *testing.T) { + bs := newTestBlockstore() + root, _ := buildDAG(t, bs, 2, 1) + + locality := func(_ context.Context, c cid.Cid) (bool, error) { + if c == root { + return true, nil + } + return false, assert.AnError + } + + visited := collectWalk(t, bs, root, walker.WithLocality(locality)) + assert.Len(t, visited, 1, + "children with locality errors should be skipped") + assert.Equal(t, root, visited[0]) + }) +} + +func TestWalkDAG_ErrorHandling(t *testing.T) { + // When the root itself is missing from the blockstore, the walk + // should return successfully with zero CIDs (best-effort: a + // corrupt block should not break the entire provide cycle). 
+ t.Run("missing root produces no CIDs", func(t *testing.T) { + bs := newTestBlockstore() + missing := putRawBlock(t, bs, []byte("will-be-deleted")) + require.NoError(t, bs.DeleteBlock(t.Context(), missing)) + + var visited []cid.Cid + fetch := walker.LinksFetcherFromBlockstore(bs) + err := walker.WalkDAG(t.Context(), missing, fetch, func(c cid.Cid) bool { + visited = append(visited, c) + return true + }) + require.NoError(t, err, + "walk should succeed even with missing root (best-effort)") + assert.Empty(t, visited) + }) + + // When children fail to fetch, they are skipped but the root is + // still emitted. This ensures a corrupt child block doesn't + // prevent the parent from being provided. + t.Run("fetch error on children skips them but emits root", func(t *testing.T) { + bs := newTestBlockstore() + root, _ := buildDAG(t, bs, 2, 1) + + realFetch := walker.LinksFetcherFromBlockstore(bs) + failFetch := func(ctx context.Context, c cid.Cid) ([]cid.Cid, error) { + if c == root { + return realFetch(ctx, c) + } + return nil, assert.AnError + } + + var visited []cid.Cid + err := walker.WalkDAG(t.Context(), root, failFetch, func(c cid.Cid) bool { + visited = append(visited, c) + return true + }) + require.NoError(t, err) + assert.Len(t, visited, 1, + "only root should be emitted when children fail") + assert.Equal(t, root, visited[0]) + }) + + // CIDs are marked visited at pop time (before fetch). If fetch + // fails, the CID stays in the tracker and won't be retried this + // cycle. This avoids a double bloom scan per CID. The CID is + // caught in the next reprovide cycle (22h). 
+ t.Run("fetch error still marks CID as visited", func(t *testing.T) { + bs := newTestBlockstore() + root, _ := buildDAG(t, bs, 2, 1) + + tracker := walker.NewMapTracker() + realFetch := walker.LinksFetcherFromBlockstore(bs) + failChildFetch := func(ctx context.Context, c cid.Cid) ([]cid.Cid, error) { + if c != root { + return nil, assert.AnError + } + return realFetch(ctx, c) + } + + walker.WalkDAG(t.Context(), root, failChildFetch, func(c cid.Cid) bool { + return true + }, walker.WithVisitedTracker(tracker)) + + children, _ := realFetch(t.Context(), root) + for _, child := range children { + assert.True(t, tracker.Has(child), + "CID %s must be marked visited even after fetch error", child) + } + }) +} + +func TestWalkDAG_StopConditions(t *testing.T) { + // emit returning false must stop the walk immediately. + t.Run("emit false stops walk after N CIDs", func(t *testing.T) { + bs := newTestBlockstore() + root, _ := buildDAG(t, bs, 3, 3) + + count := 0 + fetch := walker.LinksFetcherFromBlockstore(bs) + err := walker.WalkDAG(t.Context(), root, fetch, func(c cid.Cid) bool { + count++ + return count < 5 + }) + require.NoError(t, err) + assert.Equal(t, 5, count, + "walk should stop after emit returns false") + }) + + // Context cancellation during the walk should stop it and return + // the context error. + t.Run("context cancellation stops walk mid-flight", func(t *testing.T) { + bs := newTestBlockstore() + root, _ := buildDAG(t, bs, 3, 3) + + ctx, cancel := context.WithCancel(t.Context()) + count := 0 + fetch := walker.LinksFetcherFromBlockstore(bs) + err := walker.WalkDAG(ctx, root, fetch, func(c cid.Cid) bool { + count++ + if count >= 3 { + cancel() + } + return true + }) + assert.ErrorIs(t, err, context.Canceled) + }) + + // An already-cancelled context should return immediately without + // visiting any CIDs. 
+ t.Run("already-cancelled context returns immediately", func(t *testing.T) { + bs := newTestBlockstore() + root, _ := buildDAG(t, bs, 2, 1) + + ctx, cancel := context.WithCancel(t.Context()) + cancel() + + var visited []cid.Cid + fetch := walker.LinksFetcherFromBlockstore(bs) + err := walker.WalkDAG(ctx, root, fetch, func(c cid.Cid) bool { + visited = append(visited, c) + return true + }) + assert.ErrorIs(t, err, context.Canceled) + assert.Empty(t, visited, + "no CIDs should be visited with cancelled context") + }) +} diff --git a/examples/go.mod b/examples/go.mod index bbbbbbaa7..c0f1b2c0c 100644 --- a/examples/go.mod +++ b/examples/go.mod @@ -50,7 +50,7 @@ require ( github.com/hashicorp/golang-lru v1.0.2 // indirect github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect github.com/huin/goupnp v1.3.0 // indirect - github.com/ipfs/bbloom v0.0.4 // indirect + github.com/ipfs/bbloom v0.1.0 // indirect github.com/ipfs/go-bitfield v1.1.0 // indirect github.com/ipfs/go-cidutil v0.1.1 // indirect github.com/ipfs/go-dsqueue v0.2.0 // indirect diff --git a/examples/go.sum b/examples/go.sum index bb0f2bd2d..55c111d90 100644 --- a/examples/go.sum +++ b/examples/go.sum @@ -239,8 +239,8 @@ github.com/huin/goupnp v1.3.0/go.mod h1:gnGPsThkYa7bFi/KWmEysQRf48l2dvR5bxr2OFck github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= -github.com/ipfs/bbloom v0.0.4 h1:Gi+8EGJ2y5qiD5FbsbpX/TMNcJw8gSqr7eyjHa4Fhvs= -github.com/ipfs/bbloom v0.0.4/go.mod h1:cS9YprKXpoZ9lT0n/Mw/a6/aFV6DTjTLYHeA+gyqMG0= +github.com/ipfs/bbloom v0.1.0 h1:nIWwfIE3AaG7RCDQIsrUonGCOTp7qSXzxH7ab/ss964= +github.com/ipfs/bbloom v0.1.0/go.mod h1:lDy3A3i6ndgEW2z1CaRFvDi5/ZTzgM1IxA/pkL7Wgts= github.com/ipfs/go-bitfield v1.1.0 
h1:fh7FIo8bSwaJEh6DdTWbCeZ1eqOaOkKFI74SCnsWbGA= github.com/ipfs/go-bitfield v1.1.0/go.mod h1:paqf1wjq/D2BBmzfTVFlJQ9IlFOZpg422HL0HqsGWHU= github.com/ipfs/go-block-format v0.2.3 h1:mpCuDaNXJ4wrBJLrtEaGFGXkferrw5eqVvzaHhtFKQk= diff --git a/go.mod b/go.mod index adc82a12a..4fab21a2a 100644 --- a/go.mod +++ b/go.mod @@ -14,7 +14,7 @@ require ( github.com/google/uuid v1.6.0 github.com/gorilla/mux v1.8.1 github.com/hashicorp/golang-lru/v2 v2.0.7 - github.com/ipfs/bbloom v0.0.4 + github.com/ipfs/bbloom v0.1.0 github.com/ipfs/go-bitfield v1.1.0 github.com/ipfs/go-block-format v0.2.3 github.com/ipfs/go-cid v0.6.0 diff --git a/go.sum b/go.sum index 4fd50e03e..642b3ca6c 100644 --- a/go.sum +++ b/go.sum @@ -240,8 +240,8 @@ github.com/huin/goupnp v1.3.0/go.mod h1:gnGPsThkYa7bFi/KWmEysQRf48l2dvR5bxr2OFck github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= -github.com/ipfs/bbloom v0.0.4 h1:Gi+8EGJ2y5qiD5FbsbpX/TMNcJw8gSqr7eyjHa4Fhvs= -github.com/ipfs/bbloom v0.0.4/go.mod h1:cS9YprKXpoZ9lT0n/Mw/a6/aFV6DTjTLYHeA+gyqMG0= +github.com/ipfs/bbloom v0.1.0 h1:nIWwfIE3AaG7RCDQIsrUonGCOTp7qSXzxH7ab/ss964= +github.com/ipfs/bbloom v0.1.0/go.mod h1:lDy3A3i6ndgEW2z1CaRFvDi5/ZTzgM1IxA/pkL7Wgts= github.com/ipfs/go-bitfield v1.1.0 h1:fh7FIo8bSwaJEh6DdTWbCeZ1eqOaOkKFI74SCnsWbGA= github.com/ipfs/go-bitfield v1.1.0/go.mod h1:paqf1wjq/D2BBmzfTVFlJQ9IlFOZpg422HL0HqsGWHU= github.com/ipfs/go-block-format v0.2.3 h1:mpCuDaNXJ4wrBJLrtEaGFGXkferrw5eqVvzaHhtFKQk= diff --git a/pinning/pinner/dspinner/uniquepinprovider.go b/pinning/pinner/dspinner/uniquepinprovider.go new file mode 100644 index 000000000..84722ef96 --- /dev/null +++ b/pinning/pinner/dspinner/uniquepinprovider.go @@ -0,0 +1,118 @@ +package dspinner + 
+import ( + "context" + + "github.com/ipfs/boxo/blockstore" + "github.com/ipfs/boxo/dag/walker" + ipfspinner "github.com/ipfs/boxo/pinning/pinner" + "github.com/ipfs/boxo/provider" + "github.com/ipfs/go-cid" + mh "github.com/multiformats/go-multihash" +) + +// NewUniquePinnedProvider returns a [provider.KeyChanFunc] that emits +// all blocks reachable from pinned roots, with bloom filter cross-pin +// deduplication via the shared [walker.VisitedTracker]. +// +// Processing order: recursive pin DAGs first (via [walker.WalkDAG]), +// then direct pins. This order ensures that by the time direct pins +// are processed, all recursive DAGs have been walked and their CIDs +// are in the tracker. +// +// The existing [NewPinnedProvider] is unchanged. This function is used +// only when the +unique strategy modifier is active. +func NewUniquePinnedProvider( + pinning ipfspinner.Pinner, + bs blockstore.Blockstore, + tracker walker.VisitedTracker, +) provider.KeyChanFunc { + fetch := walker.LinksFetcherFromBlockstore(bs) + return newPinnedProvider(pinning, tracker, func(ctx context.Context, root cid.Cid, emit func(cid.Cid) bool) error { + return walker.WalkDAG(ctx, root, fetch, emit, walker.WithVisitedTracker(tracker)) + }, "unique provide") +} + +// NewPinnedEntityRootsProvider returns a [provider.KeyChanFunc] that +// emits entity roots (files, directories, HAMT shards) reachable from +// pinned roots, skipping internal file chunks. Uses +// [walker.WalkEntityRoots] with the shared [walker.VisitedTracker] +// for cross-pin deduplication. +// +// Same processing order as [NewUniquePinnedProvider]: recursive pins +// first, direct pins second. 
+func NewPinnedEntityRootsProvider( + pinning ipfspinner.Pinner, + bs blockstore.Blockstore, + tracker walker.VisitedTracker, +) provider.KeyChanFunc { + fetch := walker.NodeFetcherFromBlockstore(bs) + return newPinnedProvider(pinning, tracker, func(ctx context.Context, root cid.Cid, emit func(cid.Cid) bool) error { + return walker.WalkEntityRoots(ctx, root, fetch, emit, walker.WithVisitedTracker(tracker)) + }, "entity provide") +} + +// newPinnedProvider is the shared implementation for +// [NewUniquePinnedProvider] and [NewPinnedEntityRootsProvider]. The +// walk callback performs the actual DAG traversal for each recursive +// pin root. +func newPinnedProvider( + pinning ipfspinner.Pinner, + tracker walker.VisitedTracker, + walk func(ctx context.Context, root cid.Cid, emit func(cid.Cid) bool) error, + logPrefix string, +) provider.KeyChanFunc { + return func(ctx context.Context) (<-chan cid.Cid, error) { + outCh := make(chan cid.Cid) + + go func() { + defer close(outCh) + + emit := func(c cid.Cid) bool { + select { + case outCh <- c: + return true + case <-ctx.Done(): + return false + } + } + + // 1. Walk recursive pin DAGs (bulk of dedup benefit). + // A corrupted pin entry is logged and skipped so it does + // not prevent remaining pins from being provided. + for sc := range pinning.RecursiveKeys(ctx, false) { + if sc.Err != nil { + log.Errorf("%s recursive pins: %s", logPrefix, sc.Err) + continue + } + if err := walk(ctx, sc.Pin.Key, emit); err != nil { + return // context cancelled + } + } + + // 2. Direct pins (emit if not already visited). + // Same best-effort: skip corrupted entries. 
+ for sc := range pinning.DirectKeys(ctx, false) { + if sc.Err != nil { + log.Errorf("%s direct pins: %s", logPrefix, sc.Err) + continue + } + // skip identity CIDs: content is inline, no need to provide + if sc.Pin.Key.Prefix().MhType == mh.IDENTITY { + continue + } + // skip if already visited (by a recursive pin walk above) + if !tracker.Visit(sc.Pin.Key) { + continue + } + // emit returns false when context is cancelled + // (consumer stopped reading from the channel) + if !emit(sc.Pin.Key) { + return + } + } + }() + + return outCh, nil + } +} diff --git a/pinning/pinner/dspinner/uniquepinprovider_test.go b/pinning/pinner/dspinner/uniquepinprovider_test.go new file mode 100644 index 000000000..5e903285f --- /dev/null +++ b/pinning/pinner/dspinner/uniquepinprovider_test.go @@ -0,0 +1,333 @@ +package dspinner + +import ( + "testing" + + "github.com/ipfs/boxo/blockservice" + "github.com/ipfs/boxo/blockstore" + "github.com/ipfs/boxo/dag/walker" + "github.com/ipfs/boxo/exchange/offline" + "github.com/ipfs/boxo/ipld/merkledag" + mdutils "github.com/ipfs/boxo/ipld/merkledag/test" + ft "github.com/ipfs/boxo/ipld/unixfs" + ipfspinner "github.com/ipfs/boxo/pinning/pinner" + "github.com/ipfs/boxo/provider" + "github.com/ipfs/go-cid" + "github.com/ipfs/go-datastore" + dssync "github.com/ipfs/go-datastore/sync" + format "github.com/ipfs/go-ipld-format" + mh "github.com/multiformats/go-multihash" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func setupPinTest(t *testing.T) (blockstore.Blockstore, ipfspinner.Pinner, format.DAGService) { + t.Helper() + ds := dssync.MutexWrap(datastore.NewMapDatastore()) + bs := blockstore.NewBlockstore(ds) + bserv := blockservice.New(bs, offline.Exchange(bs)) + dserv := merkledag.NewDAGService(bserv) + pinner, err := New(t.Context(), ds, dserv) + require.NoError(t, err) + return bs, pinner, dserv +} + +// TestUniquePinnedProvider_DedupAcrossPins verifies that blocks shared +// between two recursive pins 
are emitted only once. This is the core +// use case: append-only datasets where each pin is the previous one +// plus a small delta, sharing the majority of their DAGs. +func TestUniquePinnedProvider_DedupAcrossPins(t *testing.T) { + bs, pinner, dserv := setupPinTest(t) + + // two DAGs that share a common subtree + shared := merkledag.NodeWithData([]byte("shared")) + require.NoError(t, dserv.Add(t.Context(), shared)) + + root1 := merkledag.NodeWithData([]byte("root1")) + root1.AddNodeLink("shared", shared) + require.NoError(t, dserv.Add(t.Context(), root1)) + + root2 := merkledag.NodeWithData([]byte("root2")) + root2.AddNodeLink("shared", shared) + require.NoError(t, dserv.Add(t.Context(), root2)) + + require.NoError(t, pinner.PinWithMode(t.Context(), root1.Cid(), ipfspinner.Recursive, "pin1")) + require.NoError(t, pinner.PinWithMode(t.Context(), root2.Cid(), ipfspinner.Recursive, "pin2")) + + tracker := walker.NewMapTracker() + keyChanF := NewUniquePinnedProvider(pinner, bs, tracker) + ch, err := keyChanF(t.Context()) + require.NoError(t, err) + + visited := make(map[cid.Cid]int) + for c := range ch { + visited[c]++ + } + + assert.Equal(t, 1, visited[shared.Cid()], + "shared block emitted exactly once across both pins") + assert.Equal(t, 1, visited[root1.Cid()]) + assert.Equal(t, 1, visited[root2.Cid()]) + assert.Len(t, visited, 3, "root1 + root2 + shared") +} + +// TestUniquePinnedProvider_DirectPins verifies that direct pins are +// emitted and deduplicated against recursive pin walks. A CID that +// appears both as a direct pin and within a recursive pin DAG should +// be emitted only once. 
+func TestUniquePinnedProvider_DirectPins(t *testing.T) { + bs, pinner, dserv := setupPinTest(t) + + leaf := merkledag.NodeWithData([]byte("leaf")) + require.NoError(t, dserv.Add(t.Context(), leaf)) + + root := merkledag.NodeWithData([]byte("root")) + root.AddNodeLink("leaf", leaf) + require.NoError(t, dserv.Add(t.Context(), root)) + + // pin root recursively (covers root + leaf) + require.NoError(t, pinner.PinWithMode(t.Context(), root.Cid(), ipfspinner.Recursive, "rec")) + // also direct-pin the leaf + require.NoError(t, pinner.PinWithMode(t.Context(), leaf.Cid(), ipfspinner.Direct, "dir")) + + tracker := walker.NewMapTracker() + keyChanF := NewUniquePinnedProvider(pinner, bs, tracker) + ch, err := keyChanF(t.Context()) + require.NoError(t, err) + + visited := make(map[cid.Cid]int) + for c := range ch { + visited[c]++ + } + + assert.Equal(t, 1, visited[leaf.Cid()], + "leaf emitted once despite being both recursively and directly pinned") + assert.Equal(t, 1, visited[root.Cid()]) + assert.Len(t, visited, 2) +} + +// TestUniquePinnedProvider_BufferedProviderCompat verifies that +// NewUniquePinnedProvider works with NewBufferedProvider, matching +// the wrapping pattern used in kubo's createKeyProvider. +func TestUniquePinnedProvider_BufferedProviderCompat(t *testing.T) { + bs, pinner, dserv := setupPinTest(t) + daggen := mdutils.NewDAGGenerator() + root, allCids, err := daggen.MakeDagNode(dserv.Add, 3, 2) + require.NoError(t, err) + require.NoError(t, pinner.PinWithMode(t.Context(), root, ipfspinner.Recursive, "test")) + + tracker := walker.NewMapTracker() + keyChanF := provider.NewBufferedProvider( + NewUniquePinnedProvider(pinner, bs, tracker)) + ch, err := keyChanF(t.Context()) + require.NoError(t, err) + + count := 0 + for range ch { + count++ + } + assert.Equal(t, len(allCids), count) +} + +// TestPinnedEntityRootsProvider_SkipsChunks verifies that the entity +// roots provider emits file roots but does not descend into chunks. 
+// This is the core optimization of the +entities strategy applied to +// pinned content. +func TestPinnedEntityRootsProvider_SkipsChunks(t *testing.T) { + bs, pinner, dserv := setupPinTest(t) + + // chunked file: root -> chunk1, chunk2 + chunk1 := merkledag.NewRawNode([]byte("chunk1")) + chunk2 := merkledag.NewRawNode([]byte("chunk2")) + require.NoError(t, dserv.Add(t.Context(), chunk1)) + require.NoError(t, dserv.Add(t.Context(), chunk2)) + + fsn := ft.NewFSNode(ft.TFile) + fsn.AddBlockSize(6) + fsn.AddBlockSize(6) + fileData, err := fsn.GetBytes() + require.NoError(t, err) + fileNode := merkledag.NodeWithData(fileData) + fileNode.AddNodeLink("", chunk1) + fileNode.AddNodeLink("", chunk2) + require.NoError(t, dserv.Add(t.Context(), fileNode)) + + // directory containing the file + dir := ft.EmptyDirNode() + dir.AddNodeLink("big.bin", fileNode) + require.NoError(t, dserv.Add(t.Context(), dir)) + require.NoError(t, pinner.PinWithMode(t.Context(), dir.Cid(), ipfspinner.Recursive, "test")) + + tracker := walker.NewMapTracker() + keyChanF := NewPinnedEntityRootsProvider(pinner, bs, tracker) + ch, err := keyChanF(t.Context()) + require.NoError(t, err) + + var visited []cid.Cid + for c := range ch { + visited = append(visited, c) + } + + assert.Contains(t, visited, dir.Cid(), "directory emitted") + assert.Contains(t, visited, fileNode.Cid(), "file root emitted") + assert.NotContains(t, visited, chunk1.Cid(), "chunk1 NOT emitted") + assert.NotContains(t, visited, chunk2.Cid(), "chunk2 NOT emitted") + assert.Len(t, visited, 2, "dir + file root only") +} + +// TestPinnedEntityRootsProvider_DedupAcrossPins verifies that entity +// roots shared between pins are emitted only once, same as +// NewUniquePinnedProvider but at the entity level. 
+func TestPinnedEntityRootsProvider_DedupAcrossPins(t *testing.T) { + bs, pinner, dserv := setupPinTest(t) + + // shared file across two directories + sharedFile := merkledag.NodeWithData(func() []byte { + fsn := ft.NewFSNode(ft.TFile) + fsn.SetData([]byte("shared")) + b, _ := fsn.GetBytes() + return b + }()) + require.NoError(t, dserv.Add(t.Context(), sharedFile)) + + // directories must be distinct (different unique files) so they + // get different CIDs + unique1 := merkledag.NodeWithData(func() []byte { + fsn := ft.NewFSNode(ft.TFile) + fsn.SetData([]byte("unique1")) + b, _ := fsn.GetBytes() + return b + }()) + unique2 := merkledag.NodeWithData(func() []byte { + fsn := ft.NewFSNode(ft.TFile) + fsn.SetData([]byte("unique2")) + b, _ := fsn.GetBytes() + return b + }()) + require.NoError(t, dserv.Add(t.Context(), unique1)) + require.NoError(t, dserv.Add(t.Context(), unique2)) + + dir1 := ft.EmptyDirNode() + dir1.AddNodeLink("shared.txt", sharedFile) + dir1.AddNodeLink("unique.txt", unique1) + require.NoError(t, dserv.Add(t.Context(), dir1)) + + dir2 := ft.EmptyDirNode() + dir2.AddNodeLink("shared.txt", sharedFile) + dir2.AddNodeLink("unique.txt", unique2) + require.NoError(t, dserv.Add(t.Context(), dir2)) + + require.NoError(t, pinner.PinWithMode(t.Context(), dir1.Cid(), ipfspinner.Recursive, "pin1")) + require.NoError(t, pinner.PinWithMode(t.Context(), dir2.Cid(), ipfspinner.Recursive, "pin2")) + + tracker := walker.NewMapTracker() + keyChanF := NewPinnedEntityRootsProvider(pinner, bs, tracker) + ch, err := keyChanF(t.Context()) + require.NoError(t, err) + + visited := make(map[cid.Cid]int) + for c := range ch { + visited[c]++ + } + + assert.Equal(t, 1, visited[sharedFile.Cid()], + "shared file emitted once across both pins") + assert.Len(t, visited, 5, "dir1 + dir2 + shared + unique1 + unique2") +} + +// --- identity CID filtering --- + +func makeIdentityCID(t *testing.T, data []byte) cid.Cid { + t.Helper() + hash, err := mh.Encode(data, mh.IDENTITY) + 
require.NoError(t, err) + return cid.NewCidV1(cid.Raw, hash) +} + +// TestUniquePinnedProvider_IdentityDirectPin verifies that a +// directly-pinned identity CID is not emitted. Identity CIDs embed +// data inline, so providing them to the DHT is wasteful. +func TestUniquePinnedProvider_IdentityDirectPin(t *testing.T) { + bs, pinner, dserv := setupPinTest(t) + + normal := merkledag.NodeWithData([]byte("normal")) + require.NoError(t, dserv.Add(t.Context(), normal)) + + idCid := makeIdentityCID(t, []byte("inline")) + + require.NoError(t, pinner.PinWithMode(t.Context(), normal.Cid(), ipfspinner.Direct, "normal")) + require.NoError(t, pinner.PinWithMode(t.Context(), idCid, ipfspinner.Direct, "identity")) + + tracker := walker.NewMapTracker() + keyChanF := NewUniquePinnedProvider(pinner, bs, tracker) + ch, err := keyChanF(t.Context()) + require.NoError(t, err) + + var visited []cid.Cid + for c := range ch { + visited = append(visited, c) + } + + assert.Contains(t, visited, normal.Cid(), "normal direct pin emitted") + assert.NotContains(t, visited, idCid, "identity direct pin must not be emitted") +} + +// TestUniquePinnedProvider_IdentityInRecursiveDAG verifies that +// identity CIDs within a recursive pin DAG are not emitted. The +// walker traverses through them but skips emission. 
+func TestUniquePinnedProvider_IdentityInRecursiveDAG(t *testing.T) { + bs, pinner, dserv := setupPinTest(t) + + idChild := makeIdentityCID(t, []byte("inline-leaf")) + + root := merkledag.NodeWithData([]byte("root-with-id")) + require.NoError(t, root.AddRawLink("inline", &format.Link{Cid: idChild})) + require.NoError(t, dserv.Add(t.Context(), root)) + + require.NoError(t, pinner.PinWithMode(t.Context(), root.Cid(), ipfspinner.Recursive, "rec")) + + tracker := walker.NewMapTracker() + keyChanF := NewUniquePinnedProvider(pinner, bs, tracker) + ch, err := keyChanF(t.Context()) + require.NoError(t, err) + + var visited []cid.Cid + for c := range ch { + visited = append(visited, c) + } + + assert.Contains(t, visited, root.Cid(), "non-identity root emitted") + assert.NotContains(t, visited, idChild, "identity child must not be emitted") +} + +// TestPinnedEntityRootsProvider_IdentityDirectPin verifies that the +// entity roots provider also filters identity CIDs from direct pins. +func TestPinnedEntityRootsProvider_IdentityDirectPin(t *testing.T) { + bs, pinner, dserv := setupPinTest(t) + + normal := merkledag.NodeWithData(func() []byte { + fsn := ft.NewFSNode(ft.TFile) + fsn.SetData([]byte("normal")) + b, _ := fsn.GetBytes() + return b + }()) + require.NoError(t, dserv.Add(t.Context(), normal)) + + idCid := makeIdentityCID(t, []byte("inline")) + + require.NoError(t, pinner.PinWithMode(t.Context(), normal.Cid(), ipfspinner.Direct, "normal")) + require.NoError(t, pinner.PinWithMode(t.Context(), idCid, ipfspinner.Direct, "identity")) + + tracker := walker.NewMapTracker() + keyChanF := NewPinnedEntityRootsProvider(pinner, bs, tracker) + ch, err := keyChanF(t.Context()) + require.NoError(t, err) + + var visited []cid.Cid + for c := range ch { + visited = append(visited, c) + } + + assert.Contains(t, visited, normal.Cid(), "normal direct pin emitted") + assert.NotContains(t, visited, idCid, "identity direct pin must not be emitted") +} diff --git a/provider/provider.go 
b/provider/provider.go index 1429bf17e..a52d77b26 100644 --- a/provider/provider.go +++ b/provider/provider.go @@ -111,8 +111,54 @@ func NewPrioritizedProvider(streams ...KeyChanFunc) KeyChanFunc { last := len(streams) - 1 for i, stream := range streams { if err := handleStream(stream, i < last); err != nil { - log.Warnf("error in prioritized strategy while handling CID stream %d: %w", i, err) - return + log.Errorf("error in prioritized strategy while handling CID stream %d: %s", i, err) + continue // best-effort: e.g. MFS flush error should not prevent pinned content from being provided + } + } + }() + + return outCh, nil + } +} + +// NewConcatProvider concatenates multiple KeyChanFunc streams into one, +// running them sequentially in order. All CIDs from each stream are +// forwarded to the output channel without deduplication. +// +// Use this when the input streams are already deduplicated externally +// (e.g. via a shared [walker.VisitedTracker]). For streams that may +// produce overlapping CIDs, use [NewPrioritizedProvider] instead, which +// maintains its own visited set. +// +// Like [NewPrioritizedProvider], a failure in one stream's KeyChanFunc +// is logged and skipped -- remaining streams still run. 
+func NewConcatProvider(streams ...KeyChanFunc) KeyChanFunc { + return func(ctx context.Context) (<-chan cid.Cid, error) { + outCh := make(chan cid.Cid) + + go func() { + defer close(outCh) + for i, stream := range streams { + ch, err := stream(ctx) + if err != nil { + log.Errorf("error in concat strategy while handling CID stream %d: %s", i, err) + continue + } + drain: + for { + select { + case <-ctx.Done(): + return + case c, ok := <-ch: + if !ok { + break drain + } + select { + case <-ctx.Done(): + return + case outCh <- c: + } + } } } }() diff --git a/provider/reprovider_test.go b/provider/reprovider_test.go index e65c7ffcb..357af5747 100644 --- a/provider/reprovider_test.go +++ b/provider/reprovider_test.go @@ -4,6 +4,7 @@ import ( "bytes" "context" "crypto/rand" + "fmt" "runtime" "slices" "strconv" @@ -322,3 +323,220 @@ func TestNewPrioritizedProvider(t *testing.T) { }) } } + +// TestPrioritizedProvider_StreamErrorContinues verifies that a failure +// in one stream does not prevent subsequent streams from running. +// e.g. MFS flush failure should not block pinned content from being provided. +func TestPrioritizedProvider_StreamErrorContinues(t *testing.T) { + cids := makeCIDs(3) + + failingStream := func(_ context.Context) (<-chan cid.Cid, error) { + return nil, fmt.Errorf("stream init failed") + } + goodStream := newMockKeyChanFunc(cids) + + // failing stream first, good stream second + stream := NewPrioritizedProvider(failingStream, goodStream) + ch, err := stream(t.Context()) + require.NoError(t, err) + + var received []cid.Cid + for c := range ch { + received = append(received, c) + } + // good stream should still produce its CIDs despite the first stream failing + require.Equal(t, cids, received) +} + +// TestPrioritizedProvider_ContextCancellation verifies that context +// cancellation stops the provider cleanly without hanging. 
+func TestPrioritizedProvider_ContextCancellation(t *testing.T) { + // slow stream that blocks until context is cancelled + slowStream := func(ctx context.Context) (<-chan cid.Cid, error) { + ch := make(chan cid.Cid) + go func() { + defer close(ch) + <-ctx.Done() + }() + return ch, nil + } + + ctx, cancel := context.WithCancel(t.Context()) + stream := NewPrioritizedProvider(slowStream) + ch, err := stream(ctx) + require.NoError(t, err) + + cancel() + // channel should close promptly after cancellation + for range ch { + t.Fatal("should not receive CIDs after cancellation") + } +} + +// TestPrioritizedProvider_ThreeStreams verifies correct ordering and +// dedup across three streams (the common case: MFS + pinned + direct). +func TestPrioritizedProvider_ThreeStreams(t *testing.T) { + cids := makeCIDs(9) + s1 := newMockKeyChanFunc(cids[:3]) // highest priority + s2 := newMockKeyChanFunc(append(cids[1:4:4], cids[4:6]...)) // overlaps with s1 + s3 := newMockKeyChanFunc(cids[6:]) // lowest priority + + stream := NewPrioritizedProvider(s1, s2, s3) + ch, err := stream(t.Context()) + require.NoError(t, err) + + var received []cid.Cid + for c := range ch { + received = append(received, c) + } + // s1: 0,1,2 (all new) + // s2: 1,2 skipped (seen in s1), 3,4,5 new + // s3: 6,7,8 (all new, last stream so no dedup tracking) + require.Equal(t, []cid.Cid{ + cids[0], cids[1], cids[2], // s1 + cids[3], cids[4], cids[5], // s2 (deduped 1,2) + cids[6], cids[7], cids[8], // s3 + }, received) +} + +// TestPrioritizedProvider_AllStreamsFail verifies that when every +// stream fails, the output channel closes cleanly with no CIDs. 
+func TestPrioritizedProvider_AllStreamsFail(t *testing.T) { + fail := func(_ context.Context) (<-chan cid.Cid, error) { + return nil, fmt.Errorf("fail") + } + stream := NewPrioritizedProvider(fail, fail, fail) + ch, err := stream(t.Context()) + require.NoError(t, err) + + var received []cid.Cid + for c := range ch { + received = append(received, c) + } + require.Empty(t, received) +} + +// TestPrioritizedProvider_ErrorContinues verifies that a failing stream +// does not prevent subsequent streams from being processed. This is a +// regression test for a bug where the goroutine returned on error +// instead of continuing to the next stream. +func TestPrioritizedProvider_ErrorContinues(t *testing.T) { + cids := makeCIDs(3) + fail := func(_ context.Context) (<-chan cid.Cid, error) { + return nil, fmt.Errorf("stream error") + } + good := newMockKeyChanFunc(cids) + + stream := NewPrioritizedProvider(fail, good) + ch, err := stream(t.Context()) + require.NoError(t, err) + + var received []cid.Cid + for c := range ch { + received = append(received, c) + } + require.Equal(t, cids, received, + "CIDs from the good stream must still be emitted after a prior stream fails") +} + +// TestNewConcatProvider verifies that ConcatProvider concatenates +// streams in order without deduplication. Unlike PrioritizedProvider, +// duplicate CIDs across streams are NOT filtered. 
+func TestNewConcatProvider(t *testing.T) { + cids := makeCIDs(6) + + t.Run("concatenates in order", func(t *testing.T) { + s1 := newMockKeyChanFunc(cids[:3]) + s2 := newMockKeyChanFunc(cids[3:]) + + stream := NewConcatProvider(s1, s2) + ch, err := stream(t.Context()) + require.NoError(t, err) + + var received []cid.Cid + for c := range ch { + received = append(received, c) + } + require.Equal(t, cids, received) + }) + + t.Run("duplicates are NOT filtered", func(t *testing.T) { + // same CIDs in both streams -- ConcatProvider passes them all through + s1 := newMockKeyChanFunc(cids[:3]) + s2 := newMockKeyChanFunc(cids[:3]) + + stream := NewConcatProvider(s1, s2) + ch, err := stream(t.Context()) + require.NoError(t, err) + + var received []cid.Cid + for c := range ch { + received = append(received, c) + } + expected := append(cids[:3:3], cids[:3]...) + require.Equal(t, expected, received) + }) + + t.Run("stream error skips to next", func(t *testing.T) { + failing := func(_ context.Context) (<-chan cid.Cid, error) { + return nil, fmt.Errorf("init failed") + } + good := newMockKeyChanFunc(cids[:3]) + + stream := NewConcatProvider(failing, good) + ch, err := stream(t.Context()) + require.NoError(t, err) + + var received []cid.Cid + for c := range ch { + received = append(received, c) + } + require.Equal(t, cids[:3], received) + }) + + t.Run("single stream", func(t *testing.T) { + stream := NewConcatProvider(newMockKeyChanFunc(cids)) + ch, err := stream(t.Context()) + require.NoError(t, err) + + var received []cid.Cid + for c := range ch { + received = append(received, c) + } + require.Equal(t, cids, received) + }) + + t.Run("empty streams", func(t *testing.T) { + empty := newMockKeyChanFunc(nil) + stream := NewConcatProvider(empty, newMockKeyChanFunc(cids[:2])) + ch, err := stream(t.Context()) + require.NoError(t, err) + + var received []cid.Cid + for c := range ch { + received = append(received, c) + } + require.Equal(t, cids[:2], received) + }) + + t.Run("context 
cancellation stops cleanly", func(t *testing.T) { + slowStream := func(ctx context.Context) (<-chan cid.Cid, error) { + ch := make(chan cid.Cid) + go func() { + defer close(ch) + <-ctx.Done() + }() + return ch, nil + } + + ctx, cancel := context.WithCancel(t.Context()) + stream := NewConcatProvider(slowStream) + ch, err := stream(ctx) + require.NoError(t, err) + + cancel() + for range ch { + t.Fatal("should not receive CIDs after cancellation") + } + }) +}