diff --git a/.github/workflows/ci-dgraph-integration2-tests.yml b/.github/workflows/ci-dgraph-integration2-tests.yml
index 403a08465b4..7c8fadd24f4 100644
--- a/.github/workflows/ci-dgraph-integration2-tests.yml
+++ b/.github/workflows/ci-dgraph-integration2-tests.yml
@@ -29,6 +29,16 @@ jobs:
         with:
           fetch-depth: 0
 
+      - name: Restore benchmark dataset cache
+        id: dataset-cache
+        uses: actions/cache/restore@v4
+        with:
+          path: dgraphtest/datafiles
+          key: dataset-dgraphtest-v1
+
+      - name: Ensure datafiles directory
+        run: mkdir -p dgraphtest/datafiles
+
       - name: Set up Go
         uses: actions/setup-go@v6
         with:
@@ -55,3 +65,11 @@ jobs:
           go clean -testcache
           # sleep
           sleep 5
+
+      - name: Save benchmark dataset cache
+        # Cache entries are immutable per key, so only save when nothing was restored.
+        if: success() && steps.dataset-cache.outputs.cache-hit != 'true'
+        uses: actions/cache/save@v4
+        with:
+          path: dgraphtest/datafiles
+          key: dataset-dgraphtest-v1
diff --git a/.github/workflows/ci-dgraph-ldbc-tests.yml b/.github/workflows/ci-dgraph-ldbc-tests.yml
index 045ad3d42e9..4ba863dedb0 100644
--- a/.github/workflows/ci-dgraph-ldbc-tests.yml
+++ b/.github/workflows/ci-dgraph-ldbc-tests.yml
@@ -28,6 +28,13 @@ jobs:
       - name: Checkout Dgraph
         uses: actions/checkout@v5
 
+      - name: Restore LDBC dataset cache
+        id: dataset-cache
+        uses: actions/cache/restore@v4
+        with:
+          path: ${{ github.workspace }}/test-data
+          key: dataset-ldbc-v1
+
       - name: Set up Go
         uses: actions/setup-go@v6
         with:
@@ -61,6 +68,14 @@ jobs:
           # move the binary
           cp dgraph/dgraph ~/go/bin/dgraph
           # run the ldbc tests
-          cd t; ./t --suite=ldbc
+          cd t; ./t --suite=ldbc --tmp=${{ github.workspace }}/test-data
           # clean up docker containers after test execution
           ./t -r
+
+      - name: Save LDBC dataset cache
+        # Cache entries are immutable per key, so only save when nothing was restored.
+        if: success() && steps.dataset-cache.outputs.cache-hit != 'true'
+        uses: actions/cache/save@v4
+        with:
+          path: ${{ github.workspace }}/test-data
+          key: dataset-ldbc-v1
diff --git a/.github/workflows/ci-dgraph-load-tests.yml b/.github/workflows/ci-dgraph-load-tests.yml
index a1967b59d53..dd25be53a63 100644
--- a/.github/workflows/ci-dgraph-load-tests.yml
+++ b/.github/workflows/ci-dgraph-load-tests.yml
@@ -27,6 +27,13 @@ jobs:
     steps:
       - uses: actions/checkout@v5
 
+      - name: Restore load test dataset cache
+        id: dataset-cache
+        uses: actions/cache/restore@v4
+        with:
+          path: ${{ github.workspace }}/test-data
+          key: dataset-load-v1
+
       - name: Set up Go
         uses: actions/setup-go@v6
         with:
@@ -60,8 +67,16 @@ jobs:
           # move the binary
           cp dgraph/dgraph ~/go/bin/dgraph
           # run the load tests
-          cd t; ./t --suite=load
+          cd t; ./t --suite=load --tmp=${{ github.workspace }}/test-data
           # clean up docker containers after test execution
           ./t -r
           # sleep
           sleep 5
+
+      - name: Save load test dataset cache
+        # Cache entries are immutable per key, so only save when nothing was restored.
+        if: success() && steps.dataset-cache.outputs.cache-hit != 'true'
+        uses: actions/cache/save@v4
+        with:
+          path: ${{ github.workspace }}/test-data
+          key: dataset-load-v1
diff --git a/dgraphtest/load.go b/dgraphtest/load.go
index 116c04a6b5c..d7bf35255e9 100644
--- a/dgraphtest/load.go
+++ b/dgraphtest/load.go
@@ -20,6 +20,7 @@ import (
 	"runtime"
 	"strconv"
 	"strings"
+	"time"
 
 	"github.com/pkg/errors"
 
@@ -41,10 +42,10 @@ func (c *LocalCluster) HostDgraphBinaryPath() string {
 }
 
 var datafiles = map[string]string{
-	"1million.schema":  "https://github.com/dgraph-io/dgraph-benchmarks/blob/main/data/1million.schema?raw=true",
-	"1million.rdf.gz":  "https://github.com/dgraph-io/dgraph-benchmarks/blob/main/data/1million.rdf.gz?raw=true",
-	"21million.schema": "https://github.com/dgraph-io/dgraph-benchmarks/blob/main/data/21million.schema?raw=true",
-	"21million.rdf.gz": "https://github.com/dgraph-io/dgraph-benchmarks/blob/main/data/21million.rdf.gz?raw=true",
+	"1million.schema":  "https://raw.githubusercontent.com/dgraph-io/dgraph-benchmarks/refs/heads/main/data/1million.schema",
+	"1million.rdf.gz":  "https://media.githubusercontent.com/media/dgraph-io/dgraph-benchmarks/refs/heads/main/data/1million.rdf.gz",
+	"21million.schema": "https://raw.githubusercontent.com/dgraph-io/dgraph-benchmarks/refs/heads/main/data/21million.schema",
+	"21million.rdf.gz": "https://media.githubusercontent.com/media/dgraph-io/dgraph-benchmarks/refs/heads/main/data/21million.rdf.gz",
 }
 
 type DatasetType int
@@ -604,11 +605,32 @@ func (d *Dataset) ensureFile(filename string) string {
 }
 
+// downloadFile fetches url into datasetFilesPath/fname with wget, making up to
+// three attempts with a growing pause between them. The data is written to a
+// temporary "<fname>.part" file and renamed into place only on success, so an
+// interrupted download never leaves a truncated file under the final name.
 func downloadFile(fname, url string) error {
-	cmd := exec.Command("wget", "-O", fname, url)
-	cmd.Dir = datasetFilesPath
-
-	if _, err := cmd.CombinedOutput(); err != nil {
-		return fmt.Errorf("error downloading file %s: %w", fname, err)
+	const maxRetries = 3
+	fpath := filepath.Join(datasetFilesPath, fname)
+	partName := fname + ".part"
+	partPath := filepath.Join(datasetFilesPath, partName)
+	for attempt := 1; ; attempt++ {
+		cmd := exec.Command("wget", "--tries=3", "--waitretry=5", "--retry-connrefused", "-O", partName, url)
+		cmd.Dir = datasetFilesPath
+
+		out, err := cmd.CombinedOutput()
+		if err == nil {
+			break
+		}
+		log.Printf("attempt %d/%d failed to download %s: %v\n%s", attempt, maxRetries, fname, err, string(out))
+		if attempt == maxRetries {
+			_ = os.Remove(partPath) // best-effort cleanup of the partial download
+			return fmt.Errorf("error downloading file %s after %d attempts: %w", fname, maxRetries, err)
+		}
+		time.Sleep(time.Duration(attempt*5) * time.Second)
+	}
+	if err := os.Rename(partPath, fpath); err != nil {
+		_ = os.Remove(partPath) // best-effort cleanup; the rename error is what matters
+		return fmt.Errorf("error moving downloaded file %s into place: %w", fname, err)
 	}
 	return nil
 }
diff --git a/t/t.go b/t/t.go
index 168b6d364c7..cad64a1e1eb 100644
--- a/t/t.go
+++ b/t/t.go
@@ -1159,7 +1159,37 @@ var rdfFileNames = [...]string{
 	"workAt_0.rdf"}
 
 var ldbcDataFiles = map[string]string{
-	"ldbcTypes.schema": "https://github.com/dgraph-io/dgraph-benchmarks/blob/main/ldbc/sf0.3/ldbcTypes.schema?raw=true",
+	"ldbcTypes.schema": "https://media.githubusercontent.com/media/dgraph-io/dgraph-benchmarks/refs/heads/main/ldbc/sf0.3/ldbcTypes.schema",
+}
+
+// wgetWithRetry downloads url into dir/fname with wget, making up to three
+// attempts with a growing pause between them. The data is written to a
+// temporary "<fname>.part" file and renamed into place only on success, so an
+// interrupted download can never be mistaken for a complete, reusable file.
+func wgetWithRetry(fname, url, dir string) error {
+	const maxRetries = 3
+	fpath := filepath.Join(dir, fname)
+	partName := fname + ".part"
+	partPath := filepath.Join(dir, partName)
+	for attempt := 1; ; attempt++ {
+		cmd := exec.Command("wget", "--tries=3", "--waitretry=5", "--retry-connrefused", "-O", partName, url)
+		cmd.Dir = dir
+		out, err := cmd.CombinedOutput()
+		if err == nil {
+			break
+		}
+		fmt.Printf("attempt %d/%d failed to download %s: %v\n%s\n", attempt, maxRetries, fname, err, string(out))
+		if attempt == maxRetries {
+			_ = os.Remove(partPath) // best-effort cleanup of the partial download
+			return fmt.Errorf("failed to download %s after %d attempts: %w", fname, maxRetries, err)
+		}
+		time.Sleep(time.Duration(attempt*5) * time.Second)
+	}
+	if err := os.Rename(partPath, fpath); err != nil {
+		_ = os.Remove(partPath) // best-effort cleanup; the rename error is what matters
+		return fmt.Errorf("failed to move %s into place: %w", fname, err)
+	}
+	return nil
 }
 
 func downloadDataFiles() {
@@ -1168,12 +1198,13 @@ func downloadDataFiles() {
 		return
 	}
 	for fname, link := range datafiles {
-		cmd := exec.Command("wget", "-O", fname, link)
-		cmd.Dir = *tmp
-
-		if out, err := cmd.CombinedOutput(); err != nil {
-			fmt.Printf("Error %v\n", err)
-			panic(fmt.Sprintf("error downloading a file: %s", string(out)))
+		fpath := filepath.Join(*tmp, fname)
+		if fi, err := os.Stat(fpath); err == nil && fi.Size() > 0 {
+			fmt.Printf("Skipping %s (already exists)\n", fname)
+			continue
+		}
+		if err := wgetWithRetry(fname, link, *tmp); err != nil {
+			panic(fmt.Sprintf("error downloading %s: %v", fname, err))
 		}
 	}
 }
@@ -1189,20 +1220,26 @@ func downloadLDBCFiles(dir string) {
 	}
 
 	start := time.Now()
+	sem := make(chan struct{}, 5)
 	var wg sync.WaitGroup
 	for fname, link := range ldbcDataFiles {
+		fpath := filepath.Join(dir, fname)
+		if fi, err := os.Stat(fpath); err == nil && fi.Size() > 0 {
+			fmt.Printf("Skipping %s (already exists)\n", fname)
+			continue
+		}
 		wg.Add(1)
-		go func(fname, link string, wg *sync.WaitGroup) {
+		go func(fname, link string) {
 			defer wg.Done()
-			start := time.Now()
-			cmd := exec.Command("wget", "-O", fname, link)
-			cmd.Dir = dir
-			if out, err := cmd.CombinedOutput(); err != nil {
-				fmt.Printf("Error %v\n", err)
-				panic(fmt.Sprintf("error downloading a file: %s", string(out)))
+			sem <- struct{}{}
+			defer func() { <-sem }()
+
+			dlStart := time.Now()
+			if err := wgetWithRetry(fname, link, dir); err != nil {
+				panic(fmt.Sprintf("error downloading %s: %v", fname, err))
 			}
-			fmt.Printf("Downloaded %s to %s in %s \n", fname, dir, time.Since(start))
-		}(fname, link, &wg)
+			fmt.Printf("Downloaded %s to %s in %s \n", fname, dir, time.Since(dlStart))
+		}(fname, link)
 	}
 	wg.Wait()
 	fmt.Printf("Downloaded %d files in %s \n", len(ldbcDataFiles), time.Since(start))
@@ -1387,7 +1424,9 @@ func run() error {
 	needsData := testSuiteContainsAny("load", "ldbc", "all")
 	if needsData && *tmp == "" {
 		*tmp = filepath.Join(os.TempDir(), "dgraph-test-data")
-		x.Check(testutil.MakeDirEmpty([]string{*tmp}))
+	}
+	if needsData {
+		x.Check(os.MkdirAll(*tmp, 0755))
 	}
 	if testSuiteContainsAny("load", "all") {
 		downloadDataFiles()
@@ -1449,7 +1488,6 @@ func main() {
 	procId = rand.Intn(1000)
 
 	err := run()
-	_ = os.RemoveAll(*tmp)
 	if err != nil {
 		os.Exit(1)
 	}