From f542c364d4629693aa8473ed6953e76947a0ed17 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 5 Jun 2026 14:13:03 -0400 Subject: [PATCH 1/5] don't delete tag if never set Note BackBlaze B2 doesn't support tags and appears to treat this as an object delete call. --- .../edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 6d3fe205639..268b4f37156 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -986,7 +986,7 @@ public boolean downloadRedirectEnabled(String auxObjectTag) { * @param auxiliaryFileName (optional) - file name, if different from the main * file label. * @return redirect url - * @throws IOException. + * @throws IOException */ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) throws IOException { @@ -1314,6 +1314,12 @@ private static AwsCredentialsProvider getCredentialsProvider(String driverId) { } public void removeTempTag() throws IOException { + final boolean taggingDisabled = JvmSettings.DISABLE_S3_TAGGING.lookupOptional(Boolean.class, this.driverId) + .orElse(false); + if (taggingDisabled) { + logger.fine("S3 tagging disabled for storage driver " + driverId + "; skipping temp tag removal."); + return; + } if (!(dvObject instanceof DataFile)) { logger.warning("Attempt to remove tag from non-file DVObject id: " + dvObject.getId()); throw new IOException("Attempt to remove temp tag from non-file S3 Object"); From d48a9f7a2c20e8152cfb0249fa123eda9a57ed1a Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 11 Jun 2026 09:46:15 -0400 Subject: [PATCH 2/5] add maxKeys for storJ, minor improvements --- .../iq/dataverse/S3PackageImporter.java | 18 +-- 
.../iq/dataverse/dataaccess/S3AccessIO.java | 122 ++++++++---------- 2 files changed, 58 insertions(+), 82 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/S3PackageImporter.java b/src/main/java/edu/harvard/iq/dataverse/S3PackageImporter.java index 7b8e5ebeec9..c5ef8e9b1b5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/S3PackageImporter.java +++ b/src/main/java/edu/harvard/iq/dataverse/S3PackageImporter.java @@ -77,23 +77,17 @@ public void copyFromS3(Dataset dataset, String s3ImportPath) throws IOException ListObjectsV2Request listReq = ListObjectsV2Request.builder() .bucket(dcmBucketName) .prefix(dcmDatasetKey) + .maxKeys(1000) .build(); - ListObjectsV2Response listRes; + List storedDcmDatasetFilesSummary = new ArrayList<>(); try { - listRes = s3.listObjectsV2(listReq); + s3.listObjectsV2Paginator(listReq).stream() + .flatMap(r -> r.contents().stream()) + .forEach(storedDcmDatasetFilesSummary::add); } catch (S3Exception se) { logger.info("Caught an S3Exception in s3ImportUtil: " + se.getMessage()); - throw new IOException("S3 listAuxObjects: failed to get a listing for " + dcmDatasetKey); - } - - List storedDcmDatasetFilesSummary = new ArrayList<>(listRes.contents()); - - while (listRes.isTruncated()) { - logger.fine("S3 listAuxObjects: going to next page of list"); - listReq = listReq.toBuilder().continuationToken(listRes.nextContinuationToken()).build(); - listRes = s3.listObjectsV2(listReq); - storedDcmDatasetFilesSummary.addAll(listRes.contents()); + throw new IOException("S3 listObjects: failed to get a listing for " + dcmDatasetKey); } for (S3Object item : storedDcmDatasetFilesSummary) { diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 268b4f37156..fcb141607b8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -704,47 +704,34 @@ 
public List listAuxObjects() throws IOException { String prefix = getDestinationKey(""); List ret = new ArrayList<>(); - ListObjectsV2Request listObjectsReqManual = ListObjectsV2Request.builder().bucket(bucketName).prefix(prefix) + ListObjectsV2Request listRequest = ListObjectsV2Request.builder() + .bucket(bucketName) + .prefix(prefix) + .maxKeys(1000) .build(); - ListObjectsV2Response listObjectsResponse = null; try { - listObjectsResponse = s3.listObjectsV2(listObjectsReqManual).get(); - } catch (InterruptedException | ExecutionException e) { - throw new IOException("S3 listAuxObjects: failed to get a listing for " + prefix, e); - } - - if (listObjectsResponse == null) { - return ret; - } - - List storedAuxFilesSummary = new ArrayList<>(listObjectsResponse.contents()); - - try { - String nextContinuationToken = listObjectsResponse.nextContinuationToken(); - while (nextContinuationToken != null) { - logger.fine("S3 listAuxObjects: going to next page of list"); - ListObjectsV2Request nextReq = ListObjectsV2Request.builder().bucket(bucketName).prefix(prefix) - .continuationToken(nextContinuationToken).build(); - - ListObjectsV2Response nextResponse = s3.listObjectsV2(nextReq).get(); - if (nextResponse != null) { - storedAuxFilesSummary.addAll(nextResponse.contents()); - nextContinuationToken = nextResponse.nextContinuationToken(); - } else { - nextContinuationToken = null; + ListObjectsV2Response listResponse; + String nextToken = null; + do { + ListObjectsV2Request req = listRequest.toBuilder().continuationToken(nextToken).build(); + listResponse = s3.listObjectsV2(req).get(); + for (S3Object item : listResponse.contents()) { + String destinationKey = item.key(); + String fileName = destinationKey.substring(destinationKey.lastIndexOf(".") + 1); + logger.fine("S3 cached aux object fileName: " + fileName); + ret.add(fileName); } - } + nextToken = listResponse.nextContinuationToken(); + if (listResponse.isTruncated() && nextToken == null) { + logger.warning("S3 
listAuxObjects: list is truncated but nextContinuationToken is null; stopping to avoid infinite loop"); + break; + } + } while (listResponse.isTruncated()); } catch (InterruptedException | ExecutionException e) { throw new IOException("S3AccessIO: Failed to get aux objects for listing.", e); } - for (S3Object item : storedAuxFilesSummary) { - String destinationKey = item.key(); - String fileName = destinationKey.substring(destinationKey.lastIndexOf(".") + 1); - logger.fine("S3 cached aux object fileName: " + fileName); - ret.add(fileName); - } return ret; } @@ -775,17 +762,25 @@ public void deleteAllAuxObjects() throws IOException { String prefix = getDestinationKey(""); List storedAuxFilesSummary = new ArrayList<>(); - try { - ListObjectsV2Request listRequest = ListObjectsV2Request.builder().bucket(bucketName).prefix(prefix).build(); + ListObjectsV2Request listRequest = ListObjectsV2Request.builder() + .bucket(bucketName) + .prefix(prefix) + .maxKeys(1000) + .build(); + try { ListObjectsV2Response listResponse; + String nextToken = null; do { - listResponse = s3.listObjectsV2(listRequest).get(); + ListObjectsV2Request req = listRequest.toBuilder().continuationToken(nextToken).build(); + listResponse = s3.listObjectsV2(req).get(); storedAuxFilesSummary.addAll(listResponse.contents()); - - listRequest = listRequest.toBuilder().continuationToken(listResponse.nextContinuationToken()).build(); + nextToken = listResponse.nextContinuationToken(); + if (listResponse.isTruncated() && nextToken == null) { + logger.warning("S3 deleteAllAuxObjects: list is truncated but nextContinuationToken is null; stopping to avoid infinite loop"); + break; + } } while (listResponse.isTruncated()); - } catch (InterruptedException | ExecutionException e) { throw new IOException("S3AccessIO: Failed to get aux objects for listing to delete.", e); } @@ -1460,45 +1455,32 @@ private List listAllFiles() throws IOException { String prefix = dataset.getAuthorityForFileStorage() + "/" + 
dataset.getIdentifierForFileStorage() + "/"; List ret = new ArrayList<>(); - ListObjectsV2Request listObjectsReqManual = ListObjectsV2Request.builder().bucket(bucketName).prefix(prefix) + ListObjectsV2Request listRequest = ListObjectsV2Request.builder() + .bucket(bucketName) + .prefix(prefix) + .maxKeys(1000) //Required for storJ .build(); - ListObjectsV2Response listObjectsResponse = null; try { - listObjectsResponse = s3.listObjectsV2(listObjectsReqManual).get(); - } catch (InterruptedException | ExecutionException e) { - throw new IOException("S3 listObjects: failed to get a listing for " + prefix, e); - } - - if (listObjectsResponse == null) { - return ret; - } - - List storedFilesSummary = new ArrayList<>(listObjectsResponse.contents()); - - try { - String nextContinuationToken = listObjectsResponse.nextContinuationToken(); - while (nextContinuationToken != null) { - logger.fine("S3 listObjects: going to next page of list"); - ListObjectsV2Request nextReq = ListObjectsV2Request.builder().bucket(bucketName).prefix(prefix) - .continuationToken(nextContinuationToken).build(); - - ListObjectsV2Response nextResponse = s3.listObjectsV2(nextReq).get(); - if (nextResponse != null) { - storedFilesSummary.addAll(nextResponse.contents()); - nextContinuationToken = nextResponse.nextContinuationToken(); - } else { - nextContinuationToken = null; + ListObjectsV2Response listResponse; + String nextToken = null; + do { + ListObjectsV2Request req = listRequest.toBuilder().continuationToken(nextToken).build(); + listResponse = s3.listObjectsV2(req).get(); + for (S3Object item : listResponse.contents()) { + String fileName = item.key().substring(prefix.length()); + ret.add(fileName); } - } + nextToken = listResponse.nextContinuationToken(); + if (listResponse.isTruncated() && nextToken == null) { + logger.warning("S3 listAllFiles: list is truncated but nextContinuationToken is null; stopping to avoid infinite loop"); + break; + } + } while (listResponse.isTruncated()); } catch 
(InterruptedException | ExecutionException e) { throw new IOException("S3AccessIO: Failed to get objects for listing.", e); } - for (S3Object item : storedFilesSummary) { - String fileName = item.key().substring(prefix.length()); - ret.add(fileName); - } return ret; } From a94d165b0cb6198885a63a24d9f8b923e13b904d Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 11 Jun 2026 14:59:20 -0400 Subject: [PATCH 3/5] doc B2, refactor list methods --- .../source/installation/config.rst | 6 ++ .../iq/dataverse/dataaccess/S3AccessIO.java | 98 ++++++------------- 2 files changed, 35 insertions(+), 69 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index f97c5e09d29..d944f7781ae 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1461,6 +1461,12 @@ You may provide the values for these via any `supported MicroProfile Config API Reported Working S3-Compatible Storage ###################################### + +`BackBlaze B2 `_ + (as of 6/11/2026) + Set ``dataverse.files..disable-tagging=true``, as B2 does not support tagging (and will fail without this setting). + Tested with ``.path-style-access=true``, ``.download-redirect=true``, and ``.upload-redirect=true``. + `Minio v2018-09-12 `_ Set ``dataverse.files..path-style-access=true``, as Minio works path-based. Works pretty smooth, easy to setup. **Can be used for quick testing, too:** just use the example values above. 
Uses the public (read: unsecure and diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index fcb141607b8..491aa91bd17 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -696,18 +696,12 @@ private File createTempFile(Path path, InputStream inputStream) throws IOExcepti return targetFile; } - @Override - public List listAuxObjects() throws IOException { - if (!this.canWrite()) { - open(); - } - String prefix = getDestinationKey(""); - - List ret = new ArrayList<>(); + private List listObjects(String prefix, String methodName) throws IOException { + List objects = new ArrayList<>(); ListObjectsV2Request listRequest = ListObjectsV2Request.builder() .bucket(bucketName) .prefix(prefix) - .maxKeys(1000) + .maxKeys(1000) // Required for storJ .build(); try { @@ -716,23 +710,35 @@ public List listAuxObjects() throws IOException { do { ListObjectsV2Request req = listRequest.toBuilder().continuationToken(nextToken).build(); listResponse = s3.listObjectsV2(req).get(); - for (S3Object item : listResponse.contents()) { - String destinationKey = item.key(); - String fileName = destinationKey.substring(destinationKey.lastIndexOf(".") + 1); - logger.fine("S3 cached aux object fileName: " + fileName); - ret.add(fileName); - } + objects.addAll(listResponse.contents()); nextToken = listResponse.nextContinuationToken(); if (listResponse.isTruncated() && nextToken == null) { - logger.warning("S3 listAuxObjects: list is truncated but nextContinuationToken is null; stopping to avoid infinite loop"); + logger.warning("S3 " + methodName + ": list is truncated but nextContinuationToken is null; stopping to avoid infinite loop"); break; } } while (listResponse.isTruncated()); } catch (InterruptedException | ExecutionException e) { - throw new IOException("S3AccessIO: Failed to get aux objects for 
listing.", e); + throw new IOException("S3AccessIO: Failed to get objects for listing in " + methodName + ".", e); + } + return objects; + } + + @Override + public List listAuxObjects() throws IOException { + if (!this.canWrite()) { + open(); } + String prefix = getDestinationKey(""); + List contents = listObjects(prefix, "listAuxObjects"); - return ret; + return contents.stream() + .map(item -> { + String destinationKey = item.key(); + String fileName = destinationKey.substring(destinationKey.lastIndexOf(".") + 1); + logger.fine("S3 cached aux object fileName: " + fileName); + return fileName; + }) + .collect(Collectors.toList()); } @Override @@ -760,30 +766,7 @@ public void deleteAllAuxObjects() throws IOException { } String prefix = getDestinationKey(""); - - List storedAuxFilesSummary = new ArrayList<>(); - ListObjectsV2Request listRequest = ListObjectsV2Request.builder() - .bucket(bucketName) - .prefix(prefix) - .maxKeys(1000) - .build(); - - try { - ListObjectsV2Response listResponse; - String nextToken = null; - do { - ListObjectsV2Request req = listRequest.toBuilder().continuationToken(nextToken).build(); - listResponse = s3.listObjectsV2(req).get(); - storedAuxFilesSummary.addAll(listResponse.contents()); - nextToken = listResponse.nextContinuationToken(); - if (listResponse.isTruncated() && nextToken == null) { - logger.warning("S3 deleteAllAuxObjects: list is truncated but nextContinuationToken is null; stopping to avoid infinite loop"); - break; - } - } while (listResponse.isTruncated()); - } catch (InterruptedException | ExecutionException e) { - throw new IOException("S3AccessIO: Failed to get aux objects for listing to delete.", e); - } + List storedAuxFilesSummary = listObjects(prefix, "deleteAllAuxObjects"); if (storedAuxFilesSummary.isEmpty()) { logger.fine("S3AccessIO: No auxiliary objects to delete."); @@ -1454,34 +1437,11 @@ private List listAllFiles() throws IOException { } String prefix = dataset.getAuthorityForFileStorage() + "/" + 
dataset.getIdentifierForFileStorage() + "/"; - List ret = new ArrayList<>(); - ListObjectsV2Request listRequest = ListObjectsV2Request.builder() - .bucket(bucketName) - .prefix(prefix) - .maxKeys(1000) //Required for storJ - .build(); - - try { - ListObjectsV2Response listResponse; - String nextToken = null; - do { - ListObjectsV2Request req = listRequest.toBuilder().continuationToken(nextToken).build(); - listResponse = s3.listObjectsV2(req).get(); - for (S3Object item : listResponse.contents()) { - String fileName = item.key().substring(prefix.length()); - ret.add(fileName); - } - nextToken = listResponse.nextContinuationToken(); - if (listResponse.isTruncated() && nextToken == null) { - logger.warning("S3 listAllFiles: list is truncated but nextContinuationToken is null; stopping to avoid infinite loop"); - break; - } - } while (listResponse.isTruncated()); - } catch (InterruptedException | ExecutionException e) { - throw new IOException("S3AccessIO: Failed to get objects for listing.", e); - } + List contents = listObjects(prefix, "listAllFiles"); - return ret; + return contents.stream() + .map(item -> item.key().substring(prefix.length())) + .collect(Collectors.toList()); } private void deleteFile(String fileName) throws IOException { From c458f0664f3bf2ebb06886ab4dc302c56ffa7615 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 11 Jun 2026 15:07:29 -0400 Subject: [PATCH 4/5] rel note --- doc/release-notes/12454-S3Fixes.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 doc/release-notes/12454-S3Fixes.md diff --git a/doc/release-notes/12454-S3Fixes.md b/doc/release-notes/12454-S3Fixes.md new file mode 100644 index 00000000000..06dd704fe01 --- /dev/null +++ b/doc/release-notes/12454-S3Fixes.md @@ -0,0 +1,5 @@ +### Support for Backblaze B2 as an S3 store, improved support for Storj + +An improvement to the `dataverse.files.<id>.disable-tagging=true` support for S3 stores now allows use of Backblaze B2 as an S3 implementation (and may help other stores that do not 
handle tagging). + +The `/api/datasets/<id>/cleanStorage` endpoint will now work for datasets with more than 1000 files when Storj is used as the S3 store. \ No newline at end of file From a25dbd1f7135a94d9a1ace091aeabc7ed6d48b86 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 11 Jun 2026 15:11:32 -0400 Subject: [PATCH 5/5] avoid null cont. token --- .../edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 491aa91bd17..1454acb9720 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -708,7 +708,11 @@ private List listObjects(String prefix, String methodName) throws IOEx ListObjectsV2Response listResponse; String nextToken = null; do { - ListObjectsV2Request req = listRequest.toBuilder().continuationToken(nextToken).build(); + ListObjectsV2Request.Builder reqBuilder = listRequest.toBuilder(); + if (nextToken != null) { + reqBuilder = reqBuilder.continuationToken(nextToken); + } + ListObjectsV2Request req = reqBuilder.build(); listResponse = s3.listObjectsV2(req).get(); objects.addAll(listResponse.contents()); nextToken = listResponse.nextContinuationToken();