diff --git a/doc/release-notes/12454-S3Fixes.md b/doc/release-notes/12454-S3Fixes.md new file mode 100644 index 00000000000..06dd704fe01 --- /dev/null +++ b/doc/release-notes/12454-S3Fixes.md @@ -0,0 +1,5 @@ +### Support for Backblaze B2 as an S3 store, improved support for Storj + +An improvement to the .disable-tagging=true support for S3 stores now allows use of Backblaze B2 as an S3 implementation (and may help other stores that do not handle tagging). + +The /api/datasets/{id}/cleanStorage endpoint will now work for datasets with more than 1000 files when Storj is used as the S3 store. \ No newline at end of file diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index f97c5e09d29..d944f7781ae 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1461,6 +1461,12 @@ You may provide the values for these via any `supported MicroProfile Config API Reported Working S3-Compatible Storage ###################################### + +`Backblaze B2 `_ + (as of 6/11/2026) + Set ``dataverse.files.<id>.disable-tagging=true``, as B2 does not support tagging (and will fail without this setting). + Tested with ``.path-style-access=true``, ``.download-redirect=true``, and ``.upload-redirect=true``. + `Minio v2018-09-12 `_ Set ``dataverse.files.<id>.path-style-access=true``, as Minio works path-based. Works pretty smooth, easy to setup. **Can be used for quick testing, too:** just use the example values above. 
Uses the public (read: unsecure and diff --git a/src/main/java/edu/harvard/iq/dataverse/S3PackageImporter.java b/src/main/java/edu/harvard/iq/dataverse/S3PackageImporter.java index 7b8e5ebeec9..c5ef8e9b1b5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/S3PackageImporter.java +++ b/src/main/java/edu/harvard/iq/dataverse/S3PackageImporter.java @@ -77,23 +77,17 @@ public void copyFromS3(Dataset dataset, String s3ImportPath) throws IOException ListObjectsV2Request listReq = ListObjectsV2Request.builder() .bucket(dcmBucketName) .prefix(dcmDatasetKey) + .maxKeys(1000) .build(); - ListObjectsV2Response listRes; + List storedDcmDatasetFilesSummary = new ArrayList<>(); try { - listRes = s3.listObjectsV2(listReq); + s3.listObjectsV2Paginator(listReq).stream() + .flatMap(r -> r.contents().stream()) + .forEach(storedDcmDatasetFilesSummary::add); } catch (S3Exception se) { logger.info("Caught an S3Exception in s3ImportUtil: " + se.getMessage()); - throw new IOException("S3 listAuxObjects: failed to get a listing for " + dcmDatasetKey); - } - - List storedDcmDatasetFilesSummary = new ArrayList<>(listRes.contents()); - - while (listRes.isTruncated()) { - logger.fine("S3 listAuxObjects: going to next page of list"); - listReq = listReq.toBuilder().continuationToken(listRes.nextContinuationToken()).build(); - listRes = s3.listObjectsV2(listReq); - storedDcmDatasetFilesSummary.addAll(listRes.contents()); + throw new IOException("S3 listObjects: failed to get a listing for " + dcmDatasetKey); } for (S3Object item : storedDcmDatasetFilesSummary) { diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 6d3fe205639..1454acb9720 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -696,56 +696,53 @@ private File createTempFile(Path path, InputStream inputStream) throws IOExcepti return 
targetFile; } - @Override - public List listAuxObjects() throws IOException { - if (!this.canWrite()) { - open(); - } - String prefix = getDestinationKey(""); - - List ret = new ArrayList<>(); - ListObjectsV2Request listObjectsReqManual = ListObjectsV2Request.builder().bucket(bucketName).prefix(prefix) + private List listObjects(String prefix, String methodName) throws IOException { + List objects = new ArrayList<>(); + ListObjectsV2Request listRequest = ListObjectsV2Request.builder() + .bucket(bucketName) + .prefix(prefix) + .maxKeys(1000) // Required for storJ .build(); - ListObjectsV2Response listObjectsResponse = null; try { - listObjectsResponse = s3.listObjectsV2(listObjectsReqManual).get(); - } catch (InterruptedException | ExecutionException e) { - throw new IOException("S3 listAuxObjects: failed to get a listing for " + prefix, e); - } - - if (listObjectsResponse == null) { - return ret; - } - - List storedAuxFilesSummary = new ArrayList<>(listObjectsResponse.contents()); - - try { - String nextContinuationToken = listObjectsResponse.nextContinuationToken(); - while (nextContinuationToken != null) { - logger.fine("S3 listAuxObjects: going to next page of list"); - ListObjectsV2Request nextReq = ListObjectsV2Request.builder().bucket(bucketName).prefix(prefix) - .continuationToken(nextContinuationToken).build(); - - ListObjectsV2Response nextResponse = s3.listObjectsV2(nextReq).get(); - if (nextResponse != null) { - storedAuxFilesSummary.addAll(nextResponse.contents()); - nextContinuationToken = nextResponse.nextContinuationToken(); - } else { - nextContinuationToken = null; + ListObjectsV2Response listResponse; + String nextToken = null; + do { + ListObjectsV2Request.Builder reqBuilder = listRequest.toBuilder(); + if (nextToken != null) { + reqBuilder = reqBuilder.continuationToken(nextToken); } - } + ListObjectsV2Request req = reqBuilder.build(); + listResponse = s3.listObjectsV2(req).get(); + objects.addAll(listResponse.contents()); + nextToken = 
listResponse.nextContinuationToken(); + if (listResponse.isTruncated() && nextToken == null) { + logger.warning("S3 " + methodName + ": list is truncated but nextContinuationToken is null; stopping to avoid infinite loop"); + break; + } + } while (listResponse.isTruncated()); } catch (InterruptedException | ExecutionException e) { - throw new IOException("S3AccessIO: Failed to get aux objects for listing.", e); + throw new IOException("S3AccessIO: Failed to get objects for listing in " + methodName + ".", e); } + return objects; + } - for (S3Object item : storedAuxFilesSummary) { - String destinationKey = item.key(); - String fileName = destinationKey.substring(destinationKey.lastIndexOf(".") + 1); - logger.fine("S3 cached aux object fileName: " + fileName); - ret.add(fileName); + @Override + public List listAuxObjects() throws IOException { + if (!this.canWrite()) { + open(); } - return ret; + String prefix = getDestinationKey(""); + List contents = listObjects(prefix, "listAuxObjects"); + + return contents.stream() + .map(item -> { + String destinationKey = item.key(); + String fileName = destinationKey.substring(destinationKey.lastIndexOf(".") + 1); + logger.fine("S3 cached aux object fileName: " + fileName); + return fileName; + }) + .collect(Collectors.toList()); } @Override @@ -773,22 +770,7 @@ public void deleteAllAuxObjects() throws IOException { } String prefix = getDestinationKey(""); - - List storedAuxFilesSummary = new ArrayList<>(); - try { - ListObjectsV2Request listRequest = ListObjectsV2Request.builder().bucket(bucketName).prefix(prefix).build(); - - ListObjectsV2Response listResponse; - do { - listResponse = s3.listObjectsV2(listRequest).get(); - storedAuxFilesSummary.addAll(listResponse.contents()); - - listRequest = listRequest.toBuilder().continuationToken(listResponse.nextContinuationToken()).build(); - } while (listResponse.isTruncated()); - - } catch (InterruptedException | ExecutionException e) { - throw new IOException("S3AccessIO: Failed 
to get aux objects for listing to delete.", e); - } + List storedAuxFilesSummary = listObjects(prefix, "deleteAllAuxObjects"); if (storedAuxFilesSummary.isEmpty()) { logger.fine("S3AccessIO: No auxiliary objects to delete."); @@ -986,7 +968,7 @@ public boolean downloadRedirectEnabled(String auxObjectTag) { * @param auxiliaryFileName (optional) - file name, if different from the main * file label. * @return redirect url - * @throws IOException. + * @throws IOException */ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) throws IOException { @@ -1314,6 +1296,12 @@ private static AwsCredentialsProvider getCredentialsProvider(String driverId) { } public void removeTempTag() throws IOException { + final boolean taggingDisabled = JvmSettings.DISABLE_S3_TAGGING.lookupOptional(Boolean.class, this.driverId) + .orElse(false); + if (taggingDisabled) { + logger.fine("S3 tagging disabled for storage driver " + driverId + "; skipping temp tag removal."); + return; + } if (!(dvObject instanceof DataFile)) { logger.warning("Attempt to remove tag from non-file DVObject id: " + dvObject.getId()); throw new IOException("Attempt to remove temp tag from non-file S3 Object"); @@ -1453,47 +1441,11 @@ private List listAllFiles() throws IOException { } String prefix = dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/"; - List ret = new ArrayList<>(); - ListObjectsV2Request listObjectsReqManual = ListObjectsV2Request.builder().bucket(bucketName).prefix(prefix) - .build(); - - ListObjectsV2Response listObjectsResponse = null; - try { - listObjectsResponse = s3.listObjectsV2(listObjectsReqManual).get(); - } catch (InterruptedException | ExecutionException e) { - throw new IOException("S3 listObjects: failed to get a listing for " + prefix, e); - } - - if (listObjectsResponse == null) { - return ret; - } - - List storedFilesSummary = new ArrayList<>(listObjectsResponse.contents()); - - try { - 
String nextContinuationToken = listObjectsResponse.nextContinuationToken(); - while (nextContinuationToken != null) { - logger.fine("S3 listObjects: going to next page of list"); - ListObjectsV2Request nextReq = ListObjectsV2Request.builder().bucket(bucketName).prefix(prefix) - .continuationToken(nextContinuationToken).build(); - - ListObjectsV2Response nextResponse = s3.listObjectsV2(nextReq).get(); - if (nextResponse != null) { - storedFilesSummary.addAll(nextResponse.contents()); - nextContinuationToken = nextResponse.nextContinuationToken(); - } else { - nextContinuationToken = null; - } - } - } catch (InterruptedException | ExecutionException e) { - throw new IOException("S3AccessIO: Failed to get objects for listing.", e); - } + List contents = listObjects(prefix, "listAllFiles"); - for (S3Object item : storedFilesSummary) { - String fileName = item.key().substring(prefix.length()); - ret.add(fileName); - } - return ret; + return contents.stream() + .map(item -> item.key().substring(prefix.length())) + .collect(Collectors.toList()); } private void deleteFile(String fileName) throws IOException {