From a7d4b002209c13011dd8603d4e37f0edcd9c7664 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 6 Apr 2026 15:40:16 -0400 Subject: [PATCH 01/10] update downloadDatafiles to use writeGBRRecords (cherry picked from commit 2a1a98ece915c37a4f232907c2b7cb9b7c54e2e4) --- .../iq/dataverse/FileDownloadServiceBean.java | 188 +++++++++-- .../edu/harvard/iq/dataverse/api/Access.java | 302 +++++++++--------- src/main/java/propertyFiles/Bundle.properties | 2 + 3 files changed, 323 insertions(+), 169 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java index ce8e8b5f4bc..81237ad7660 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java @@ -18,9 +18,12 @@ import edu.harvard.iq.dataverse.util.*; import jakarta.ejb.EJB; import jakarta.ejb.Stateless; +import jakarta.faces.application.FacesMessage; import jakarta.faces.context.FacesContext; import jakarta.inject.Inject; import jakarta.inject.Named; +import jakarta.ejb.TransactionAttribute; +import jakarta.ejb.TransactionAttributeType; import jakarta.persistence.EntityManager; import jakarta.persistence.PersistenceContext; import jakarta.servlet.ServletOutputStream; @@ -31,7 +34,7 @@ import java.sql.Timestamp; import java.util.*; import java.util.logging.Logger; -//import org.primefaces.context.RequestContext; +import java.io.FileNotFoundException; /** * @@ -43,6 +46,7 @@ @Named public class FileDownloadServiceBean implements java.io.Serializable { + public static final double GUESTBOOK_RESPONSE_BATCH_SIZE = 250; @PersistenceContext(unitName = "VDCNet-ejbPU") private EntityManager em; @@ -123,23 +127,35 @@ public void writeGuestbookAndStartBatchDownload(GuestbookResponse guestbookRespo String customZipDownloadUrl = settingsService.getValueForKey(SettingsServiceBean.Key.CustomZipDownloadServiceUrl); boolean useCustomZipService = customZipDownloadUrl != null; - String zipServiceKey = null; + String zipServiceKey = null; + List fileIdsList = new ArrayList<>(Arrays.asList(fileIds)); - // Do we need to write GuestbookRecord entries for the files? - if (!doNotSaveGuestbookRecord || useCustomZipService) { + List selectedDataFiles = new ArrayList<>(); + //Should not be getting exceptions with Dataverse generating the fileIds + try { + selectedDataFiles = resolveSelectedDataFilesInDataset(fileIdsList); + } catch (FileNotFoundException e) { + PrimeFaces.current().dialog().showMessageDynamic(new FacesMessage(FacesMessage.SEVERITY_ERROR, "Error", e.getMessage())); + return; + } catch (MultipleDatasetsException e) { + PrimeFaces.current().dialog().showMessageDynamic(new FacesMessage(FacesMessage.SEVERITY_ERROR, "Error", e.getMessage())); + return; + } - List list = new ArrayList<>(Arrays.asList(guestbookResponse.getSelectedFileIds().split(","))); - Timestamp timestamp = null; - - for (String idAsString : list) { + // Do we need to write GuestbookRecord entries for the files? + if (!doNotSaveGuestbookRecord) { + // Code here assumes user can download all files (which should be true when called from DatasetPage) + // Authorization will be checked in the custom zipper or redirect URL, so the only impact would be extra guestbookresponses + gbrid = writeGuestbookResponseRecords(guestbookResponse, selectedDataFiles ).getLast(); + } + if(useCustomZipService) { + + Timestamp timestamp = null; + //Reset values + + for (DataFile df : selectedDataFiles) { //DataFile df = datafileService.findCheapAndEasy(new Long(idAsString)); - DataFile df = datafileService.find(new Long(idAsString)); if (df != null) { - if (!doNotSaveGuestbookRecord) { - guestbookResponse.setDataFile(df); - gbrid = writeGuestbookResponseRecord(guestbookResponse); - } - if (useCustomZipService) { if (zipServiceKey == null) { zipServiceKey = generateServiceKey(); @@ -152,8 +168,9 @@ public void writeGuestbookAndStartBatchDownload(GuestbookResponse guestbookRespo } } } + } - + if (useCustomZipService) { redirectToCustomZipDownloadService(customZipDownloadUrl, zipServiceKey); } else { @@ -226,27 +243,153 @@ public void writeGuestbookResponseRecord(GuestbookResponse guestbookResponse, Fi writeGuestbookResponseRecord(guestbookResponse); } } - + + public List writeGuestbookResponseRecords(GuestbookResponse guestbookResponse, List selectedDataFiles) { + if (guestbookResponse == null || guestbookResponse.getSelectedFileIds() == null || guestbookResponse.getSelectedFileIds().isBlank()) { + return Collections.emptyList(); + } + + if (selectedDataFiles.isEmpty()) { + return Collections.emptyList(); + } + + List responsesToPersist = new ArrayList<>(selectedDataFiles.size()); + for (DataFile dataFile : selectedDataFiles) { + GuestbookResponse perFileResponse = new GuestbookResponse(guestbookResponse); + perFileResponse.setDataFile(dataFile); + responsesToPersist.add(perFileResponse); + } + List savedIds = saveGuestbookResponseRecordsAndMDCLogEntries(responsesToPersist); + return savedIds; + } + + public List resolveSelectedDataFilesInDataset(List rawFileIds) + throws FileNotFoundException, MultipleDatasetsException { + + List selectedDataFiles = new ArrayList<>(rawFileIds.size()); + Long datasetId = null; + + for (String rawFileId : rawFileIds) { + if (rawFileId == null || rawFileId.isBlank()) { + continue; + } + + Long fileId; + try { + fileId = Long.valueOf(rawFileId.trim()); + } catch (NumberFormatException nfe) { + throw new FileNotFoundException("Invalid file id: " + rawFileId); + } + + DataFile dataFile = datafileService.find(fileId); + if (dataFile == null) { + throw new FileNotFoundException("No file found for id: " + fileId); + } + + Long currentDatasetId = dataFile.getOwner().getId(); + if (datasetId == null) { + datasetId = currentDatasetId; + } else if (!datasetId.equals(currentDatasetId)) { + throw new MultipleDatasetsException( + "Selected files do not belong to the same dataset." + ); + } + + selectedDataFiles.add(dataFile); + } + + return selectedDataFiles; + } + + @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) + public List saveGuestbookResponseRecordsAndMDCLogEntries(List guestbookResponses) { + if (guestbookResponses == null || guestbookResponses.isEmpty()) { + return Collections.emptyList(); + } + + List savedIds = new ArrayList<>(guestbookResponses.size()); + + Timestamp now = new Timestamp(System.currentTimeMillis()); + + for (int i = 0; i < guestbookResponses.size(); i++) { + GuestbookResponse response = guestbookResponses.get(i); + + try { + response.setResponseTime(now); + em.persist(response); + + if (response.getId() != null) { + savedIds.add(response.getId().toString()); + } + + DatasetVersion version = response.getDatasetVersion(); + if (version == null) { + version = response.getDataset().getReleasedVersion(); + } + + DataFile dataFile = response.getDataFile(); + MakeDataCountEntry entry = new MakeDataCountEntry( + FacesContext.getCurrentInstance(), + dvRequestService, + version, + dataFile + ); + entry.setTargetUrl("/api/access/datafile/" + dataFile.getId()); + entry.setRequestUrl("/api/access/datafile/" + dataFile.getId()); + mdcLogService.logEntry(entry); + + if ((i + 1) % GUESTBOOK_RESPONSE_BATCH_SIZE == 0) { + em.flush(); + em.clear(); + } + } catch (RuntimeException ex) { + DataFile dataFile = response.getDataFile(); + logger.warning("Exception writing GuestbookResponse" + + (dataFile != null ? " for file: " + dataFile.getId() : "") + + " : " + ex.getLocalizedMessage()); + } + } + + em.flush(); + em.clear(); + + return savedIds; + } + public String writeGuestbookResponseRecord(GuestbookResponse guestbookResponse) { String guestbookResponseIds = ""; try { - CreateGuestbookResponseCommand cmd = new CreateGuestbookResponseCommand(dvRequestService.getDataverseRequest(), guestbookResponse, guestbookResponse.getDataset()); + CreateGuestbookResponseCommand cmd = new CreateGuestbookResponseCommand( + dvRequestService.getDataverseRequest(), + guestbookResponse, + guestbookResponse.getDataset() + ); commandEngine.submit(cmd); guestbookResponseIds = guestbookResponse.getId().toString(); + DatasetVersion version = guestbookResponse.getDatasetVersion(); - + //Sometimes guestbookResponse doesn't have a version, so we grab the released version if (null == version) { version = guestbookResponse.getDataset().getReleasedVersion(); } - MakeDataCountEntry entry = new MakeDataCountEntry(FacesContext.getCurrentInstance(), dvRequestService, version, guestbookResponse.getDataFile()); + + MakeDataCountEntry entry = new MakeDataCountEntry( + FacesContext.getCurrentInstance(), + dvRequestService, + version, + guestbookResponse.getDataFile() + ); //As the api download url is not available at this point we construct it manually entry.setTargetUrl("/api/access/datafile/" + guestbookResponse.getDataFile().getId()); entry.setRequestUrl("/api/access/datafile/" + guestbookResponse.getDataFile().getId()); mdcLogService.logEntry(entry); } catch (CommandException e) { //if an error occurs here then download won't happen no need for response recs... - logger.warning("Exception writing GuestbookResponse for file: " + guestbookResponse.getDataFile().getId() + " : " + e.getLocalizedMessage()); + logger.warning("Exception writing GuestbookResponse for file: " + + guestbookResponse.getDataFile().getId() + + " : " + + e.getLocalizedMessage()); } return guestbookResponseIds; } @@ -638,4 +781,9 @@ public String getDirectStorageLocatrion(String storageLocation) { return null; } + + public class MultipleDatasetsException extends Throwable { + public MultipleDatasetsException(String s) { + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index a2d7d3ed525..f2b14c51ada 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -1051,7 +1051,7 @@ private Response downloadDatafiles(ContainerRequestContext crc, String body, boo } String[] fileIdParams = getFileIdsCSV(body); - + List fileIdsList = new ArrayList<>(Arrays.asList(fileIdParams)); /* Note - fileIds coming from the POST ends in '\n' and a ',' has been added after the last file id number and before a * final '\n' - this stops the last item from being parsed in the fileIds.split(","); line below. */ @@ -1070,175 +1070,179 @@ private Response downloadDatafiles(ContainerRequestContext crc, String body, boo } } - Map datafilesMap = new HashMap<>(); - List authorizedDatafileIds = new ArrayList<>(); + List authorizedDatafiles = new ArrayList<>(); - // Get DataFiles, check authorized access, check for multiple Datasets, and check for required guestbook response - Set datasetIds = new HashSet<>(); - Boolean guestbookResponseRequired = null; - for (int i = 0; i < fileIdParams.length; i++) { - DataFile df = findDataFileUserCanSeeOrDieWrapper(fileIdParams[i], req); - if (guestbookResponseRequired == null) { - // Only need to check this on the first file - guestbookResponseRequired = checkGuestbookRequiredResponse(user, uriInfo, df, gbrids); - } - datafilesMap.put(df.getId(), df); - datasetIds.add(df.getOwner() != null ? df.getOwner().getId() : 0L); - if (isAccessAuthorized(user, df)) { - authorizedDatafileIds.add(df.getId()); - } - if (datasetIds.size() > 1) { - // All files must be from the same Dataset - return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.download.failure.multipleDatasets")); - } else if (authorizedDatafileIds.contains(df.getId()) && guestbookResponseRequired) { - try { - GuestbookResponse gbr = getGuestbookResponseFromBody(df, GuestbookResponse.DOWNLOAD, body, user); - if (gbr != null) { - engineSvc.submit(new CreateGuestbookResponseCommand(dvRequestService.getDataverseRequest(), gbr, gbr.getDataset())); - donotwriteGBResponse = true; - // Further down the actual download will also create a simple download response for every datafile listed based on the donotwriteGBResponse flag. - // Modifying donotwriteGBResponse will block that so we also need to log the MDC entry here - MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, df); - mdcLogService.logEntry(entry); - } else { - return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.download.failure.guestbookResponseMissing", getGuestbookIdFromDatafile(df))); - } - } catch (JsonParseException | CommandException ex) { - List args = Arrays.asList(df.getDisplayName(), ex.getLocalizedMessage()); - return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.download.failure.guestbook.commandError", args)); - } - } + // Get DataFiles, check authorized access, and check for required guestbook response + // ToDo - cache dataset perms, e.g. editdataset let's you get all files (assuming one dataset) + // Same for filedownload if assigned at the dataset level + + List selectedDataFiles = new ArrayList<>(); + try { + selectedDataFiles = fileDownloadService.resolveSelectedDataFilesInDataset(fileIdsList); + } catch (FileNotFoundException e) { + return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.download.failure.invalidFileId")); + } catch (FileDownloadServiceBean.MultipleDatasetsException e) { + return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.download.failure.multipleDatasets")); } + if(!selectedDataFiles.isEmpty()) { - if (useCustomZipService) { - URI redirect_uri = null; - try { - //ToDo - make extnerla Zipper LocallyFAIR aware - redirect_uri = handleCustomZipDownload(user, customZipServiceUrl, fileIdParams, uriInfo, headers, donotwriteGBResponse, true); - } catch (WebApplicationException wae) { - throw wae; - } + Boolean guestbookResponseRequired = checkGuestbookRequiredResponse(crc, uriInfo, selectedDataFiles.getFirst(), gbrids); + logger.fine("Downloading" + fileIdParams.length + " files. GBR required: " + guestbookResponseRequired); - Response redirect = Response.seeOther(redirect_uri).build(); - logger.fine("Issuing redirect to the file location on S3."); - throw new RedirectionException(redirect); + for (DataFile df : selectedDataFiles) { + if (isAccessAuthorized(user, df)) { + authorizedDatafiles.add(df); + } + } + if (!donotwriteGBResponse) { + if (guestbookResponseRequired) { + try { + GuestbookResponse gbr = getGuestbookResponseFromBody(authorizedDatafiles.getFirst(), GuestbookResponse.DOWNLOAD, body, user); + if (gbr != null) { + fileDownloadService.writeGuestbookResponseRecords(gbr, authorizedDatafiles); + } else { + return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.download.failure.guestbookResponseMissing", getGuestbookIdFromDatafile(authorizedDatafiles.getFirst()))); + } + } catch (JsonParseException ex) { + List args = Arrays.asList(authorizedDatafiles.getFirst().getDisplayName(), ex.getLocalizedMessage()); + return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.download.failure.guestbook.commandError", args)); + } - } + } else { + GuestbookResponse gbr = guestbookResponseService.initAPIGuestbookResponse(authorizedDatafiles.getFirst().getOwner(), authorizedDatafiles.getFirst(), session, user); + fileDownloadService.writeGuestbookResponseRecords(gbr, authorizedDatafiles); + } + //We've written the gb responses if needed + donotwriteGBResponse = true; + } - // Not using the "custom service" - API will zip the file, - // and stream the output, in the "normal" manner: - // to use via anon inner class - final boolean getOriginal = getOrig; - final boolean skipGBResponse = donotwriteGBResponse; // Response may have been written prior and donotwriteGBResponse may have been modified. + if (useCustomZipService) { + URI redirect_uri = null; + try { + redirect_uri = handleCustomZipDownload(user, customZipServiceUrl, fileIdParams, uriInfo, headers, donotwriteGBResponse, true); + } catch (WebApplicationException wae) { + throw wae; + } - StreamingOutput stream = new StreamingOutput() { + Response redirect = Response.seeOther(redirect_uri).build(); + logger.fine("Issuing redirect to the file location on S3."); + throw new RedirectionException(redirect); - @Override - public void write(OutputStream os) throws IOException, - WebApplicationException { - DataFileZipper zipper = null; - String fileManifest = ""; - long sizeTotal = 0L; - - for (DataFile file : datafilesMap.values()) { - if (authorizedDatafileIds.contains(file.getId())) { - logger.fine("adding datafile (id=" + file.getId() + ") to the download list of the ZippedDownloadInstance."); - //downloadInstance.addDataFile(file); - if (skipGBResponse != true && file.isReleased()) { - GuestbookResponse gbr = guestbookResponseService.initAPIGuestbookResponse(file.getOwner(), file, session, user); - guestbookResponseService.save(gbr); - MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, file); - mdcLogService.logEntry(entry); - } + } - if (zipper == null) { - // This is the first file we can serve - so we now know that we are going to be able - // to produce some output. - zipper = new DataFileZipper(os); - zipper.setFileManifest(fileManifest); - String bundleName = generateMultiFileBundleName(file.getOwner(), versionTag); - response.setHeader("Content-disposition", "attachment; filename=\"" + bundleName + "\""); - response.setHeader("Content-Type", "application/zip; name=\"" + bundleName + "\""); - } + // Not using the "custom service" - API will zip the file, + // and stream the output, in the "normal" manner: + + // to use via anon inner class + final boolean getOriginal = getOrig; + final boolean skipGBResponse = donotwriteGBResponse; // Response may have been written prior and donotwriteGBResponse may have been modified. + final List allSelectedDataFiles = selectedDataFiles; + StreamingOutput stream = new StreamingOutput() { + + @Override + public void write(OutputStream os) throws IOException, + WebApplicationException { + DataFileZipper zipper = null; + String fileManifest = ""; + long sizeTotal = 0L; + + for (DataFile file : allSelectedDataFiles) { + if (authorizedDatafiles.contains(file)) { + logger.fine("adding datafile (id=" + file.getId() + ") to the download list of the ZippedDownloadInstance."); + //downloadInstance.addDataFile(file); + + + if (zipper == null) { + // This is the first file we can serve - so we now know that we are going to be able + // to produce some output. + zipper = new DataFileZipper(os); + zipper.setFileManifest(fileManifest); + String bundleName = generateMultiFileBundleName(file.getOwner(), versionTag); + response.setHeader("Content-disposition", "attachment; filename=\"" + bundleName + "\""); + response.setHeader("Content-Type", "application/zip; name=\"" + bundleName + "\""); + } - long size = 0L; - // is the original format requested, and is this a tabular datafile, with a preserved original? - if (getOriginal - && file.isTabularData() - && !StringUtil.isEmpty(file.getDataTable().getOriginalFileFormat())) { - //This size check is probably fairly inefficient as we have to get all the AccessObjects - //We do this again inside the zipper. I don't think there is a better solution - //without doing a large deal of rewriting or architecture redo. - //The previous size checks for non-original download is still quick. - //-MAD 4.9.2 - // OK, here's the better solution: we now store the size of the original file in - // the database (in DataTable), so we get it for free. - // However, there may still be legacy datatables for which the size is not saved. - // so the "inefficient" code is kept, below, as a fallback solution. - // -- L.A., 4.10 - - if (file.getDataTable().getOriginalFileSize() != null) { - size = file.getDataTable().getOriginalFileSize(); + long size = 0L; + // is the original format requested, and is this a tabular datafile, with a preserved original? + if (getOriginal + && file.isTabularData() + && !StringUtil.isEmpty(file.getDataTable().getOriginalFileFormat())) { + //This size check is probably fairly inefficient as we have to get all the AccessObjects + //We do this again inside the zipper. I don't think there is a better solution + //without doing a large deal of rewriting or architecture redo. + //The previous size checks for non-original download is still quick. + //-MAD 4.9.2 + // OK, here's the better solution: we now store the size of the original file in + // the database (in DataTable), so we get it for free. + // However, there may still be legacy datatables for which the size is not saved. + // so the "inefficient" code is kept, below, as a fallback solution. + // -- L.A., 4.10 + + if (file.getDataTable().getOriginalFileSize() != null) { + size = file.getDataTable().getOriginalFileSize(); + } else { + DataAccessRequest daReq = new DataAccessRequest(); + StorageIO storageIO = DataAccess.getStorageIO(file, daReq); + storageIO.open(); + size = storageIO.getAuxObjectSize(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION); + + // save it permanently: + file.getDataTable().setOriginalFileSize(size); + fileService.saveDataTable(file.getDataTable()); + } + if (size == 0L) { + throw new IOException("Invalid file size or accessObject when checking limits of zip file"); + } } else { - DataAccessRequest daReq = new DataAccessRequest(); - StorageIO storageIO = DataAccess.getStorageIO(file, daReq); - storageIO.open(); - size = storageIO.getAuxObjectSize(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION); - - // save it permanently: - file.getDataTable().setOriginalFileSize(size); - fileService.saveDataTable(file.getDataTable()); + size = file.getFilesize(); } - if (size == 0L) { - throw new IOException("Invalid file size or accessObject when checking limits of zip file"); + if (sizeTotal + size < zipDownloadSizeLimit) { + sizeTotal += zipper.addFileToZipStream(file, getOriginal); + } else { + String fileName = file.getFileMetadata().getLabel(); + String mimeType = file.getContentType(); + + zipper.addToManifest(fileName + " (" + mimeType + ") " + " skipped because the total size of the download bundle exceeded the limit of " + zipDownloadSizeLimit + " bytes.\r\n"); } } else { - size = file.getFilesize(); + boolean embargoed = FileUtil.isActivelyEmbargoed(file); + boolean retentionExpired = FileUtil.isRetentionExpired(file); + String manifestEntry = file.getFileMetadata().getLabel() + " IS " + ( + embargoed ? "EMBARGOED" : + retentionExpired ? "RETENTIONEXPIRED" : + file.isRestricted() ? "RESTRICTED" : + "NOTAUTHORIZED") + + " AND CANNOT BE DOWNLOADED\r\n"; + if (zipper == null) { + fileManifest = fileManifest + manifestEntry; + } else { + zipper.addToManifest(manifestEntry); + } } - if (sizeTotal + size < zipDownloadSizeLimit) { - sizeTotal += zipper.addFileToZipStream(file, getOriginal); - } else { - String fileName = file.getFileMetadata().getLabel(); - String mimeType = file.getContentType(); + } - zipper.addToManifest(fileName + " (" + mimeType + ") " + " skipped because the total size of the download bundle exceeded the limit of " + zipDownloadSizeLimit + " bytes.\r\n"); - } - } else { - boolean embargoed = FileUtil.isActivelyEmbargoed(file); - boolean retentionExpired = FileUtil.isRetentionExpired(file); - String manifestEntry = file.getFileMetadata().getLabel() + " IS " + ( - embargoed ? "EMBARGOED" : - retentionExpired ? "RETENTIONEXPIRED" : - file.isRestricted() ? "RESTRICTED" : - "NOTAUTHORIZED") - + " AND CANNOT BE DOWNLOADED\r\n"; - if (zipper == null) { - fileManifest = fileManifest + manifestEntry; - } else { - zipper.addToManifest(manifestEntry); - } + if (zipper == null) { + // If the DataFileZipper object is still NULL, it means that + // there were file ids supplied - but none of the corresponding + // files were accessible for this user. + // In which case we don't bother generating any output, and + // just give them a 403: + throw new ForbiddenException(); } - } - if (zipper == null) { - // If the DataFileZipper object is still NULL, it means that - // there were file ids supplied - but none of the corresponding - // files were accessible for this user. - // In which case we don't bother generating any output, and - // just give them a 403: - throw new ForbiddenException(); - } + // This will add the generated File Manifest to the zipped output, + // then flush and close the stream: + zipper.finalizeZipStream(); - // This will add the generated File Manifest to the zipped output, - // then flush and close the stream: - zipper.finalizeZipStream(); + //os.flush(); + //os.close(); + } + }; - //os.flush(); - //os.close(); - } - }; - return Response.ok(stream).build(); + return Response.ok(stream).build(); + } + return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.download.failure.noFiles")); } /* diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 0825816048d..94be049fdd1 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2969,6 +2969,8 @@ access.api.exception.dataset.not.found=Could not find requested dataset. access.api.download.failure.guestbookResponseMissing=You may not download this file without the required Guestbook response for guestbookID {0}. access.api.download.failure.guestbook.commandError=Problem trying download with guestbook response on {0} : {1} access.api.download.failure.multipleDatasets=All files being downloaded must be from the same Dataset. +access.api.download.failure.invalidFileId=One or more files could not be found. +access.api.download.failure.noFiles=No files to download. #permission permission.AddDataverse.label=AddDataverse From 447470d8cab8a03912282f44f9a8d7f7af1e775d Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 6 Apr 2026 17:14:07 -0400 Subject: [PATCH 02/10] fix getting ids, empty check (cherry picked from commit 5da66f5a27b2d110fadbe3bd1bc9d098c13e0f04) --- .../iq/dataverse/FileDownloadServiceBean.java | 20 +++++++++++-------- .../edu/harvard/iq/dataverse/api/Access.java | 2 +- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java index 81237ad7660..0e3d458b2ac 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java @@ -28,6 +28,7 @@ import jakarta.persistence.PersistenceContext; import jakarta.servlet.ServletOutputStream; import jakarta.servlet.http.HttpServletResponse; + import org.primefaces.PrimeFaces; import java.io.IOException; @@ -146,7 +147,12 @@ public void writeGuestbookAndStartBatchDownload(GuestbookResponse guestbookRespo if (!doNotSaveGuestbookRecord) { // Code here assumes user can download all files (which should be true when called from DatasetPage) // Authorization will be checked in the custom zipper or redirect URL, so the only impact would be extra guestbookresponses - gbrid = writeGuestbookResponseRecords(guestbookResponse, selectedDataFiles ).getLast(); + List gbrids = writeGuestbookResponseRecords(guestbookResponse, selectedDataFiles); + if(!gbrids.isEmpty()){ + gbrid = gbrids.getFirst(); + } else { + logger.warning("No GuestbookResponse records were created for the files that were selected for download."); + } } if(useCustomZipService) { @@ -170,7 +176,6 @@ public void writeGuestbookAndStartBatchDownload(GuestbookResponse guestbookRespo } } - if (useCustomZipService) { redirectToCustomZipDownloadService(customZipDownloadUrl, zipServiceKey); } else { @@ -245,7 +250,7 @@ public void writeGuestbookResponseRecord(GuestbookResponse guestbookResponse, Fi } public List writeGuestbookResponseRecords(GuestbookResponse guestbookResponse, List selectedDataFiles) { - if (guestbookResponse == null || guestbookResponse.getSelectedFileIds() == null || guestbookResponse.getSelectedFileIds().isBlank()) { + if (guestbookResponse == null || selectedDataFiles == null) { return Collections.emptyList(); } @@ -318,10 +323,6 @@ public List saveGuestbookResponseRecordsAndMDCLogEntries(List saveGuestbookResponseRecordsAndMDCLogEntries(List Date: Thu, 18 Jun 2026 16:13:43 -0400 Subject: [PATCH 03/10] add Locally FAIR to file retrieval MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary • Updated multi-file download logic to enforce Locally FAIR restrictions. Changes • Modified FileDownloadServiceBean.resolveSelectedDataFilesInDataset to accept a DataverseRequest and perform a isLocallyFAIR check on the first file being resolved. • Updated FileDownloadServiceBean.writeGuestbookAndStartBatchDownload to pass the current DataverseRequest when resolving files. • Updated Access.downloadDatafiles in the API to pass the DataverseRequest to the file resolution service. • Added necessary import for DataverseRequest in FileDownloadServiceBean.java. --- .../harvard/iq/dataverse/FileDownloadServiceBean.java | 11 +++++++++-- .../java/edu/harvard/iq/dataverse/api/Access.java | 3 ++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java index 0e3d458b2ac..3def42fa7cb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java @@ -7,6 +7,7 @@ import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.impl.CreateGuestbookResponseCommand; import edu.harvard.iq.dataverse.engine.command.impl.RequestAccessCommand; @@ -134,7 +135,7 @@ public void writeGuestbookAndStartBatchDownload(GuestbookResponse guestbookRespo List selectedDataFiles = new ArrayList<>(); //Should not be getting exceptions with Dataverse generating the fileIds try { - selectedDataFiles = resolveSelectedDataFilesInDataset(fileIdsList); + selectedDataFiles = resolveSelectedDataFilesInDataset(fileIdsList, dvRequestService.getDataverseRequest()); } catch (FileNotFoundException e) { PrimeFaces.current().dialog().showMessageDynamic(new FacesMessage(FacesMessage.SEVERITY_ERROR, "Error", e.getMessage())); return; @@ -268,7 +269,7 @@ public List writeGuestbookResponseRecords(GuestbookResponse guestbookRes return savedIds; } - public List resolveSelectedDataFilesInDataset(List rawFileIds) + public List resolveSelectedDataFilesInDataset(List rawFileIds, DataverseRequest req) throws FileNotFoundException, MultipleDatasetsException { List selectedDataFiles = new ArrayList<>(rawFileIds.size()); @@ -290,6 +291,12 @@ public List resolveSelectedDataFilesInDataset(List rawFileIds) if (dataFile == null) { throw new FileNotFoundException("No file found for id: " + fileId); } + + if (selectedDataFiles.isEmpty()) { + if (dataFile.isLocallyFAIR() && !permissionService.hasLocallyFAIRAccess(req, dataFile)) { + throw new FileNotFoundException("No file found for id: " + fileId); + } + } Long currentDatasetId = dataFile.getOwner().getId(); if (datasetId == null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index 2b18c71af2b..363d9caeaa7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -1078,7 +1078,8 @@ private Response downloadDatafiles(ContainerRequestContext crc, String body, boo List selectedDataFiles = new ArrayList<>(); try { - selectedDataFiles = fileDownloadService.resolveSelectedDataFilesInDataset(fileIdsList); + // Locally FAIR permission check is made in this call + selectedDataFiles = fileDownloadService.resolveSelectedDataFilesInDataset(fileIdsList, req); } catch (FileNotFoundException e) { return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.download.failure.invalidFileId")); } catch (FileDownloadServiceBean.MultipleDatasetsException e) { From 3a42f9f218d3d3f44c999c167e8be4ccdf4bf376 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 19 Jun 2026 16:03:11 -0400 Subject: [PATCH 04/10] refactors, use writeGBRRecords in processDatafiles... --- .../edu/harvard/iq/dataverse/api/Access.java | 229 +++++++++--------- 1 file changed, 111 insertions(+), 118 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index 363d9caeaa7..eeb3b2bf5bb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -170,10 +170,10 @@ public BundleDownloadInstance datafileBundle(@Context ContainerRequestContext cr // This will throw a ForbiddenException if access isn't authorized: checkAuthorization(req.getUser(), df); - if (checkGuestbookRequiredResponse(req.getUser(), uriInfo, df, gbrids)) { + if (checkGuestbookRequiredResponse(req.getUser(), uriInfo, df.getOwner(), gbrids)) { throw new BadRequestException(BundleUtil.getStringFromBundle("access.api.download.failure.guestbookResponseMissing", getGuestbookIdFromDatafile(df))); } - + // Assumes gbrecs is always true if gbrids is sent if (gbrecs != true && df.isReleased()) { // Write Guestbook record if not done previously and file is released GuestbookResponse gbr = guestbookResponseService.initAPIGuestbookResponse(df.getOwner(), df, session, getRequestor(req.getUser())); @@ -226,7 +226,8 @@ public BundleDownloadInstance datafileBundle(@Context ContainerRequestContext cr public BundleDownloadInstance datafileBundleWithGuestbookResponse(@Context ContainerRequestContext crc, @PathParam("fileId") String fileId, @QueryParam("fileMetadataId") Long fileMetadataId, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("gbrids") String gbrids, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response, String jsonBody) /*throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { DataverseRequest req = createDataverseRequest(getRequestUser(crc)); - processDatafileWithGuestbookResponse(crc, req, headers, fileId, uriInfo, gbrecs, jsonBody); + List fileIdsList = getFileIdsCSV(fileId); + processDatafilesWithGuestbookResponse(crc, req, headers, fileIdsList, uriInfo, gbrecs, jsonBody); // JSF UI passes the guestbook response id(s) in thus this qp can be removed when JSF is removed if (gbrids == null || gbrids.isEmpty()) { gbrids = (String) crc.getProperty("gbrids"); @@ -276,10 +277,10 @@ public Response datafile(@Context ContainerRequestContext crc, @PathParam("fileI // This will throw a ForbiddenException if access isn't authorized: checkAuthorization(req.getUser(), df); - if (checkGuestbookRequiredResponse(req.getUser(), uriInfo, df, gbrids)) { + if (checkGuestbookRequiredResponse(req.getUser(), uriInfo, df.getOwner(), gbrids)) { return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.download.failure.guestbookResponseMissing", getGuestbookIdFromDatafile(df))); } - + // Assumes gbrecs is always true if gbrids is sent if (gbrecs != true && df.isReleased()){ // Write Guestbook record if not done previously and file is released gbr = guestbookResponseService.initAPIGuestbookResponse(df.getOwner(), df, session, getRequestor(req.getUser())); @@ -411,7 +412,8 @@ public Response datafileWithGuestbookResponse(@Context ContainerRequestContext c DataverseRequest req = createDataverseRequest(getRequestUser(crc)); fileId = normalizeFileId(fileId, req); - return processDatafileWithGuestbookResponse(crc, req, headers, fileId, uriInfo, gbrecs, jsonBody); + List fileIdsList = getFileIdsCSV(fileId); + return processDatafilesWithGuestbookResponse(crc, req, headers, fileIdsList, uriInfo, gbrecs, jsonBody); } private String normalizeFileId(String fileId, DataverseRequest req) { @@ -444,64 +446,72 @@ private List getGuestbookIdFromDatafile(DataFile df) { } // Process the guestbook response from JSON and return a signedUrl to the matching GET call - private Response processDatafileWithGuestbookResponse(ContainerRequestContext crc, DataverseRequest req, HttpHeaders headers, String fileIds, UriInfo uriInfo, boolean gbrecs, String jsonBody) { + private Response processDatafilesWithGuestbookResponse(ContainerRequestContext crc, DataverseRequest req, HttpHeaders headers, List fileIdsList, UriInfo uriInfo, boolean gbrecs, String jsonBody) { User user = req.getUser(); - // Get and validate all the DataFiles first - Map datafilesMap = getDatafilesMap(req, fileIds); + // Get and validate all the DataFiles first - includes Locally FAIR check + List datafiles = getDatafilesList(req, fileIdsList); + + if (datafiles.isEmpty()) { + return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.download.failure.noFiles")); + } // Handle Guestbook Responses - String displayName = ""; String gbrids = null; - List fileIdList = new ArrayList<>(); - String id = null; - try { - // since all files must be in the same Dataset we can generate a Guestbook Response once and just replace the DataFile for each file in the list - DataFile firstDatafile = datafilesMap.values().size() > 0 ? (DataFile) Arrays.stream(datafilesMap.values().toArray()).findFirst().get() : null; - id = firstDatafile.getOwner().getId().toString(); - GuestbookResponse gbr = getGuestbookResponseFromBody(firstDatafile, GuestbookResponse.DOWNLOAD, jsonBody, user); - boolean guestbookResponseRequired = checkGuestbookRequiredResponse(user, uriInfo, firstDatafile, null); - for (DataFile df : datafilesMap.values()) { - displayName = df.getDisplayName(); - fileIdList.add(String.valueOf(df.getId())); - if (guestbookResponseRequired) { - if (gbr != null) { - gbr.setDataFile(df); - guestbookResponseService.save(gbr); - gbrids = gbr.getId().toString(); - MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, df); - mdcLogService.logEntry(entry); - } else { - return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.download.failure.guestbookResponseMissing", getGuestbookIdFromDatafile(df))); + DataFile firstDatafile = datafiles.getFirst(); + Dataset ds = firstDatafile.getOwner(); + + boolean guestbookResponseRequired = checkGuestbookRequiredResponse(user, uriInfo, ds, null); + // ToDo - should a guestbook cause a response even for draft files? + if (guestbookResponseRequired) { + try { + GuestbookResponse gbr = getGuestbookResponseFromBody(firstDatafile, GuestbookResponse.DOWNLOAD, jsonBody, user); + if (gbr != null) { + List gbridList = fileDownloadService.writeGuestbookResponseRecords(gbr, datafiles); + if (gbridList != null && !gbridList.isEmpty()) { + //Get the last one / the one with the latest date + gbrids = gbridList.getLast(); } - } else if (gbrecs != true && df.isReleased()) { - // Write Guestbook record if not done previously and file is released - GuestbookResponse defaultResponse = guestbookResponseService.initAPIGuestbookResponse(df.getOwner(), df, session, user); - guestbookResponseService.save(defaultResponse); - MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, df); - mdcLogService.logEntry(entry); + } else { + return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.download.failure.guestbookResponseMissing", getGuestbookIdFromDatafile(firstDatafile))); + } + } catch (JsonParseException ex) { + List args = Arrays.asList(firstDatafile.getDisplayName(), ex.getLocalizedMessage()); + return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.download.failure.guestbook.commandError", args)); + } + + } else if (!gbrecs) { + List releasedFiles = datafiles.stream().filter(DataFile::isReleased).collect(Collectors.toList()); + if (!releasedFiles.isEmpty()) { + // Write Guestbook record if not done previously and file is released + GuestbookResponse defaultResponse = guestbookResponseService.initAPIGuestbookResponse(firstDatafile.getOwner(), firstDatafile, session, user); + fileDownloadService.writeGuestbookResponseRecords(defaultResponse, datafiles); } - } catch (JsonParseException ex) { - List args = Arrays.asList(displayName, ex.getLocalizedMessage()); - return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.download.failure.guestbook.commandError", args)); } - // Check if requesting datafile(s) or all files within dataset - if (!uriInfo.getPath().toLowerCase().contains("/dataset/")) { - id = String.join(",", fileIdList); + + // Check if requesting datafile or all files within dataset using :persistentId + // If so, we need the datafile/dataset id to create a canonical signed URL + String idString = null; + if (!uriInfo.getPath().toLowerCase().contains(":persitentId")) { + if (!uriInfo.getPath().toLowerCase().contains("/dataset/")) { + // Should only be one file if :persistentId is being used + idString = datafiles.stream().map(df -> String.valueOf(df.getId())).collect(Collectors.joining(",")); + } else { + idString = ds.getId().toString(); + } } - return returnSignedUrl(crc, uriInfo, user, id, gbrids); + return returnSignedDownloadUrl(crc, uriInfo, user, idString, gbrids); } - private Map getDatafilesMap(DataverseRequest req, String fileIds) { - String fileIdParams[] = getFileIdsCSV(fileIds); - Map datafilesMap = new HashMap<>(); + private List getDatafilesList(DataverseRequest req, List fileIdsList) { + List datafilesList = new ArrayList<>(); Long datasetId = null; // Get and validate all the DataFiles first - if (fileIdParams != null && fileIdParams.length > 0) { - for (int i = 0; i < fileIdParams.length; i++) { - DataFile df = findDataFileUserCanSeeOrDieWrapper(fileIdParams[i], req); + if (fileIdsList != null && !fileIdsList.isEmpty()) { + for (int i = 0; i < fileIdsList.size(); i++) { + DataFile df = findDataFileUserCanSeeOrDieWrapper(fileIdsList.get(i), req); if (df.isHarvested()) { String errorMessage = "Datafile " + df.getId() + " is a harvested file that cannot be accessed in this Dataverse"; @@ -522,13 +532,13 @@ private Map getDatafilesMap(DataverseRequest req, String fileIds // This will throw a ForbiddenException if access isn't authorized: checkAuthorization(req.getUser(), df); - datafilesMap.put(df.getId(), df); + datafilesList.add(df); } } - return datafilesMap; + return datafilesList; } - private Response returnSignedUrl(ContainerRequestContext crc, UriInfo uriInfo, User user, String id, String gbrids) { + private Response returnSignedDownloadUrl(ContainerRequestContext crc, UriInfo uriInfo, User user, String id, String gbrids) { // Create the signed URL String userIdentifier = null; String key = null; @@ -550,7 +560,7 @@ private Response returnSignedUrl(ContainerRequestContext crc, UriInfo uriInfo, U // Guest userIdentifier = "guest"; // Note: In order for the key to match we need to replace ":persistentId" with the actual file id since that is what will be sent in via the signed url. - key = URLDecoder.decode(uriInfo.getAbsolutePath().toASCIIString()) + key = URLDecoder.decode(uriInfo.getAbsolutePath().toASCIIString(), StandardCharsets.US_ASCII) .replace(":persistentId", id); //TODO find a better one for here and in SignedUrlAuthMechanism.java } @@ -559,7 +569,7 @@ private Response returnSignedUrl(ContainerRequestContext crc, UriInfo uriInfo, U if (gbrids != null && !gbrids.isEmpty()) { builder.replaceQueryParam("gbrids", gbrids); } - builder.replaceQueryParam("persistentId", null); // remove this as a parm and add the id to the path + builder.replaceQueryParam("persistentId", (Object) null); // remove this as a parm and add the id to the path crc.setProperty("gbrids", gbrids); String baseUrlEncoded = builder.build().toString(); String baseUrl = URLDecoder.decode(baseUrlEncoded, StandardCharsets.UTF_8); @@ -805,14 +815,16 @@ public DownloadInstance downloadAuxiliaryFile(@Context ContainerRequestContext c public Response postDownloadDatafiles(@Context ContainerRequestContext crc, String body, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("gbrids") String gbrids, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { DataverseRequest req = createDataverseRequest(getRequestUser(crc)); - processDatafileWithGuestbookResponse(crc, req, headers, body, uriInfo, gbrecs, body); + List fileIdsList = getFileIdsCSV(body); + processDatafilesWithGuestbookResponse(crc, req, headers, fileIdsList, uriInfo, gbrecs, body); + // JSF UI passes the guestbook response id(s) in thus this qp can be removed when JSF is removed if (gbrids == null || gbrids.isEmpty()) { gbrids = (String) crc.getProperty("gbrids"); } - // There is no get for this so we shouldn't return a signed url + // initiate the download now - return downloadDatafiles(crc, body, gbrecs, gbrids, uriInfo, headers, response, null); + return downloadDatafiles(crc, fileIdsList, gbrecs, gbrids, uriInfo, headers, response, null); } @GET @@ -831,7 +843,7 @@ public Response downloadAllFromLatest(@Context ContainerRequestContext crc, @Pat // is that we know that guest never has the Permission.ViewUnpublishedDataset. final DatasetVersion draft = versionService.getDatasetVersionById(retrieved.getId(), DatasetVersion.VersionState.DRAFT.toString()); if (draft != null && permissionService.requestOn(req, retrieved).has(Permission.ViewUnpublishedDataset)) { - String fileIds = getFileIdsAsCommaSeparated(draft.getFileMetadatas()); + List fileIds = getFileIdsList(draft.getFileMetadatas()); // We don't want downloads from Draft versions to be counted, // so we are setting the gbrecs (aka "do not write guestbook response") // variable accordingly: @@ -855,8 +867,8 @@ public Response downloadAllFromLatest(@Context ContainerRequestContext crc, @Pat //throw new NotFoundException(); } - String fileIds = getFileIdsAsCommaSeparated(latest.getFileMetadatas()); - return downloadDatafiles(crc, fileIds, gbrecs, gbrids, uriInfo, headers, response, latest.getFriendlyVersionNumber()); + List fileIdsList = getFileIdsList(latest.getFileMetadatas()); + return downloadDatafiles(crc, fileIdsList, gbrecs, gbrids, uriInfo, headers, response, latest.getFriendlyVersionNumber()); } catch (WrappedResponse wr) { return wr.getResponse(); } @@ -870,23 +882,23 @@ public Response downloadAllFromLatestWithGuestbookResponse(@Context ContainerReq User user = getRequestUser(crc); DataverseRequest req = createDataverseRequest(user); final Dataset retrieved = findDatasetOrDie(datasetIdOrPersistentId); - String fileIds = ""; + List fileIdsList = new ArrayList<>(0); String version = null; // If user can view the draft version download those files and don't count them if (!(user instanceof GuestUser)) { final DatasetVersion draft = versionService.getDatasetVersionById(retrieved.getId(), DatasetVersion.VersionState.DRAFT.toString()); if (draft != null && permissionService.requestOn(req, retrieved).has(Permission.ViewUnpublishedDataset)) { - fileIds = getFileIdsAsCommaSeparated(draft.getFileMetadatas()); + fileIdsList = getFileIdsList(draft.getFileMetadatas()); gbrecs = true; version = "draft"; } } if (version == null) { final DatasetVersion latest = versionService.getLatestReleasedVersionFast(retrieved.getId()); - fileIds = getFileIdsAsCommaSeparated(latest.getFileMetadatas()); + fileIdsList = getFileIdsList(latest.getFileMetadatas()); version = latest.getFriendlyVersionNumber(); } - return processDatafileWithGuestbookResponse(crc, req, headers, fileIds, uriInfo, gbrecs, jsonBody); + return processDatafilesWithGuestbookResponse(crc, req, headers, fileIdsList, uriInfo, gbrecs, jsonBody); } catch (WrappedResponse wr) { return wr.getResponse(); } @@ -907,7 +919,7 @@ public Response downloadAllFromVersion(@Context ContainerRequestContext crc, @Pa // -- L.A.) return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.exception.version.not.found")); } - String fileIds = getFileIdsAsCommaSeparated(dsv.getFileMetadatas()); + List fileIdsList = getFileIdsList(dsv.getFileMetadatas()); // We don't want downloads from Draft versions to be counted, // so we are setting the gbrecs (aka "do not write guestbook response") // variable accordingly: @@ -915,7 +927,7 @@ public Response downloadAllFromVersion(@Context ContainerRequestContext crc, @Pa gbrecs = true; } - return downloadDatafiles(crc, fileIds, gbrecs, gbrids, uriInfo, headers, response, dsv.getFriendlyVersionNumber().toLowerCase()); + return downloadDatafiles(crc, fileIdsList, gbrecs, gbrids, uriInfo, headers, response, dsv.getFriendlyVersionNumber().toLowerCase()); } catch (WrappedResponse wr) { return wr.getResponse(); } @@ -931,8 +943,8 @@ public Response downloadAllFromVersionWithGuestbookResponse(@Context ContainerRe try { DataverseRequest req = createDataverseRequest(getRequestUser(crc)); DatasetVersion dsv = getDatasetVersionFromVersion(crc, datasetIdOrPersistentId, versionId); - String fileIds = getFileIdsAsCommaSeparated(dsv.getFileMetadatas()); - return processDatafileWithGuestbookResponse(crc, req, headers, fileIds, uriInfo, gbrecs, jsonBody); + List fileIdsList = getFileIdsList(dsv.getFileMetadatas()); + return processDatafilesWithGuestbookResponse(crc, req, headers, fileIdsList, uriInfo, gbrecs, jsonBody); } catch (WrappedResponse wr) { return wr.getResponse(); } @@ -965,13 +977,8 @@ public Command handleLatestPublished() { })); } - private static String getFileIdsAsCommaSeparated(List fileMetadatas) { - List ids = new ArrayList<>(); - for (FileMetadata fileMetadata : fileMetadatas) { - Long fileId = fileMetadata.getDataFile().getId(); - ids.add(String.valueOf(fileId)); - } - return String.join(",", ids); + private static List getFileIdsList(List fileMetadatas) { + return fileMetadatas.stream().map(fileMetadata -> fileMetadata.getId().toString()).collect(Collectors.toList()); } private String generateMultiFileBundleName(Dataset dataset, String versionTag) { @@ -1002,7 +1009,8 @@ private String generateMultiFileBundleName(Dataset dataset, String versionTag) { public Response datafiles(@Context ContainerRequestContext crc, @PathParam("fileIds") String fileIds, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("gbrids") String gbrids, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { - return downloadDatafiles(crc, fileIds, gbrecs, gbrids, uriInfo, headers, response, null); + List fileIdsList = getFileIdsCSV(fileIds); + return downloadDatafiles(crc, fileIdsList, gbrecs, gbrids, uriInfo, headers, response, null); } @POST @@ -1013,10 +1021,11 @@ public Response datafilesWithGuestbookResponse(@Context ContainerRequestContext @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response, String jsonBody) throws WebApplicationException { DataverseRequest req = createDataverseRequest(getRequestUser(crc)); - return processDatafileWithGuestbookResponse(crc, req, headers, fileIds, uriInfo, gbrecs, jsonBody); + List fileIdsList = getFileIdsCSV(fileIds); + return processDatafilesWithGuestbookResponse(crc, req, headers, fileIdsList, uriInfo, gbrecs, jsonBody); } - private String[] getFileIdsCSV(String body) { + private List getFileIdsCSV(String body) { /* BODY has 3 variations coming from path parameter of GET or body of POST: "1,2,3," "fileIds=1,2,3" @@ -1028,34 +1037,28 @@ private String[] getFileIdsCSV(String body) { if (jsonObject.containsKey("fileIds")) { JsonArray ids = jsonObject.getJsonArray("fileIds"); List idList = ids.getValuesAs(JsonNumber.class); - return idList.stream().map(JsonNumber::toString).toArray(String[]::new); + return idList.stream().map(JsonNumber::toString).collect(Collectors.toList()); } else { - return new String[0]; + return new ArrayList<>(); } } else { // Trim string "fileIds=" from the front if exists String csv = body.substring(body.startsWith("fileIds=") ? 8 : 0); //String[] list = body.substring(body.startsWith("fileIds=") ? 8 : 0).replaceAll(",$", "").split(","); return Arrays.asList(csv.split(",")).stream().map(String::trim) - .filter(s -> !s.isEmpty()).collect(Collectors.toList()).toArray(new String[0]); + .filter(s -> !s.isEmpty()).collect(Collectors.toList()); } } - private Response downloadDatafiles(ContainerRequestContext crc, String body, boolean donotwriteGBResponse, String gbrids, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response, String versionTag) throws WebApplicationException /* throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { + private Response downloadDatafiles(ContainerRequestContext crc, List fileIdsList, boolean donotwriteGBResponse, String gbrids, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response, String versionTag) throws WebApplicationException /* throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { final long zipDownloadSizeLimit = systemConfig.getZipDownloadLimit(); logger.fine("setting zip download size limit to " + zipDownloadSizeLimit + " bytes."); - if (body == null || body.equals("")) { + if (fileIdsList == null || fileIdsList.isEmpty()) { throw new BadRequestException(); } - String[] fileIdParams = getFileIdsCSV(body); - List fileIdsList = new ArrayList<>(Arrays.asList(fileIdParams)); - /* Note - fileIds coming from the POST ends in '\n' and a ',' has been added after the last file id number and before a - * final '\n' - this stops the last item from being parsed in the fileIds.split(","); line below. - */ - String customZipServiceUrl = settingsService.getValueForKey(SettingsServiceBean.Key.CustomZipDownloadServiceUrl); boolean useCustomZipService = customZipServiceUrl != null; @@ -1087,32 +1090,22 @@ private Response downloadDatafiles(ContainerRequestContext crc, String body, boo } if(!selectedDataFiles.isEmpty()) { - Boolean guestbookResponseRequired = checkGuestbookRequiredResponse(user, uriInfo, selectedDataFiles.getFirst(), gbrids); - logger.fine("Downloading" + fileIdParams.length + " files. GBR required: " + guestbookResponseRequired); + Boolean guestbookResponseRequired = checkGuestbookRequiredResponse(user, uriInfo, selectedDataFiles.getFirst().getOwner(), gbrids); + logger.fine("Downloading" + fileIdsList.size() + " files. GBR required: " + guestbookResponseRequired); for (DataFile df : selectedDataFiles) { if (isAccessAuthorized(user, df)) { authorizedDatafiles.add(df); } } - if (!donotwriteGBResponse) { - if (guestbookResponseRequired) { - try { - GuestbookResponse gbr = getGuestbookResponseFromBody(authorizedDatafiles.getFirst(), GuestbookResponse.DOWNLOAD, body, user); - if (gbr != null) { - fileDownloadService.writeGuestbookResponseRecords(gbr, authorizedDatafiles); - } else { - return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.download.failure.guestbookResponseMissing", getGuestbookIdFromDatafile(authorizedDatafiles.getFirst()))); - } - } catch (JsonParseException ex) { - List args = Arrays.asList(authorizedDatafiles.getFirst().getDisplayName(), ex.getLocalizedMessage()); - return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.download.failure.guestbook.commandError", args)); - } - - } else { - GuestbookResponse gbr = guestbookResponseService.initAPIGuestbookResponse(authorizedDatafiles.getFirst().getOwner(), authorizedDatafiles.getFirst(), session, user); - fileDownloadService.writeGuestbookResponseRecords(gbr, authorizedDatafiles); - } + if (authorizedDatafiles.isEmpty()) { + throw new ForbiddenException(); + } + if (guestbookResponseRequired) { + return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.download.failure.guestbookResponseMissing", getGuestbookIdFromDatafile(authorizedDatafiles.getFirst()))); + } else if (!donotwriteGBResponse) { + GuestbookResponse gbr = guestbookResponseService.initAPIGuestbookResponse(authorizedDatafiles.getFirst().getOwner(), authorizedDatafiles.getFirst(), session, user); + fileDownloadService.writeGuestbookResponseRecords(gbr, authorizedDatafiles); //We've written the gb responses if needed donotwriteGBResponse = true; } @@ -1121,7 +1114,7 @@ private Response downloadDatafiles(ContainerRequestContext crc, String body, boo if (useCustomZipService) { URI redirect_uri = null; try { - redirect_uri = handleCustomZipDownload(user, customZipServiceUrl, fileIdParams, uriInfo, headers, donotwriteGBResponse, true); + redirect_uri = handleCustomZipDownload(user, customZipServiceUrl, fileIdsList, uriInfo, headers, donotwriteGBResponse, true); } catch (WebApplicationException wae) { throw wae; } @@ -1985,13 +1978,13 @@ public Response getUserPermissionsOnFile(@Context ContainerRequestContext crc, @ return ok(jsonObjectBuilder); } - private boolean checkGuestbookRequiredResponse(User user, UriInfo uriInfo, DataFile df, String gbrids) throws WebApplicationException { + private boolean checkGuestbookRequiredResponse(User user, UriInfo uriInfo, Dataset ds, String gbrids) throws WebApplicationException { // Check if guestbook response is required - boolean required = df.getOwner().hasEnabledGuestbook(); + boolean required = ds.hasEnabledGuestbook(); boolean wasWrittenInPost = false; if (required) { User requestor = getRequestor(user); - if (requestor instanceof AuthenticatedUser && permissionService.userOn(requestor, df.getOwner()).has(Permission.EditDataset)) { + if (requestor instanceof AuthenticatedUser && permissionService.userOn(requestor, ds).has(Permission.EditDataset)) { required = false; } // Check if we are downloading a thumbnail image which doesn't require a guestbook response @@ -2008,7 +2001,7 @@ private boolean checkGuestbookRequiredResponse(User user, UriInfo uriInfo, DataF throw new NotFoundException("GuestbookResponse Not Found for id:" + gbrids); } Long delta = Instant.now().toEpochMilli() - gbr.getResponseTime().getTime(); - wasWrittenInPost = gbr.getDataset().getId().equals(df.getOwner().getId()) && delta <= (GUESTBOOK_RESPONSE_SIGNEDURL_TIMEOUT_MINUTES * 60000L); + wasWrittenInPost = gbr.getDataset().getId().equals(ds.getId()) && delta <= (GUESTBOOK_RESPONSE_SIGNEDURL_TIMEOUT_MINUTES * 60000L); } catch (NumberFormatException | DateTimeParseException ex) { throw new BadRequestException(ex.getMessage()); } @@ -2166,7 +2159,7 @@ private boolean isAccessAuthorized(User requestUser, DataFile df) { return false; } - private URI handleCustomZipDownload(User user, String customZipServiceUrl, String[] fileIdParams, UriInfo uriInfo, HttpHeaders headers, boolean donotwriteGBResponse, boolean orig) throws WebApplicationException { + private URI handleCustomZipDownload(User user, String customZipServiceUrl, List fileIdsList, UriInfo uriInfo, HttpHeaders headers, boolean donotwriteGBResponse, boolean orig) throws WebApplicationException { String zipServiceKey = null; Timestamp timestamp = null; @@ -2175,14 +2168,14 @@ private URI handleCustomZipDownload(User user, String customZipServiceUrl, Strin int validFileCount = 0; int downloadAuthCount = 0; - if (fileIdParams == null || fileIdParams.length == 0) { + if (fileIdsList == null || fileIdsList.isEmpty()) { throw new BadRequestException(); } - for (int i = 0; i < fileIdParams.length; i++) { + for (int i = 0; i < fileIdsList.size(); i++) { Long fileId = null; try { - fileId = Long.parseLong(fileIdParams[i]); + fileId = Long.parseLong(fileIdsList.get(i)); validIdCount++; } catch (NumberFormatException nfe) { fileId = null; From afcd1f7a71653fd19e828c07d28465416517fbb5 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 19 Jun 2026 16:13:58 -0400 Subject: [PATCH 05/10] update custom zip logic for LFAIR, bulk gbr write --- .../edu/harvard/iq/dataverse/api/Access.java | 77 +++++-------------- 1 file changed, 21 insertions(+), 56 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index eeb3b2bf5bb..f0f12949760 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -1114,7 +1114,7 @@ private Response downloadDatafiles(ContainerRequestContext crc, List fil if (useCustomZipService) { URI redirect_uri = null; try { - redirect_uri = handleCustomZipDownload(user, customZipServiceUrl, fileIdsList, uriInfo, headers, donotwriteGBResponse, true); + redirect_uri = handleCustomZipDownload(user, customZipServiceUrl, authorizedDatafiles, uriInfo, headers, donotwriteGBResponse, getOrig); } catch (WebApplicationException wae) { throw wae; } @@ -2159,73 +2159,38 @@ private boolean isAccessAuthorized(User requestUser, DataFile df) { return false; } - private URI handleCustomZipDownload(User user, String customZipServiceUrl, List fileIdsList, UriInfo uriInfo, HttpHeaders headers, boolean donotwriteGBResponse, boolean orig) throws WebApplicationException { - - String zipServiceKey = null; - Timestamp timestamp = null; + private URI handleCustomZipDownload(User user, String customZipServiceUrl, List authorizedDatafiles, UriInfo uriInfo, HttpHeaders headers, boolean donotwriteGBResponse, boolean orig) throws WebApplicationException { - int validIdCount = 0; - int validFileCount = 0; - int downloadAuthCount = 0; + String zipServiceKey = null; + Timestamp timestamp = null; - if (fileIdsList == null || fileIdsList.isEmpty()) { - throw new BadRequestException(); + if (authorizedDatafiles == null || authorizedDatafiles.isEmpty()) { + throw new ForbiddenException(); } - - for (int i = 0; i < fileIdsList.size(); i++) { - Long fileId = null; - try { - fileId = Long.parseLong(fileIdsList.get(i)); - validIdCount++; - } catch (NumberFormatException nfe) { - fileId = null; - } - if (fileId != null) { - DataFile file = dataFileService.find(fileId); - if (file != null) { - validFileCount++; - if (isAccessAuthorized(user, file)) { - logger.fine("adding datafile (id=" + file.getId() + ") to the download list of the ZippedDownloadInstance."); - if (donotwriteGBResponse != true && file.isReleased()) { - GuestbookResponse gbr = guestbookResponseService.initAPIGuestbookResponse(file.getOwner(), file, session, user); - guestbookResponseService.save(gbr); - MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, file); - mdcLogService.logEntry(entry); - } - if (zipServiceKey == null) { - zipServiceKey = fileDownloadService.generateServiceKey(); - } - if (timestamp == null) { - timestamp = new Timestamp(new Date().getTime()); - } + if (!donotwriteGBResponse) { + GuestbookResponse gbr = guestbookResponseService.initAPIGuestbookResponse(authorizedDatafiles.getFirst().getOwner(), authorizedDatafiles.getFirst(), session, user); + fileDownloadService.writeGuestbookResponseRecords(gbr, authorizedDatafiles); + } - fileDownloadService.addFileToCustomZipJob(zipServiceKey, file, timestamp, true); - downloadAuthCount++; - } - } + for (DataFile file : authorizedDatafiles) { + logger.fine("adding datafile (id=" + file.getId() + ") to the download list of the ZippedDownloadInstance."); + + if (zipServiceKey == null) { + zipServiceKey = fileDownloadService.generateServiceKey(); + } + if (timestamp == null) { + timestamp = new Timestamp(new Date().getTime()); } - } - if (validIdCount == 0) { - throw new BadRequestException(); - } - - if (validFileCount == 0) { - // no supplied id translated into an existing DataFile - throw new NotFoundException(); + fileDownloadService.addFileToCustomZipJob(zipServiceKey, file, timestamp, orig); } - - if (downloadAuthCount == 0) { - // none of the DataFiles were authorized for download - throw new ForbiddenException(); - } - + URI redirectUri = null; try { redirectUri = new URI(customZipServiceUrl + "?" + zipServiceKey); } catch (URISyntaxException use) { - throw new BadRequestException(); + throw new BadRequestException(); } return redirectUri; } From 0d6b17a392a35ee86c311a72385bdda0afde9f62 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 23 Jun 2026 09:47:26 -0400 Subject: [PATCH 06/10] bug fixes --- src/main/java/edu/harvard/iq/dataverse/api/Access.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index f0f12949760..c3fc4b5fa5f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -494,7 +494,7 @@ private Response processDatafilesWithGuestbookResponse(ContainerRequestContext c // Check if requesting datafile or all files within dataset using :persistentId // If so, we need the datafile/dataset id to create a canonical signed URL String idString = null; - if (!uriInfo.getPath().toLowerCase().contains(":persitentId")) { + if (!uriInfo.getPath().toLowerCase().contains(":persistentId")) { if (!uriInfo.getPath().toLowerCase().contains("/dataset/")) { // Should only be one file if :persistentId is being used idString = datafiles.stream().map(df -> String.valueOf(df.getId())).collect(Collectors.joining(",")); @@ -569,7 +569,7 @@ private Response returnSignedDownloadUrl(ContainerRequestContext crc, UriInfo ur if (gbrids != null && !gbrids.isEmpty()) { builder.replaceQueryParam("gbrids", gbrids); } - builder.replaceQueryParam("persistentId", (Object) null); // remove this as a parm and add the id to the path + builder.replaceQueryParam("persistentId"); // remove this as a parm and add the id to the path crc.setProperty("gbrids", gbrids); String baseUrlEncoded = builder.build().toString(); String baseUrl = URLDecoder.decode(baseUrlEncoded, StandardCharsets.UTF_8); From 03a7033c2211230d4491bb9953a2cbb7a64724dd Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 23 Jun 2026 12:36:46 -0400 Subject: [PATCH 07/10] fix null custom Q responses and oneToMany FileAccessRequest declares the realtionship to GuestbookReponse as OneToOne. The OneToMany here appears to be a typo (without any real impact) from #9599 --- .../iq/dataverse/FileDownloadServiceBean.java | 3 +++ .../iq/dataverse/GuestbookResponse.java | 22 +++++++++++-------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java index 3def42fa7cb..4a2b6c4819f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java @@ -226,7 +226,10 @@ public void writeGuestbookResponseAndRequestAccess(GuestbookResponse guestbookRe int countRequestAccessSuccess = 0; + // ToDo - rewrite to avoid 1 transaction per guestbookresponse/file for(DataFile dataFile : selectedDataFiles){ + // The guestbookResponse can be reused because the final save() of + // the guestbookResponse is in a new transaction. (Otherwise we'd need to make a copy) guestbookResponse.setDataFile(dataFile); writeGuestbookResponseRecordForRequestAccess(guestbookResponse); if(requestAccess(dataFile,guestbookResponse)){ diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java index a6ac270b45c..53bd92fe68d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java @@ -68,13 +68,12 @@ public class GuestbookResponse implements Serializable { @JoinColumn(nullable=true) private AuthenticatedUser authenticatedUser; - @OneToMany(mappedBy="guestbookResponse",cascade={CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST},fetch = FetchType.LAZY) - //private FileAccessRequest fileAccessRequest; - private List fileAccessRequests; + @OneToOne(mappedBy="guestbookResponse",cascade={CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST},fetch = FetchType.LAZY) + private FileAccessRequest fileAccessRequest; @OneToMany(mappedBy="guestbookResponse",cascade={CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST},orphanRemoval=true) @OrderBy ("id") - private List customQuestionResponses; + private List customQuestionResponses = new ArrayList<>(); @Size(max = 255, message = "{guestbook.response.nameLength}") private String name; @@ -258,7 +257,9 @@ public List getCustomQuestionResponses() { } public List getCustomQuestionResponsesSorted(){ - + if (customQuestionResponses == null) { + customQuestionResponses = new ArrayList<>(); + } Collections.sort(customQuestionResponses, (CustomQuestionResponse cqr1, CustomQuestionResponse cqr2) -> { int a = cqr1.getCustomQuestion().getDisplayOrder(); int b = cqr2.getCustomQuestion().getDisplayOrder(); @@ -270,15 +271,18 @@ public List getCustomQuestionResponsesSorted(){ } public void setCustomQuestionResponses(List customQuestionResponses) { + if(customQuestionResponses == null) { + customQuestionResponses = new ArrayList<>(); + } this.customQuestionResponses = customQuestionResponses; } - public List getFileAccessRequests(){ - return fileAccessRequests; + public FileAccessRequest getFileAccessRequest(){ + return fileAccessRequest; } - public void setFileAccessRequest(List fARs){ - this.fileAccessRequests = fARs; + public void setFileAccessRequest(FileAccessRequest fAR){ + this.fileAccessRequest = fAR; } public Dataset getDataset() { From 19ab316f6b3c05d144667eaffb196d6a9828237b Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 23 Jun 2026 15:40:22 -0400 Subject: [PATCH 08/10] fix using separate transaction --- .../edu/harvard/iq/dataverse/FileDownloadServiceBean.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java index 4a2b6c4819f..eeaae577258 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java @@ -73,6 +73,9 @@ public class FileDownloadServiceBean implements java.io.Serializable { @EJB MailServiceBean mailService; + @EJB + FileDownloadServiceBean self; + @Inject DataverseSession session; @@ -268,7 +271,7 @@ public List writeGuestbookResponseRecords(GuestbookResponse guestbookRes perFileResponse.setDataFile(dataFile); responsesToPersist.add(perFileResponse); } - List savedIds = saveGuestbookResponseRecordsAndMDCLogEntries(responsesToPersist); + List savedIds = self.saveGuestbookResponseRecordsAndMDCLogEntries(responsesToPersist); return savedIds; } From de08455165af5ef4a0ef4eb2ebd4431eb50fa45c Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 23 Jun 2026 17:08:04 -0400 Subject: [PATCH 09/10] fix fileID listing/avoid retrieving objects --- .../iq/dataverse/DataFileServiceBean.java | 12 ++++++++--- .../dataverse/DatasetVersionServiceBean.java | 4 ++-- .../edu/harvard/iq/dataverse/api/Access.java | 21 +++++++++++-------- 3 files changed, 23 insertions(+), 14 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index 7af16372f40..46e02f29eb5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -283,14 +283,20 @@ public List findFileMetadataByDatasetVersionId(Long datasetVersion .getResultList(); } + public List findDataFileIdsByDatasetVersionId(Long datasetVersionId) { + return findDataFileIdsByDatasetVersionIdLabelSearchTerm(datasetVersionId, null, null, null); + } + public List findDataFileIdsByDatasetVersionIdLabelSearchTerm(Long datasetVersionId, String userSuppliedSearchTerm, String userSuppliedSortField, String userSuppliedSortOrder) { - FileSortFieldAndOrder sortFieldAndOrder = new FileSortFieldAndOrder(userSuppliedSortField, userSuppliedSortOrder); String searchTerm = !StringUtils.isBlank(userSuppliedSearchTerm) ? "%"+userSuppliedSearchTerm.trim().toLowerCase()+"%" : null; String selectClause = "select o.datafile_id from FileMetadata o where o.datasetversion_id = " + datasetVersionId; String searchClause = searchTerm != null ? " and (lower(o.label) like ? or lower(o.description) like ?)" : ""; - String orderByClause = " order by o." + sortFieldAndOrder.getSortField() + " " + sortFieldAndOrder.getSortOrder(); - + String orderByClause = ""; + if(StringUtils.isNotBlank(userSuppliedSortField) || StringUtils.isNotBlank(userSuppliedSortOrder)) { + FileSortFieldAndOrder sortFieldAndOrder = new FileSortFieldAndOrder(userSuppliedSortField, userSuppliedSortOrder); + orderByClause = " order by o." + sortFieldAndOrder.getSortField() + " " + sortFieldAndOrder.getSortOrder(); + } Query query = em.createNativeQuery(selectClause + searchClause + orderByClause); if (searchTerm != null) { query.setParameter(1, searchTerm); diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index dcc9995ef35..cb9a260fdb1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -1279,8 +1279,7 @@ public DatasetVersion merge( DatasetVersion ver ) { /** * Execute a query to return DatasetVersion * - * @param queryString - * @return + * @return */ public List getUnarchivedDatasetVersions(){ @@ -1359,4 +1358,5 @@ public void persistArchivalCopyLocation(DatasetVersion dv) { logger.log(Level.SEVERE, "Could not find DatasetVersion with id={0} to retry persisting archival copy location after OptimisticLockException.", dv.getId()); } } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index c3fc4b5fa5f..d2a18a932a3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -843,7 +843,7 @@ public Response downloadAllFromLatest(@Context ContainerRequestContext crc, @Pat // is that we know that guest never has the Permission.ViewUnpublishedDataset. final DatasetVersion draft = versionService.getDatasetVersionById(retrieved.getId(), DatasetVersion.VersionState.DRAFT.toString()); if (draft != null && permissionService.requestOn(req, retrieved).has(Permission.ViewUnpublishedDataset)) { - List fileIds = getFileIdsList(draft.getFileMetadatas()); + List fileIds = getFileIdsList(draft); // We don't want downloads from Draft versions to be counted, // so we are setting the gbrecs (aka "do not write guestbook response") // variable accordingly: @@ -867,7 +867,7 @@ public Response downloadAllFromLatest(@Context ContainerRequestContext crc, @Pat //throw new NotFoundException(); } - List fileIdsList = getFileIdsList(latest.getFileMetadatas()); + List fileIdsList = getFileIdsList(latest); return downloadDatafiles(crc, fileIdsList, gbrecs, gbrids, uriInfo, headers, response, latest.getFriendlyVersionNumber()); } catch (WrappedResponse wr) { return wr.getResponse(); @@ -888,14 +888,14 @@ public Response downloadAllFromLatestWithGuestbookResponse(@Context ContainerReq if (!(user instanceof GuestUser)) { final DatasetVersion draft = versionService.getDatasetVersionById(retrieved.getId(), DatasetVersion.VersionState.DRAFT.toString()); if (draft != null && permissionService.requestOn(req, retrieved).has(Permission.ViewUnpublishedDataset)) { - fileIdsList = getFileIdsList(draft.getFileMetadatas()); + fileIdsList = getFileIdsList(draft); gbrecs = true; version = "draft"; } } if (version == null) { final DatasetVersion latest = versionService.getLatestReleasedVersionFast(retrieved.getId()); - fileIdsList = getFileIdsList(latest.getFileMetadatas()); + fileIdsList = getFileIdsList(latest); version = latest.getFriendlyVersionNumber(); } return processDatafilesWithGuestbookResponse(crc, req, headers, fileIdsList, uriInfo, gbrecs, jsonBody); @@ -919,7 +919,7 @@ public Response downloadAllFromVersion(@Context ContainerRequestContext crc, @Pa // -- L.A.) return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.exception.version.not.found")); } - List fileIdsList = getFileIdsList(dsv.getFileMetadatas()); + List fileIdsList = getFileIdsList(dsv); // We don't want downloads from Draft versions to be counted, // so we are setting the gbrecs (aka "do not write guestbook response") // variable accordingly: @@ -943,7 +943,7 @@ public Response downloadAllFromVersionWithGuestbookResponse(@Context ContainerRe try { DataverseRequest req = createDataverseRequest(getRequestUser(crc)); DatasetVersion dsv = getDatasetVersionFromVersion(crc, datasetIdOrPersistentId, versionId); - List fileIdsList = getFileIdsList(dsv.getFileMetadatas()); + List fileIdsList = getFileIdsList(dsv); return processDatafilesWithGuestbookResponse(crc, req, headers, fileIdsList, uriInfo, gbrecs, jsonBody); } catch (WrappedResponse wr) { return wr.getResponse(); @@ -977,10 +977,13 @@ public Command handleLatestPublished() { })); } - private static List getFileIdsList(List fileMetadatas) { - return fileMetadatas.stream().map(fileMetadata -> fileMetadata.getId().toString()).collect(Collectors.toList()); + private List getFileIdsList(DatasetVersion dsv) { + return dataFileService.findDataFileIdsByDatasetVersionId(dsv.getId()) + .stream() + .map(String::valueOf) + .collect(Collectors.toList()); } - + private String generateMultiFileBundleName(Dataset dataset, String versionTag) { String bundleName = DEFAULT_BUNDLE_NAME; From fd0694f173f246c605c1f85d6c29a17ee6835ce0 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 24 Jun 2026 15:17:28 -0400 Subject: [PATCH 10/10] release note --- doc/release-notes/12479-write-guestbook-improvements.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/release-notes/12479-write-guestbook-improvements.md diff --git a/doc/release-notes/12479-write-guestbook-improvements.md b/doc/release-notes/12479-write-guestbook-improvements.md new file mode 100644 index 00000000000..709240fba20 --- /dev/null +++ b/doc/release-notes/12479-write-guestbook-improvements.md @@ -0,0 +1 @@ +The performance when saving counts/guestbook responses has been improved (particularly for datasets with large numbers of files).