diff --git a/doc/release-notes/12200-async-reindexing-permissions.md b/doc/release-notes/12200-async-reindexing-permissions.md new file mode 100644 index 00000000000..4ca976b3331 --- /dev/null +++ b/doc/release-notes/12200-async-reindexing-permissions.md @@ -0,0 +1,9 @@ +# Asynchronous Permissions Reindexing + +The previously undocumented Solr permissions reindexing API endpoints have been improved. + +- The endpoints `/api/admin/index/perms` (asynchronous, all objects) and `/api/admin/index/perms/{id}` (synchronous, single object) now use POST instead of GET. +- Both endpoints require superuser access. +- For the asynchronous reindex-all endpoint, if an indexing process is already in progress, the API will return a 409 Conflict status. +- The asynchronous reindex-all endpoint no longer runs as a single transaction. This avoids potential timeouts in larger installations. +- These endpoints are now documented in the Solr Search Index section of the Admin Guide. diff --git a/doc/sphinx-guides/source/admin/solr-search-index.rst b/doc/sphinx-guides/source/admin/solr-search-index.rst index 3f7b9d5b547..27eb7fab6f4 100644 --- a/doc/sphinx-guides/source/admin/solr-search-index.rst +++ b/doc/sphinx-guides/source/admin/solr-search-index.rst @@ -96,6 +96,37 @@ This API will clear the Solr entry for the dataset specified. It can be useful i This can be reversed of course by re-indexing the dataset with the API above. +Reindexing Permissions +---------------------- + +It is possible to just re-index the permissions on Solr entries without re-indexing the content. + +Reindexing Permissions for All Objects +++++++++++++++++++++++++++++++++++++++ + +Re-index permissions for all Dataverse collections, datasets, and files. This is an asynchronous operation that may take a long time to complete on large installations. This endpoint requires the API token of a superuser. + +.. 
code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + curl -X POST -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/admin/index/perms" + +If indexing is already in progress, the API will return a 409 ("CONFLICT") response. Otherwise, it will return a 200 ("OK") response and the indexing process will start in the background. Check the server logs for progress. + +Reindexing Permissions for a Single Object +++++++++++++++++++++++++++++++++++++++++++ + +It is also possible to re-index permissions for a single object by database ID: + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export ID=42 + curl -X POST -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/admin/index/perms/$ID" + +This operation is performed synchronously. Like the endpoint above, it requires the API token of a superuser. + + Manually Querying Solr ---------------------- diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index fb346e96821..10e368da51f 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -7,9 +7,17 @@ This API changelog is experimental and we would love feedback on its usefulness. :local: :depth: 1 -v6.11 +v6.12 ----- +- The permission reindexing endpoints have been updated to use ``POST`` and require superuser access. They are now documented in the :doc:`/admin/solr-search-index` guide. + + - **/api/admin/index/perms** + + - **/api/admin/index/perms/{id}** + +v6.11 +----- - The GET /api/mydata/retrieve, if the search returns no data, now includes the "data" block with 0 results. The message that was returned in "error_message" will be returned in "message" and the "success" will be `true`. All other errors will continue to reply with "success":false and the error message in "error_message". - The endpoints GET, PUT AND DELETE for `/api/admin/dataverse/{alias}/storageDriver` have been moved to `/api/dataverses/{alias}/storageDriver`. 
- The endpoint `/api/admin/dataverse/storageDrivers` has been moved and renamed to `/api/dataverses/{alias}/allowedStorageDrivers`. Regarding the change of the name, this endpoint will in the future only display the storageDrivers that are allowed on the specified collection, as of now, it will display the entire list of available Drivers on the installation. diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Index.java b/src/main/java/edu/harvard/iq/dataverse/api/Index.java index f83506c7e27..8a21de8e296 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Index.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Index.java @@ -59,6 +59,7 @@ import jakarta.validation.ConstraintViolationException; import jakarta.ws.rs.DELETE; import jakarta.ws.rs.GET; +import jakarta.ws.rs.POST; import jakarta.ws.rs.Path; import jakarta.ws.rs.PathParam; import jakarta.ws.rs.QueryParam; @@ -369,22 +370,43 @@ public Response indexMod(@QueryParam("partitions") long partitions, @QueryParam( return ok(response); } - @GET + /** Starts a background re-index of Solr permissions for all objects. Superusers only (otherwise 403 FORBIDDEN). Returns a conflict response if solrIndexService reports a run already in progress; otherwise calls solrIndexService.asyncIndexAllPermissions() and returns OK without waiting for it to finish. A WrappedResponse thrown while resolving the user is returned as-is. NOTE(review): the in-progress check and the async start are two separate calls, so near-simultaneous requests may both get OK even though only one run proceeds - confirm this is acceptable. */ @POST + @AuthRequired @Path("perms") - public Response indexAllPermissions() { - IndexResponse indexResponse = solrIndexService.indexAllPermissions(); - return ok(indexResponse.getMessage()); + public Response indexAllPermissions(@Context ContainerRequestContext crc) { + try { + AuthenticatedUser user = getRequestAuthenticatedUserOrDie(crc); + if (!user.isSuperuser()) { + return error(Status.FORBIDDEN, "Superusers only."); + } + if (solrIndexService.isIndexingPermissionsInProgress()) { + return conflict("Asynchronous indexing of all permissions is already in progress."); + } + solrIndexService.asyncIndexAllPermissions(); + return ok("Asynchronous indexing of all permissions has been started. 
Check the server logs for progress."); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } } - @GET + /** Re-indexes Solr permissions for the single DvObject with the given database id and returns the resulting IndexResponse message. Superusers only (otherwise 403 FORBIDDEN); responds 400 BAD_REQUEST when no DvObject has that id. A WrappedResponse thrown while resolving the user is returned as-is. */ @POST + @AuthRequired @Path("perms/{id}") - public Response indexPermissions(@PathParam("id") Long id) { - DvObject dvObject = dvObjectService.findDvObject(id); - if (dvObject == null) { - return error(Status.BAD_REQUEST, "Could not find DvObject based on id " + id); - } else { - IndexResponse indexResponse = solrIndexService.indexPermissionsForOneDvObject(dvObject); - return ok(indexResponse.getMessage()); + public Response indexPermissions(@Context ContainerRequestContext crc, @PathParam("id") Long id) { + try { + AuthenticatedUser user = getRequestAuthenticatedUserOrDie(crc); + if (!user.isSuperuser()) { + return error(Status.FORBIDDEN, "Superusers only."); + } + DvObject dvObject = dvObjectService.findDvObject(id); + if (dvObject == null) { + return error(Status.BAD_REQUEST, "Could not find DvObject based on id " + id); + } else { + IndexResponse indexResponse = solrIndexService.indexPermissionsForOneDvObject(dvObject); + return ok(indexResponse.getMessage()); + } + } catch (WrappedResponse wr) { + return wr.getResponse(); } } /** diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java index 90282712060..2c9040dd657 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java @@ -23,10 +23,12 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.logging.Level; import java.util.logging.Logger; import java.util.stream.Stream; +import jakarta.ejb.Asynchronous; import jakarta.ejb.EJB; import jakarta.ejb.Stateless; import jakarta.ejb.TransactionAttribute; @@ -47,6 +49,8 @@ public class SolrIndexServiceBean { private static final Logger logger = 
Logger.getLogger(SolrIndexServiceBean.class.getCanonicalName()); + /** True while a full permissions re-index is running: set and cleared by asyncIndexAllPermissions(), read by isIndexingPermissionsInProgress(). Static, so the flag is shared by all instances of this bean rather than kept per instance. */ private static final AtomicBoolean indexingInProgress = new AtomicBoolean(false); + @EJB private SolrIndexServiceBean self; // Self-injection to allow calling methods in new transactions (from other methods in this bean) @@ -103,20 +107,6 @@ public List determineSolrDocs(DvObject dvObject) { return solrDocs; } - private List determineSolrDocsForFilesFromDataset(Map.Entry> datasetHash) { - List emptyList = new ArrayList<>(); - List solrDocs = emptyList; - DvObject dvObject = dvObjectService.findDvObject(datasetHash.getKey()); - if (dvObject == null) { - return emptyList; - } - if (dvObject.isInstanceofDataset()) { - Dataset dataset = (Dataset) dvObject; - solrDocs.addAll(constructDatafileSolrDocsFromDataset(dataset)); - } - return solrDocs; - } - /** * @todo should this method return a List? The equivalent methods for * datasets and files return lists. @@ -150,24 +140,23 @@ private DvObjectSolrDoc constructDatafileSolrDoc(DataFileProxy fileProxy, List constructDatafileSolrDocsFromDataset(Dataset dataset) { - List datafileSolrDocs = new ArrayList<>(); - for (DatasetVersion datasetVersionFileIsAttachedTo : datasetVersionsToBuildCardsFor(dataset)) { - List perms = searchPermissionsService.findDatasetVersionPerms(datasetVersionFileIsAttachedTo); - - for (FileMetadata fileMetadata : datasetVersionFileIsAttachedTo.getFileMetadatas()) { - Long fileId = fileMetadata.getDataFile().getId(); - String solrIdStart = IndexServiceBean.solrDocIdentifierFile + fileId; - String solrIdEnd = getDatasetOrDataFileSolrEnding(datasetVersionFileIsAttachedTo.getVersionState()); - String solrId = solrIdStart + solrIdEnd; - DvObjectSolrDoc dataFileSolrDoc = new DvObjectSolrDoc(fileId.toString(), solrId, datasetVersionFileIsAttachedTo.getId(), fileMetadata.getLabel(), perms); - logger.finest("adding fileid " + fileId); - datafileSolrDocs.add(dataFileSolrDoc); - } +/** Builds one DvObjectSolrDoc for every FileMetadata of every dataset version returned by datasetVersionsToBuildCardsFor(dataset); each doc carries the permissions looked up once for its version. */ private List constructDatafileSolrDocsFromDataset(Dataset 
dataset) { + List datafileSolrDocs = new ArrayList<>(); + for (DatasetVersion datasetVersionFileIsAttachedTo : datasetVersionsToBuildCardsFor(dataset)) { + List perms = searchPermissionsService.findDatasetVersionPerms(datasetVersionFileIsAttachedTo); + + for (FileMetadata fileMetadata : datasetVersionFileIsAttachedTo.getFileMetadatas()) { + Long fileId = fileMetadata.getDataFile().getId(); + String solrIdStart = IndexServiceBean.solrDocIdentifierFile + fileId; + String solrIdEnd = getDatasetOrDataFileSolrEnding(datasetVersionFileIsAttachedTo.getVersionState()); + String solrId = solrIdStart + solrIdEnd; /* Solr id is the file prefix, the file id, then a suffix chosen from the version state */ + DvObjectSolrDoc dataFileSolrDoc = new DvObjectSolrDoc(fileId.toString(), solrId, datasetVersionFileIsAttachedTo.getId(), fileMetadata.getLabel(), perms); + logger.finest("adding fileid " + fileId); + datafileSolrDocs.add(dataFileSolrDoc); } - return datafileSolrDocs; } - + return datafileSolrDocs; +} /** Find the versions to index. The overall logic is * If there is only one version, or no released version (all non-draft versions are deaccessioned) * then index it regardless of it's versionstate @@ -213,60 +202,70 @@ private String getDatasetOrDataFileSolrEnding(DatasetVersion.VersionState versio } } - public IndexResponse indexAllPermissions() { - Collection docs = new ArrayList<>(); + public boolean isIndexingPermissionsInProgress() { + return indexingInProgress.get(); + } - List definitionPoints = new ArrayList<>(); - Map> filesPerDataset = new HashMap<>(); - List allExceptFiles = dvObjectService.findAll(); - for (DvObject dvObject : allExceptFiles) { - logger.fine("determining definition points for dvobject id " + dvObject.getId()); - if (dvObject.isInstanceofDataFile()) { - Long dataset = dvObject.getOwner().getId(); - Long datafile = dvObject.getId(); - - List files = filesPerDataset.get(dataset); - if (files == null) { - files = new ArrayList<>(); - filesPerDataset.put(dataset, files); - } - files.add(datafile); - } else { - 
definitionPoints.addAll(determineSolrDocs(dvObject)); - } + /** Re-indexes Solr permissions for the whole installation by loading every Dataverse id (ordered by id) and calling indexPermissionsOnSelfAndChildren on each collection. Declared @Asynchronous with TransactionAttributeType.NOT_SUPPORTED, so the caller does not wait for it and the loop is not wrapped in a single transaction started for this method. Only one run at a time: if the indexingInProgress flag is already set, the call logs and returns. A missing collection or an exception on one collection is logged and the loop continues; the persistence context is cleared after every 10 processed collections; the flag is reset in the finally block. NOTE(review): whether indexPermissionsOnSelfAndChildren also descends into child collections is not visible here - if it does, nested collections are indexed more than once; confirm. */ @Asynchronous + @TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED) + public void asyncIndexAllPermissions() { + if (!indexingInProgress.compareAndSet(false, true)) { + logger.info("Asynchronous indexing of all permissions is already in progress. Skipping this invocation."); + return; } - List all = allExceptFiles; - for (Map.Entry> filePerDataset : filesPerDataset.entrySet()) { - definitionPoints.addAll(determineSolrDocsForFilesFromDataset(filePerDataset)); - for (long fileId : filePerDataset.getValue()) { - DvObject file = dvObjectService.findDvObject(fileId); - if (file != null) { - all.add(file); - } - } - } + logger.info("Starting asynchronous indexing of all permissions"); + long startTime = System.currentTimeMillis(); - for (DvObjectSolrDoc dvObjectSolrDoc : definitionPoints) { - logger.fine("creating solr doc in memory for " + dvObjectSolrDoc.getSolrId()); - SolrInputDocument solrInputDocument = SearchUtil.createSolrDoc(dvObjectSolrDoc); - logger.fine("adding to list of docs to index " + dvObjectSolrDoc.getSolrId()); - docs.add(solrInputDocument); - } try { - persistToSolr(docs); - /** - * @todo Do we need a separate permissionIndexTime timestamp? - * Probably. Update it here. 
- */ - for (DvObject dvObject : all) { - dvObjectService.updatePermissionIndexTime(dvObject); + + // Get ALL dataverses in the system + List allDataverseIds = em.createQuery( + "SELECT d.id FROM Dataverse d ORDER BY d.id", Long.class) + .getResultList(); + + logger.info("Found " + allDataverseIds.size() + " dataverses to index (each will index its datasets and files)"); + + int processedCount = 0; + + // Index each dataverse (which will automatically index all its datasets and files) + for (Long dataverseId : allDataverseIds) { + try { + Dataverse dataverse = dataverseService.find(dataverseId); + if (dataverse == null) { + logger.warning("Dataverse not found: " + dataverseId); + continue; + } + + logger.fine("Indexing permissions for Dataverse " + dataverseId + + " (" + dataverse.getName() + ") and all its datasets/files"); + + // This will index the dataverse itself and all its direct dataset children (with their files) + IndexResponse response = indexPermissionsOnSelfAndChildren(dataverse); + processedCount++; /* counts only collections that were indexed without throwing */ + + logger.fine("Indexed Dataverse " + dataverseId + ": " + response.getMessage()); + + // Clear persistence context periodically to free memory + if (processedCount % 10 == 0) { + em.clear(); + logger.info("Processed " + processedCount + "/" + allDataverseIds.size() + " dataverses"); + } + + } catch (Exception e) { + logger.log(Level.WARNING, "Error indexing permissions for dataverse " + dataverseId, e); + } } - return new IndexResponse("indexed all permissions"); - } catch (SolrServerException | IOException ex) { - return new IndexResponse("problem indexing"); - } + long duration = System.currentTimeMillis() - startTime; + logger.info("Completed asynchronous indexing of all permissions. 
Processed " + + processedCount + " dataverses (with all their datasets and files) in " + duration + "ms"); + + } catch (Exception e) { + logger.log(Level.SEVERE, "Error during asynchronous permission indexing", e); + } finally { + indexingInProgress.set(false); + } } public IndexResponse indexPermissionsForOneDvObject(DvObject dvObject) {