diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 95eef02a..b9a11d34 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -38,11 +38,11 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@v3 + uses: github/codeql-action/init@v4 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -56,7 +56,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@v3 + uses: github/codeql-action/autobuild@v4 # Command-line programs to run using the OS shell. # See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun @@ -69,4 +69,4 @@ jobs: # ./location_of_script_within_repo/buildscript.sh - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v3 + uses: github/codeql-action/analyze@v4 diff --git a/.github/workflows/gradle.yml b/.github/workflows/gradle.yml index fe8a5ce3..008461e5 100644 --- a/.github/workflows/gradle.yml +++ b/.github/workflows/gradle.yml @@ -25,21 +25,21 @@ jobs: runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up openJDK version - uses: actions/setup-java@v4 + uses: actions/setup-java@v5 with: java-version: ${{ matrix.jdk }} distribution: 'zulu' - name: Install Dependencies run: npm install -g ro-crate-html-js - name: Setup Gradle - uses: gradle/actions/setup-gradle@v4 + uses: gradle/actions/setup-gradle@v5 - name: Build and Test with Gradle run: ./gradlew -Dprofile=release build - name: Upload (test) reports as artifact on GitHub on manual runs if: 
github.event_name == 'workflow_dispatch' - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v5 with: name: test-report ${{ matrix.os }} JDK ${{ matrix.jdk }} path: build/reports diff --git a/.github/workflows/publishRelease.yml b/.github/workflows/publishRelease.yml index b70d034c..dedd9bce 100644 --- a/.github/workflows/publishRelease.yml +++ b/.github/workflows/publishRelease.yml @@ -8,9 +8,9 @@ jobs: publish: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up Java - uses: actions/setup-java@v4 + uses: actions/setup-java@v5 with: java-version: 21 distribution: 'zulu' # openjdk diff --git a/.gitignore b/.gitignore index 2e6ac719..dc7ed9cd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ +.classpath +.project +.settings +.tmp + ### VSCode ### .vscode/* #!.vscode/settings.json diff --git a/build.gradle b/build.gradle index ae540446..8ac1e3cf 100644 --- a/build.gradle +++ b/build.gradle @@ -13,10 +13,10 @@ plugins { // Publishing of JAR to Nexus instances (e.g., OSSRH) // https://github.com/gradle-nexus/publish-plugin id "io.github.gradle-nexus.publish-plugin" version "2.0.0" - id "io.freefair.maven-publish-java" version "8.13.1" + id "io.freefair.maven-publish-java" version "9.0.0" } -group 'edu.kit.datamanager' +group = 'edu.kit.datamanager' description = "A library for easy creation and modification of valid RO-Crates." 
println "Running gradle version: $gradle.gradleVersion" @@ -24,8 +24,10 @@ println "Building ${name} version: ${version}" println "JDK version: ${JavaVersion.current()}" println "Profile (system property): ${System.getProperty('profile')}" -sourceCompatibility = JavaVersion.VERSION_17 -targetCompatibility = JavaVersion.VERSION_17 +java { + sourceCompatibility = JavaVersion.VERSION_17 + targetCompatibility = JavaVersion.VERSION_17 +} if (JavaVersion.current() == JavaVersion.VERSION_17) { println "Setting encoding to UTF-8 manually" @@ -38,12 +40,12 @@ repositories { } ext { - jacksonVersion = '2.19.0' + jacksonVersion = '2.20.1' } dependencies { // JUnit setup for testing - testImplementation(platform("org.junit:junit-bom:5.13.0")) + testImplementation(platform("org.junit:junit-bom:6.0.1")) testImplementation('org.junit.jupiter:junit-jupiter') testRuntimeOnly('org.junit.platform:junit-platform-launcher') // JSON object mapping / (de-)serialization @@ -52,19 +54,19 @@ dependencies { // http client implementation group: 'org.apache.httpcomponents', name: 'httpclient', version: '4.5.14' // common file system operations - implementation group: 'commons-io', name: 'commons-io', version: '2.19.0' + implementation group: 'commons-io', name: 'commons-io', version: '2.21.0' // read from and write to zip files implementation group: 'net.lingala.zip4j', name: 'zip4j', version: '2.11.5' // compare json documents in tests - implementation 'com.github.fslev:json-compare:7.0' + implementation 'com.github.fslev:json-compare:7.1' // url validator - implementation group: 'commons-validator', name: 'commons-validator', version: '1.9.0' + implementation group: 'commons-validator', name: 'commons-validator', version: '1.10.0' // logging implementation group: 'org.slf4j', name: 'slf4j-jdk14', version: '2.0.17' // JSON-LD, Zenodo mapping - implementation group: 'com.apicatalog', name: 'titanium-json-ld', version: '1.6.0' + implementation group: 'com.apicatalog', name: 'titanium-json-ld', 
version: '1.7.0' // metadata validation, profiles based on JSON schema - implementation group: "com.networknt", name: "json-schema-validator", version: "1.5.7" + implementation group: "com.networknt", name: "json-schema-validator", version: "1.5.9" implementation 'org.glassfish:jakarta.json:2.0.1' //JTE for template processing implementation('gg.jte:jte:3.2.1') @@ -189,7 +191,7 @@ jacocoTestReport { } jacoco { - toolVersion = "0.8.13" + toolVersion = "0.8.14" } // maxParallelForks(2) diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index ff23a68d..d4081da4 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,6 +1,6 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-8.14.2-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-8.14.3-bin.zip networkTimeout=10000 validateDistributionUrl=true zipStoreBase=GRADLE_USER_HOME diff --git a/src/main/java/edu/kit/datamanager/ro_crate/Crate.java b/src/main/java/edu/kit/datamanager/ro_crate/Crate.java index 782ff601..652ccdcc 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/Crate.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/Crate.java @@ -7,9 +7,12 @@ import java.util.Set; import edu.kit.datamanager.ro_crate.context.CrateMetadataContext; +import edu.kit.datamanager.ro_crate.hierarchy.HierarchyRecognitionConfig; +import edu.kit.datamanager.ro_crate.hierarchy.HierarchyRecognitionResult; import edu.kit.datamanager.ro_crate.entities.AbstractEntity; import edu.kit.datamanager.ro_crate.entities.contextual.ContextualEntity; import edu.kit.datamanager.ro_crate.entities.data.DataEntity; +import edu.kit.datamanager.ro_crate.entities.data.DataSetEntity; import edu.kit.datamanager.ro_crate.entities.data.RootDataEntity; import edu.kit.datamanager.ro_crate.preview.CratePreview; import 
edu.kit.datamanager.ro_crate.special.CrateVersion; @@ -103,15 +106,27 @@ public interface Crate { String getJsonMetadata(); - DataEntity getDataEntityById(java.lang.String id); + /** + * Gets a data entity by its ID. + * @param id the ID of the data entity + * @return the DataEntity with the specified ID or null if not found + */ + DataEntity getDataEntityById(String id); + + /** + * Gets a data set entity by its ID. + * @param id the ID of the data set entity + * @return the DataSetEntity with the specified ID or empty if not found + */ + Optional getDataSetById(String id); Set getAllDataEntities(); - ContextualEntity getContextualEntityById(java.lang.String id); + ContextualEntity getContextualEntityById(String id); Set getAllContextualEntities(); - AbstractEntity getEntityById(java.lang.String id); + AbstractEntity getEntityById(String id); /** * Adds a data entity to the crate. @@ -120,12 +135,31 @@ public interface Crate { */ void addDataEntity(DataEntity entity); + /** + * Adds a data entity to the crate with a specified parent ID. + *

+ * Consider using + * @param entity the DataEntity to add to this crate. + * @param parentId the ID of the parent entity. Must not be null. + * @throws IllegalArgumentException if parentId is null or not found, or not a DataEntity. + */ + void addDataEntity(DataEntity entity, String parentId) throws IllegalArgumentException; + void addContextualEntity(ContextualEntity entity); void deleteEntityById(String entityId); void setUntrackedFiles(Collection files); + /** + * Unsafely adds a collection of entities to the crate. + *

+ * WARNING: This method does not perform any checks and may lead to an inconsistent crate state. + * + * @param entities the collection of entities to add + * @deprecated use individual add methods to ensure crate consistency. If you really need an unchecked method, consider creating a subclass or contact us at our issue tracker so we can discuss replacements before removal. + */ + @Deprecated(forRemoval = true) void addFromCollection(Collection entities); void addItemFromDataCite(String locationUrl); @@ -135,4 +169,26 @@ public interface Crate { void deleteUrlFromContext(String url); Collection getUntrackedFiles(); + + /** + * Automatically recognizes hierarchical file structure from DataEntity IDs + * and connects them using hasPart relationships. + *

+ * WARNING: This will not change existing hasPart relationships. + * + * @param addInverseRelationships if true, also adds isPartOf relationships from child to parent + * @return result object containing information about what was processed, as well as potential errors. + */ + HierarchyRecognitionResult createDataEntityFileStructure(boolean addInverseRelationships); + + /** + * Automatically recognizes hierarchical file structure from DataEntity IDs + * and connects them using hasPart relationships with fine-grained configuration. + *

+ * Note: Only processes IDs that appear to be relative file paths. + * + * @param config configuration object specifying how the recognition should behave + * @return result object containing information about what was processed, as well as potential errors. + */ + HierarchyRecognitionResult createDataEntityFileStructure(HierarchyRecognitionConfig config); } diff --git a/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java b/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java index 8ebae01e..cb960b4a 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java @@ -4,14 +4,16 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ObjectNode; - import edu.kit.datamanager.ro_crate.context.CrateMetadataContext; import edu.kit.datamanager.ro_crate.context.RoCrateMetadataContext; +import edu.kit.datamanager.ro_crate.hierarchy.HierarchyRecognition; +import edu.kit.datamanager.ro_crate.hierarchy.HierarchyRecognitionConfig; +import edu.kit.datamanager.ro_crate.hierarchy.HierarchyRecognitionResult; import edu.kit.datamanager.ro_crate.entities.AbstractEntity; import edu.kit.datamanager.ro_crate.entities.contextual.ContextualEntity; import edu.kit.datamanager.ro_crate.entities.contextual.JsonDescriptor; import edu.kit.datamanager.ro_crate.entities.data.DataEntity; - +import edu.kit.datamanager.ro_crate.entities.data.DataSetEntity; import edu.kit.datamanager.ro_crate.entities.data.RootDataEntity; import edu.kit.datamanager.ro_crate.externalproviders.dataentities.ImportFromDataCite; import edu.kit.datamanager.ro_crate.objectmapper.MyObjectMapper; @@ -23,7 +25,6 @@ import edu.kit.datamanager.ro_crate.special.JsonUtilFunctions; import edu.kit.datamanager.ro_crate.validation.JsonSchemaValidation; import edu.kit.datamanager.ro_crate.validation.Validator; - import java.io.File; import java.net.URI; import java.util.*; 
@@ -105,12 +106,12 @@ public ContextualEntity getJsonDescriptor() { public void setJsonDescriptor(ContextualEntity jsonDescriptor) { this.jsonDescriptor = jsonDescriptor; } - + @Override public RootDataEntity getRootDataEntity() { return rootDataEntity; } - + @Override public void setRootDataEntity(RootDataEntity rootDataEntity) { this.rootDataEntity = rootDataEntity; @@ -123,8 +124,7 @@ public RoCrate() { this.roCratePayload = new RoCratePayload(); this.untrackedFiles = new HashSet<>(); this.metadataContext = new RoCrateMetadataContext(); - rootDataEntity = new RootDataEntity.RootDataEntityBuilder() - .build(); + rootDataEntity = new RootDataEntity.RootDataEntityBuilder().build(); jsonDescriptor = new JsonDescriptor(); } @@ -150,12 +150,12 @@ public Optional getVersion() { JsonNode conformsTo = this.jsonDescriptor.getProperty("conformsTo"); if (conformsTo.isArray()) { return StreamSupport.stream(conformsTo.spliterator(), false) - .filter(TreeNode::isObject) - .map(obj -> obj.path("@id").asText()) - .map(CrateVersion::fromSpecUri) - .filter(Optional::isPresent) - .map(Optional::get) - .findFirst(); + .filter(TreeNode::isObject) + .map(obj -> obj.path("@id").asText()) + .map(CrateVersion::fromSpecUri) + .filter(Optional::isPresent) + .map(Optional::get) + .findFirst(); } else if (conformsTo.isObject()) { return CrateVersion.fromSpecUri(conformsTo.get("@id").asText()); } else { @@ -168,9 +168,9 @@ public Collection getProfiles() { JsonNode conformsTo = this.jsonDescriptor.getProperty("conformsTo"); if (conformsTo.isArray()) { return StreamSupport.stream(conformsTo.spliterator(), false) - .filter(TreeNode::isObject) - .map(obj -> obj.path("@id").asText()) - .collect(Collectors.toSet()); + .filter(TreeNode::isObject) + .map(obj -> obj.path("@id").asText()) + .collect(Collectors.toSet()); } else { return Collections.emptySet(); } @@ -184,11 +184,19 @@ public String getJsonMetadata() { node.setAll(this.metadataContext.getContextJsonEntity()); var graph = 
objectMapper.createArrayNode(); - ObjectNode root = objectMapper.convertValue(this.rootDataEntity, ObjectNode.class); + ObjectNode root = objectMapper.convertValue( + this.rootDataEntity, + ObjectNode.class + ); graph.add(root); - graph.add(objectMapper.convertValue(this.jsonDescriptor, JsonNode.class)); - if (this.roCratePayload != null && this.roCratePayload.getEntitiesMetadata() != null) { + graph.add( + objectMapper.convertValue(this.jsonDescriptor, JsonNode.class) + ); + if ( + this.roCratePayload != null && + this.roCratePayload.getEntitiesMetadata() != null + ) { graph.addAll(this.roCratePayload.getEntitiesMetadata()); } node.set("@graph", graph); @@ -196,10 +204,19 @@ public String getJsonMetadata() { } @Override - public DataEntity getDataEntityById(java.lang.String id) { + public DataEntity getDataEntityById(String id) { return this.roCratePayload.getDataEntityById(id); } + @Override + public Optional getDataSetById(String id) { + DataEntity data = this.roCratePayload.getDataEntityById(id); + if (data instanceof DataSetEntity) { + return Optional.of((DataSetEntity) data); + } + return Optional.empty(); + } + @Override public Set getAllDataEntities() { return new HashSet<>(this.roCratePayload.getAllDataEntities()); @@ -235,6 +252,45 @@ public void addDataEntity(DataEntity entity) { this.rootDataEntity.addToHasPart(entity.getId()); } + @Override + public void addDataEntity(DataEntity entity, String parentId) + throws IllegalArgumentException { + if (parentId == null) { + throw new IllegalArgumentException("Parent ID is null."); + } + + DataEntity parentEntity = parentId.equals("./") + ? this.getRootDataEntity() + : this.getDataEntityById(parentId); + + if (parentEntity == null) { + throw new IllegalArgumentException( + "Parent ID not found in the crate." + ); + } + + if (parentEntity.getTypes().contains("File")) { + throw new IllegalArgumentException( + "Parent entity cannot be a File." 
+ ); + } + + if (!parentEntity.getTypes().contains("Dataset")) { + throw new IllegalArgumentException( + "Parent entity must be a Dataset in order to contain another DataEntity as a part." + ); + } + + this.metadataContext.checkEntity(entity); + + if (parentEntity instanceof DataSetEntity) { + ((DataSetEntity) parentEntity).addToHasPart(entity.getId()); + } else { + parentEntity.addProperty("hasPart", entity.getId()); + } + this.roCratePayload.addDataEntity(entity); + } + @Override public void addContextualEntity(ContextualEntity entity) { this.metadataContext.checkEntity(entity); @@ -248,8 +304,14 @@ public void deleteEntityById(String entityId) { // remove from the root data entity hasPart this.rootDataEntity.removeFromHasPart(entityId); // remove from the root entity and the file descriptor - JsonUtilFunctions.removeFieldsWith(entityId, this.rootDataEntity.getProperties()); - JsonUtilFunctions.removeFieldsWith(entityId, this.jsonDescriptor.getProperties()); + JsonUtilFunctions.removeFieldsWith( + entityId, + this.rootDataEntity.getProperties() + ); + JsonUtilFunctions.removeFieldsWith( + entityId, + this.jsonDescriptor.getProperties() + ); } @Override @@ -268,7 +330,9 @@ public void deleteUrlFromContext(String key) { } @Override - public void addFromCollection(Collection entities) { + public void addFromCollection( + Collection entities + ) { this.roCratePayload.addEntities(entities); } @@ -282,6 +346,22 @@ public Collection getUntrackedFiles() { return this.untrackedFiles; } + @Override + public HierarchyRecognitionResult createDataEntityFileStructure( + boolean addInverseRelationships + ) { + HierarchyRecognitionConfig config = new HierarchyRecognitionConfig() + .withSetInverseRelationships(addInverseRelationships); + return this.createDataEntityFileStructure(config); + } + + @Override + public HierarchyRecognitionResult createDataEntityFileStructure( + HierarchyRecognitionConfig config + ) { + return new HierarchyRecognition(this, config).buildHierarchy(); + 
} + /** * The inner class builder for the easier creation of a ROCrate. */ @@ -306,13 +386,18 @@ public static class RoCrateBuilder { * @param datePublished the published date of the crate. * @param licenseId the license identifier of the crate. */ - public RoCrateBuilder(String name, String description, String datePublished, String licenseId) { + public RoCrateBuilder( + String name, + String description, + String datePublished, + String licenseId + ) { this.payload = new RoCratePayload(); this.metadataContext = new RoCrateMetadataContext(); this.rootDataEntity = new RootDataEntity.RootDataEntityBuilder() - .addProperty("name", name) - .addProperty(PROPERTY_DESCRIPTION, description) - .build(); + .addProperty("name", name) + .addProperty(PROPERTY_DESCRIPTION, description) + .build(); this.setLicense(licenseId); this.addDatePublishedWithExceptions(datePublished); } @@ -325,13 +410,18 @@ public RoCrateBuilder(String name, String description, String datePublished, Str * @param datePublished the published date of the crate. * @param license the license entity of the crate. 
*/ - public RoCrateBuilder(String name, String description, String datePublished, ContextualEntity license) { + public RoCrateBuilder( + String name, + String description, + String datePublished, + ContextualEntity license + ) { this.payload = new RoCratePayload(); this.metadataContext = new RoCrateMetadataContext(); this.rootDataEntity = new RootDataEntity.RootDataEntityBuilder() - .addProperty("name", name) - .addProperty(PROPERTY_DESCRIPTION, description) - .build(); + .addProperty("name", name) + .addProperty(PROPERTY_DESCRIPTION, description) + .build(); this.setLicense(license); this.addDatePublishedWithExceptions(datePublished); } @@ -343,8 +433,7 @@ public RoCrateBuilder(String name, String description, String datePublished, Con public RoCrateBuilder() { this.payload = new RoCratePayload(); this.metadataContext = new RoCrateMetadataContext(); - rootDataEntity = new RootDataEntity.RootDataEntityBuilder() - .build(); + rootDataEntity = new RootDataEntity.RootDataEntityBuilder().build(); } /** @@ -399,7 +488,47 @@ public RoCrateBuilder addDataEntity(DataEntity dataEntity) { return this; } - public RoCrateBuilder addContextualEntity(ContextualEntity contextualEntity) { + public void addDataEntity(DataEntity entity, String parentId) + throws IllegalArgumentException { + if (parentId == null) { + throw new IllegalArgumentException("Parent ID is null."); + } + + DataEntity parentEntity = parentId.equals("./") + ? this.rootDataEntity + : this.payload.getDataEntityById(parentId); + + if (parentEntity == null) { + throw new IllegalArgumentException( + "Parent ID not found in the crate." + ); + } + + if (parentEntity.getTypes().contains("File")) { + throw new IllegalArgumentException( + "Parent entity cannot be a File." + ); + } + + if (!parentEntity.getTypes().contains("Dataset")) { + throw new IllegalArgumentException( + "Parent entity must be a Dataset in order to contain another DataEntity as a part." 
+ ); + } + + this.metadataContext.checkEntity(entity); + + if (parentEntity instanceof DataSetEntity) { + ((DataSetEntity) parentEntity).addToHasPart(entity.getId()); + } else { + parentEntity.addProperty("hasPart", entity.getId()); + } + this.payload.addDataEntity(entity); + } + + public RoCrateBuilder addContextualEntity( + ContextualEntity contextualEntity + ) { this.metadataContext.checkEntity(contextualEntity); this.payload.addContextualEntity(contextualEntity); return this; @@ -430,7 +559,8 @@ public RoCrateBuilder setLicense(ContextualEntity license) { * @return the builder */ public RoCrateBuilder setLicense(String licenseId) { - ContextualEntity licenseEntity = new ContextualEntity.ContextualEntityBuilder() + ContextualEntity licenseEntity = + new ContextualEntity.ContextualEntityBuilder() .setId(licenseId) .build(); this.setLicense(licenseEntity); @@ -445,8 +575,12 @@ public RoCrateBuilder setLicense(String licenseId) { * @return this builder * @throws IllegalArgumentException if format is not ISO 8601 */ - public RoCrateBuilder addDatePublishedWithExceptions(String dateValue) throws IllegalArgumentException { - this.rootDataEntity.addDateTimePropertyWithExceptions("datePublished", dateValue); + public RoCrateBuilder addDatePublishedWithExceptions(String dateValue) + throws IllegalArgumentException { + this.rootDataEntity.addDateTimePropertyWithExceptions( + "datePublished", + dateValue + ); return this; } @@ -455,12 +589,12 @@ public RoCrateBuilder setContext(CrateMetadataContext context) { return this; } - public RoCrateBuilder addUrlToContext(java.lang.String url) { + public RoCrateBuilder addUrlToContext(String url) { this.metadataContext.addToContextFromUrl(url); return this; } - public RoCrateBuilder addValuePairToContext(java.lang.String key, java.lang.String value) { + public RoCrateBuilder addValuePairToContext(String key, String value) { this.metadataContext.addToContext(key, value); return this; } @@ -507,7 +641,12 @@ public 
BuilderWithDraftFeatures() { /** * @see RoCrateBuilder#RoCrateBuilder(String, String, String, String) */ - public BuilderWithDraftFeatures(String name, String description, String datePublished, String licenseId) { + public BuilderWithDraftFeatures( + String name, + String description, + String datePublished, + String licenseId + ) { super(name, description, datePublished, licenseId); } @@ -515,7 +654,12 @@ public BuilderWithDraftFeatures(String name, String description, String datePubl * @see RoCrateBuilder#RoCrateBuilder(String, String, String, * ContextualEntity) */ - public BuilderWithDraftFeatures(String name, String description, String datePublished, ContextualEntity licenseId) { + public BuilderWithDraftFeatures( + String name, + String description, + String datePublished, + ContextualEntity licenseId + ) { super(name, description, datePublished, licenseId); } @@ -540,9 +684,9 @@ public BuilderWithDraftFeatures(RoCrate crate) { */ public BuilderWithDraftFeatures alsoConformsTo(URI specification) { descriptorBuilder - .addConformsTo(specification) - // usage of a draft feature results in draft version numbers of the crate - .setVersion(CrateVersion.LATEST_UNSTABLE); + .addConformsTo(specification) + // usage of a draft feature results in draft version numbers of the crate + .setVersion(CrateVersion.LATEST_UNSTABLE); return this; } } diff --git a/src/main/java/edu/kit/datamanager/ro_crate/entities/AbstractEntity.java b/src/main/java/edu/kit/datamanager/ro_crate/entities/AbstractEntity.java index 38afcf4e..e978b199 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/entities/AbstractEntity.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/entities/AbstractEntity.java @@ -49,6 +49,11 @@ public class AbstractEntity { private static final EntityValidation entityValidation = new EntityValidation(new JsonSchemaValidation()); + /** + * This set contains all the ids of the entities that are linked by + * this entity. 
This information is provided to crate payloads to make + * the removal of entities faster. + */ @JsonIgnore private final Set linkedTo; @@ -438,7 +443,9 @@ public T addType(String type) { if (this.types == null) { this.types = new HashSet<>(); } - this.types.add(type); + if (type != null && !type.isEmpty()) { + this.types.add(type); + } return self(); } @@ -622,6 +629,13 @@ public T setAllUnsafe(ObjectNode properties) { // This will currently only print errors. AbstractEntity.entityValidation.entityValidation(properties); this.properties = properties; + JsonNode typeProps = properties.path("@type"); + if (typeProps.isArray()) { + typeProps.valueStream() + .forEach(value -> this.addType(value.asText())); + } else if (typeProps.isTextual()) { + this.addType(typeProps.asText()); + } this.relatedItems.addAll(JsonUtilFunctions.getIdPropertiesFromJsonNode(properties)); return self(); } diff --git a/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java b/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java index 2ef078ff..8030c04c 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java @@ -1,12 +1,15 @@ package edu.kit.datamanager.ro_crate.entities.data; import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import com.fasterxml.jackson.databind.node.ObjectNode; import edu.kit.datamanager.ro_crate.entities.serializers.HasPartSerializer; import java.util.HashSet; import java.util.Set; +import java.util.stream.Collectors; /** * A helping class for the creating of Data entities of type Dataset. @@ -18,6 +21,13 @@ public class DataSetEntity extends DataEntity { public static final String TYPE = "Dataset"; + /** + * Points to the parts of this dataset. + *

+ * This will be serialized to and deserialized from the "hasPart" property + * and exists for convenience to represent the additional capabilities of + * a DataSetEntity over a normal DataEntity. + */ @JsonSerialize(using = HasPartSerializer.class) @JsonInclude(JsonInclude.Include.NON_EMPTY) public Set hasPart; @@ -29,7 +39,9 @@ public class DataSetEntity extends DataEntity { */ public DataSetEntity(AbstractDataSetBuilder entityBuilder) { super(entityBuilder); - this.hasPart = entityBuilder.hasPart; + this.hasPart = entityBuilder.hasPart.stream() + .filter(s -> !s.isBlank()) + .collect(Collectors.toSet()); this.addType(TYPE); } @@ -38,13 +50,33 @@ public void removeFromHasPart(String str) { } public void addToHasPart(String id) { - this.hasPart.add(id); + if (id != null && !id.isEmpty()) { + this.hasPart.add(id); + } } + /** + * Check if the hasPart property contains a specific id. + * + * @deprecated use {@link #hasPart(String)} instead. + * + * @param id the id to check for + * @return true if the id is present, false otherwise + */ + @Deprecated(forRemoval = true) public boolean hasInHasPart(String id) { return this.hasPart.contains(id); } + /** + * Check if the hasPart property contains a specific id. 
+ * @param id the id to check for + * @return true if the id is present, false otherwise + */ + public boolean hasPart(String id) { + return this.hasPart.contains(id); + } + abstract static class AbstractDataSetBuilder> extends AbstractDataEntityBuilder { @@ -54,8 +86,8 @@ public AbstractDataSetBuilder() { this.hasPart = new HashSet<>(); } - public T setHasPart(Set hastPart) { - this.hasPart = hastPart; + public T setHasPart(Set hasPart) { + this.hasPart = hasPart; return self(); } @@ -75,6 +107,23 @@ public T addToHasPart(String dataEntity) { return self(); } + @Override + public T setAllUnsafe(ObjectNode properties) { + super.setAllUnsafe(properties); + JsonNode hasPart = properties.path("hasPart"); + String txt = hasPart.asText(); + if (!txt.isBlank()) { + this.hasPart.add(txt); + } + hasPart.valueStream() + .map(JsonNode::asText) + .filter(value -> !value.isBlank()) + .forEach( + value -> this.hasPart.add(value) + ); + return self(); + } + @Override public abstract DataSetEntity build(); } diff --git a/src/main/java/edu/kit/datamanager/ro_crate/hierarchy/HierarchyRecognition.java b/src/main/java/edu/kit/datamanager/ro_crate/hierarchy/HierarchyRecognition.java new file mode 100644 index 00000000..46a278c7 --- /dev/null +++ b/src/main/java/edu/kit/datamanager/ro_crate/hierarchy/HierarchyRecognition.java @@ -0,0 +1,208 @@ +package edu.kit.datamanager.ro_crate.hierarchy; + +import edu.kit.datamanager.ro_crate.Crate; +import edu.kit.datamanager.ro_crate.entities.data.DataEntity; +import edu.kit.datamanager.ro_crate.entities.data.DataSetEntity; +import edu.kit.datamanager.ro_crate.util.FileSystemUtil; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +public class HierarchyRecognition { + protected final Crate crate; + protected final HierarchyRecognitionConfig config; + + public HierarchyRecognition(Crate crate, HierarchyRecognitionConfig config) { + this.crate = crate; + this.config = config; + } + + public 
HierarchyRecognitionResult buildHierarchy() { + HierarchyRecognitionResult result = new HierarchyRecognitionResult(); + + try { + // Get all data entities to process + Set allEntities = this.crate.getAllDataEntities(); + allEntities.add(crate.getRootDataEntity()); + + // Filter entities with file path IDs (not URLs, DOIs, etc.) + Map pathEntities = new HashMap<>(); + for (DataEntity entity : allEntities) { + String id = entity.getId(); + if (FileSystemUtil.isFilePath(id)) { + pathEntities.put(id, entity); + } else { + result.addSkippedEntity(entity); + } + } + + + // Validate hierarchy before making changes + if (!HierarchyRecognition.validateHierarchy(pathEntities, result)) { + return result; + } + + // Create missing intermediate entities if configured + if (config.createMissingIntermediateEntities()) { + this.createMissingIntermediateEntities(pathEntities, result); + } + + // Clear existing relationships if configured + if (config.removeExistingConnections()) { + this.clearExistingRelationships(pathEntities); + } + + // Build hierarchy relationships + this.buildHierarchyRelationships(pathEntities, config, result); + + return result; + } catch (Exception e) { + result.addError( + "Unexpected error during hierarchy recognition: " + + e.getMessage() + ); + return result; + } + } + + /** + * Validates that the hierarchy is consistent (no files containing other files/folders). 
+ * + * @param pathEntities map of path IDs to DataEntities + * @param result builder to collect errors + * @return true if valid, false if invalid hierarchy detected + */ + protected static boolean validateHierarchy( + Map pathEntities, + HierarchyRecognitionResult result + ) { + for (Map.Entry entry : pathEntities.entrySet()) { + String childId = entry.getKey(); + String parentPath = FileSystemUtil.getParentPath(childId); + if (parentPath == null || parentPath.equals("./")) { + continue; + } + + // Check both with and without trailing slash since files don't have slash but folders do + DataEntity parentEntity = pathEntities.get(parentPath); + if (parentEntity == null) { + parentEntity = pathEntities.get(parentPath + "/"); + } + + if (parentEntity == null) { + continue; + } + + // Check for invalid hierarchy: file cannot contain another file/folder + if (parentEntity.getTypes().contains("File")) { + result.addError( + "Invalid hierarchy: file '" + + parentEntity.getId() + + "' cannot contain '" + + childId + + "'" + ); + return false; + } + } + return true; + } + + /** + * Creates missing intermediate DataSetEntity instances for folder paths. 
+ * + * @param pathEntities map of path IDs to DataEntities + * @param result builder to collect created entities + */ + protected void createMissingIntermediateEntities( + Map pathEntities, + HierarchyRecognitionResult result + ) { + Set missingPaths = new HashSet<>(); + + // Find all missing intermediate paths + for (String path : pathEntities.keySet()) { + String parentPath = FileSystemUtil.getParentPath(path); + while (parentPath != null && !parentPath.equals("./")) { + String folderPath = parentPath + "/"; + final boolean containsParent = pathEntities.containsKey(parentPath); + final boolean containsFolder = pathEntities.containsKey(folderPath); + if (!containsParent && !containsFolder) { + missingPaths.add(folderPath); + } + parentPath = FileSystemUtil.getParentPath(parentPath); + } + } + + // Create missing DataSetEntity instances + for (String missingPath : missingPaths) { + DataSetEntity newEntity = new DataSetEntity.DataSetBuilder() + .setId(missingPath) + .addProperty("name", "Auto-generated folder: " + missingPath) + .build(); + + this.crate.addDataEntity(newEntity); + pathEntities.put(missingPath, newEntity); + result.addCreatedEntity(newEntity); + } + } + + protected void buildHierarchyRelationships( + Map pathEntities, + HierarchyRecognitionConfig config, + HierarchyRecognitionResult result + ) { + for (Map.Entry entry : pathEntities.entrySet()) { + String childId = entry.getKey(); + DataEntity childEntity = entry.getValue(); + String parentPath = FileSystemUtil.getParentPath(childId); + if (parentPath == null) { + continue; + } + + // Check both with and without trailing slash since files don't have slash but folders do + DataEntity parentEntity = pathEntities.get(parentPath); + String actualParentId = parentPath; + + if (parentEntity == null) { + parentEntity = pathEntities.get(parentPath + "/"); + actualParentId = parentPath + "/"; + } + + if (parentEntity == null) { + continue; + } + + // Add hasPart relationship + if (parentEntity instanceof 
DataSetEntity) { + ((DataSetEntity) parentEntity).addToHasPart(childId); + result.addProcessedRelationship( + actualParentId, + childId + ); + } + + // Add isPartOf relationship if configured + if (config.createInverseRelationships()) { + childEntity.addProperty("isPartOf", actualParentId); + } + + // Remove from root if it has a parent that is not root + if (!parentPath.equals("./")) { + this.crate.getRootDataEntity().removeFromHasPart(childId); + } + } + } + + protected void clearExistingRelationships( + Map pathEntities + ) { + for (DataEntity entity : pathEntities.values()) { + if (entity instanceof DataSetEntity) { + ((DataSetEntity) entity).hasPart.clear(); + } + } + } +} diff --git a/src/main/java/edu/kit/datamanager/ro_crate/hierarchy/HierarchyRecognitionConfig.java b/src/main/java/edu/kit/datamanager/ro_crate/hierarchy/HierarchyRecognitionConfig.java new file mode 100644 index 00000000..c37372d0 --- /dev/null +++ b/src/main/java/edu/kit/datamanager/ro_crate/hierarchy/HierarchyRecognitionConfig.java @@ -0,0 +1,43 @@ +package edu.kit.datamanager.ro_crate.hierarchy; + +/** + * Configuration class for automatic hierarchy recognition. + * This class provides control over how the hierarchy recognition behaves. + * + *

    + *
  • createMissingIntermediateEntities: Whether missing intermediate folder entities should be automatically created. Default: false
  • + *
  • createInverseRelationships: Whether isPartOf relationships should be added in addition to hasPart. Default: false
  • + *
  • removeExistingConnections: Whether existing hasPart relationships are kept and the recognized ones are added to them (false), or existing hasPart relationships are cleared beforehand (true). Default: false
  • + *
+ */ +public record HierarchyRecognitionConfig( + boolean createMissingIntermediateEntities, + boolean createInverseRelationships, + boolean removeExistingConnections +) { + /** + * Creates a new configuration with default values. + *

+ * Default values: + *

    + *
  • createMissingIntermediateEntities: false
  • + *
  • createInverseRelationships: false
  • + *
  • removeExistingConnections: false
  • + *
+ */ + public HierarchyRecognitionConfig() { + this(false, false, false); + } + + public HierarchyRecognitionConfig withCreateMissingIntermediateEntities(boolean value) { + return new HierarchyRecognitionConfig(value, this.createInverseRelationships, this.removeExistingConnections); + } + + public HierarchyRecognitionConfig withSetInverseRelationships(boolean value) { + return new HierarchyRecognitionConfig(this.createMissingIntermediateEntities, value, this.removeExistingConnections); + } + + public HierarchyRecognitionConfig withRemoveExistingConnections(boolean value) { + return new HierarchyRecognitionConfig(this.createMissingIntermediateEntities, this.createInverseRelationships, value); + } +} diff --git a/src/main/java/edu/kit/datamanager/ro_crate/hierarchy/HierarchyRecognitionResult.java b/src/main/java/edu/kit/datamanager/ro_crate/hierarchy/HierarchyRecognitionResult.java new file mode 100644 index 00000000..ce4eab50 --- /dev/null +++ b/src/main/java/edu/kit/datamanager/ro_crate/hierarchy/HierarchyRecognitionResult.java @@ -0,0 +1,51 @@ +package edu.kit.datamanager.ro_crate.hierarchy; + +import edu.kit.datamanager.ro_crate.entities.data.DataEntity; +import edu.kit.datamanager.ro_crate.entities.data.DataSetEntity; + +import java.util.*; + +/** + * Result class containing information about the automatic hierarchy recognition operation. + * This class provides details about what was processed, created, and any issues encountered + * during the hierarchy recognition process. Always contains complete information about the + * operation result, including success/failure and any errors encountered. 
+ */ +public record HierarchyRecognitionResult( + Set createdEntities, + Map> processedRelationships, + Set skippedEntities, + List warnings, + List errors +) { + HierarchyRecognitionResult() { + this(new HashSet<>(), new HashMap<>(), new HashSet<>(), new ArrayList<>(), new ArrayList<>()); + } + + /** + * Whether there were no errors during the hierarchy recognition operation. + * + * @return true if the operation completed successfully (no errors), false otherwise. + */ + public boolean isSuccessful() { + return this.errors.isEmpty(); + } + + public void addSkippedEntity(DataEntity entity) { + this.skippedEntities.add(entity); + } + + public void addError(String errorMessage) { + this.errors.add(errorMessage); + } + + public void addCreatedEntity(DataSetEntity newEntity) { + this.createdEntities.add(newEntity); + } + + public void addProcessedRelationship(String from, String to) { + this.processedRelationships + .computeIfAbsent(from, k -> new HashSet<>()) + .add(to); + } +} diff --git a/src/main/java/edu/kit/datamanager/ro_crate/hierarchy/package-info.java b/src/main/java/edu/kit/datamanager/ro_crate/hierarchy/package-info.java new file mode 100644 index 00000000..5d8f119f --- /dev/null +++ b/src/main/java/edu/kit/datamanager/ro_crate/hierarchy/package-info.java @@ -0,0 +1,9 @@ +/** + * This package contains classes to enable automatic recognition and construction of + * file and folder hierarchies within RO-Crates. + *

+ * The main functionality is provided by the {@link edu.kit.datamanager.ro_crate.hierarchy.HierarchyRecognition} + * class, which analyzes the paths of data entities and establishes appropriate + * "hasPart" and "isPartOf" relationships to reflect the underlying file system structure. + */ +package edu.kit.datamanager.ro_crate.hierarchy; \ No newline at end of file diff --git a/src/main/java/edu/kit/datamanager/ro_crate/payload/RoCratePayload.java b/src/main/java/edu/kit/datamanager/ro_crate/payload/RoCratePayload.java index f5f09363..bc76d6a2 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/payload/RoCratePayload.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/payload/RoCratePayload.java @@ -20,6 +20,11 @@ public class RoCratePayload implements CratePayload { private final HashMap dataEntities; private final HashMap contextualEntities; + + /** + * A map containing for each entity id a set of ids of entities that are linked to it. + * This is used to make the removal of entities from the crate faster. 
+ */ private final HashMap> associatedItems; /** diff --git a/src/main/java/edu/kit/datamanager/ro_crate/reader/CrateReader.java b/src/main/java/edu/kit/datamanager/ro_crate/reader/CrateReader.java index 271725b5..15cae146 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/reader/CrateReader.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/reader/CrateReader.java @@ -8,6 +8,7 @@ import edu.kit.datamanager.ro_crate.context.RoCrateMetadataContext; import edu.kit.datamanager.ro_crate.entities.contextual.ContextualEntity; import edu.kit.datamanager.ro_crate.entities.data.DataEntity; +import edu.kit.datamanager.ro_crate.entities.data.DataSetEntity; import edu.kit.datamanager.ro_crate.entities.data.RootDataEntity; import edu.kit.datamanager.ro_crate.special.IdentifierUtils; import edu.kit.datamanager.ro_crate.special.JsonUtilFunctions; @@ -114,31 +115,47 @@ private RoCrate rebuildCrate(ObjectNode metadataJson, File files, HashSet dataEntityIds = getDataEntityIds(root, graph); - for (JsonNode entityJson : graph) { - String eId = unpackId(entityJson); - if (dataEntityIds.contains(eId)) { + } + RootDataEntity root = crate.getRootDataEntity(); + boolean rootExtractionSuccessful = root != null; + + if (rootExtractionSuccessful) { + Set dataEntityIds = getDataEntityIds(root, graph); + for (JsonNode entityJson : graph) { + String eId = unpackId(entityJson); + ObjectNode properties = entityJson.deepCopy(); + boolean isDataEntity = dataEntityIds.contains(eId); + + if (isDataEntity) { + DataEntity data = null; + boolean isDataSet = properties.path("@type").asText().equals("Dataset") + || properties.path("@type").valueStream() + .anyMatch(typeString -> typeString.asText().equals("Dataset")); + if (isDataSet) { + data = new DataSetEntity.DataSetBuilder() + .setAllUnsafe(properties) + .build(); + } else { // data entity - DataEntity.DataEntityBuilder dataEntity = new DataEntity.DataEntityBuilder() - .setAllUnsafe(entityJson.deepCopy()); + DataEntity.DataEntityBuilder 
builder = new DataEntity.DataEntityBuilder() + .setAllUnsafe(properties); // Handle data entities with corresponding file checkFolderHasFile(entityJson.get(PROP_ID).asText(), files).ifPresent(file -> { usedFiles.add(file.getPath()); - dataEntity.setLocationWithExceptions(file.toPath()) + builder.setLocationWithExceptions(file.toPath()) .setId(file.getName()); }); - - crate.addDataEntityWithoutRootHasPart(dataEntity.build()); - } else { - // contextual entity - crate.addContextualEntity( - new ContextualEntity.ContextualEntityBuilder() - .setAllUnsafe(entityJson.deepCopy()) - .build()); + data = builder.build(); } + crate.addDataEntityWithoutRootHasPart(data); + + } else { + // contextual entity + crate.addContextualEntity( + new ContextualEntity.ContextualEntityBuilder() + .setAllUnsafe(properties) + .build()); } } } diff --git a/src/main/java/edu/kit/datamanager/ro_crate/util/FileSystemUtil.java b/src/main/java/edu/kit/datamanager/ro_crate/util/FileSystemUtil.java index e4d75442..82c39529 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/util/FileSystemUtil.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/util/FileSystemUtil.java @@ -12,6 +12,54 @@ private FileSystemUtil() { // Utility class, no instantiation } + /** + * Checks if the given ID appears to be a file path. + *

+ * Specifically excludes IDs starting with "doi:", "http://", or "https://". + * + * @param id the ID to check + * @return true if it looks like a file path, false otherwise (also false for null) + */ + public static boolean isFilePath(String id) { + return id != null && !( + id.startsWith("doi:") || + id.startsWith("http://") || + id.startsWith("https://") + ); + } + + /** + * Gets the parent path of a given path; a parent that is the root is always reported as "./". + * @param path the path to evaluate, with or without a trailing slash. + * @return the parent path without trailing slash, "./" if the parent is the root, or null if no parent exists (null, empty or "./" input). + */ + public static String getParentPath(String path) { + if (path == null || path.equals("./") || path.isEmpty()) { + return null; + } + + // Normalize path - remove trailing slash for consistency + String normalizedPath = path.endsWith("/") + ? path.substring(0, path.length() - 1) + : path; + + int lastSlash = normalizedPath.lastIndexOf('/'); + if (lastSlash == -1) { + return "./"; // Root directory + } + + String parentPath = normalizedPath.substring(0, lastSlash); + + // If parent is empty or "." (as for "./file.txt"), it's root; callers compare against "./" + if (parentPath.isEmpty() || parentPath.equals(".")) { + return "./"; + } + + // For validation, we need to check both with and without trailing slash + // since files don't have trailing slash but folders do + return parentPath; + } + /** * Removes a specific set of given file extensions from a file name, if present. * The extensions are case-insensitive. Given "ELN", "eln" or "Eln" will also match.
diff --git a/src/test/java/edu/kit/datamanager/ro_crate/crate/HasPartTest.java b/src/test/java/edu/kit/datamanager/ro_crate/crate/HasPartTest.java new file mode 100644 index 00000000..49811fc7 --- /dev/null +++ b/src/test/java/edu/kit/datamanager/ro_crate/crate/HasPartTest.java @@ -0,0 +1,202 @@ +package edu.kit.datamanager.ro_crate.crate; + +import edu.kit.datamanager.ro_crate.Crate; +import edu.kit.datamanager.ro_crate.RoCrate; +import edu.kit.datamanager.ro_crate.entities.data.DataSetEntity; +import edu.kit.datamanager.ro_crate.entities.data.FileEntity; +import edu.kit.datamanager.ro_crate.reader.Readers; +import edu.kit.datamanager.ro_crate.writer.Writers; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.IOException; +import java.nio.file.Path; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Tests related to the addDataEntity(DataEntity, String) method and the hasPart + * property. + */ +public class HasPartTest { + + @Nested + @DisplayName("Test for crate (not the builder!) addDataEntity(DataEntity, String)") + class CrateHasPartTest { + private RoCrate crate; + + @BeforeEach + void setUp() { + crate = new RoCrate.RoCrateBuilder( + "Test Crate", + "HasPartTest", + "2025", + "https://creativecommons.org/licenses/by/4.0/" + ).build(); + } + + @Test + public void givenEmptyCrate_whenAddingWithConnection_thenThrowsException() { + // Given empty crate (default) + // ... + // When adding entity with connection, Throws Exception + FileEntity d = new FileEntity.FileEntityBuilder().build(); + assertThrows(IllegalArgumentException.class, () -> this.crate.addDataEntity(d, "nonexitent")); + } + + @Test + public void givenEmptyCrate_whenAddingToRoot_thenConnectionExists() { + // Given empty crate (default) + // ... 
+ // When adding entity to root + final String id = "d"; + FileEntity d = new FileEntity.FileEntityBuilder() + .setId(id) + .build(); + this.crate.addDataEntity(d, "./"); + // Then root added entity with hasPart + assertTrue(this.crate.getRootDataEntity().hasPart(id)); + assertNotNull(this.crate.getEntityById(id)); + } + + @Test + public void givenCrateWithFolder_whenAddingToFolder_thenConnectionExists() { + // Given crate with folder + this.crate = new RoCrate.RoCrateBuilder() + .addDataEntity(new DataSetEntity.DataSetBuilder().setId("./folder").build()) + .build(); + // When adding entity to folder + String dataId = "d"; + FileEntity d = new FileEntity.FileEntityBuilder() + .setId(dataId) + .build(); + this.crate.addDataEntity(d, "./folder"); + // Then this connection exists + // Cast required because type was not yet serialized and is not yet in properties. + assertTrue(((DataSetEntity) this.crate.getDataEntityById("./folder")).hasPart(dataId)); + assertNotNull(this.crate.getEntityById(dataId)); + } + + @Test + public void givenCrateWithFolderWithFile_whenReadingFromDisk_thenConnectionExists( + @TempDir Path path + ) throws IOException { + // Given crate from disk + String folderId = "./folder/"; + this.crate = new RoCrate.RoCrateBuilder() + .addDataEntity(new DataSetEntity.DataSetBuilder().setId(folderId).build()) + .build(); + + // When adding entity to folder + String dataId = "d"; + FileEntity d = new FileEntity.FileEntityBuilder() + .setId(dataId) + .build(); + this.crate.addDataEntity(d, folderId); + + Writers.newFolderWriter().save(this.crate, path.toString()); + Crate read = Readers.newFolderReader().readCrate(path.toString()); + + // Then this connection exists + // Note how the types are loaded when deserializing. Alternatively, you can find them in their properties. + assertTrue(read.getDataEntityById(folderId).getTypes().contains("Dataset")); + // Note how you can cast an entity to a dataSetEntity. 
+ assertTrue(read.getDataSetById(folderId).orElseThrow().hasPart(dataId)); + } + } + + @Nested + @DisplayName("Testing the builder addDataEntity(DataEntity, String)") + class BuilderHasPartTest { + private RoCrate.RoCrateBuilder builder; + + @BeforeEach + void setUp() { + builder = new RoCrate.RoCrateBuilder( + "Test Crate", + "HasPartTest", + "2025", + "https://creativecommons.org/licenses/by/4.0/" + ); + } + + @Test + public void givenEmptyCrate_whenAddingWithConnection_thenThrowsException() { + // Given empty crate (default) + // ... + // When adding entity with connection, Throws Exception + FileEntity d = new FileEntity.FileEntityBuilder().build(); + assertThrows(IllegalArgumentException.class, () -> this.builder.addDataEntity(d, "nonexitent")); + } + + @Test + public void givenEmptyCrate_whenAddingToRoot_thenConnectionExists() { + // Given empty crate (default) + // ... + // When adding entity to root + final String id = "d"; + FileEntity d = new FileEntity.FileEntityBuilder() + .setId(id) + .build(); + this.builder.addDataEntity(d, "./"); + // Then root added entity with hasPart + Crate crate = this.builder.build(); + assertTrue(crate.getRootDataEntity().hasPart(id)); + assertNotNull(crate.getEntityById(id)); + } + + @Test + public void givenCrateWithFolder_whenAddingToFolder_thenConnectionExists() { + // Given crate with folder + this.builder.addDataEntity( + new DataSetEntity.DataSetBuilder() + .setId("./folder") + .build() + ); + // When adding entity to folder + String dataId = "d"; + FileEntity d = new FileEntity.FileEntityBuilder() + .setId(dataId) + .build(); + this.builder.addDataEntity(d, "./folder"); + // Then this connection exists + Crate crate = this.builder.build(); + // Cast required because type was not yet serialized and is not yet in properties. 
+ assertTrue(((DataSetEntity) crate.getDataEntityById("./folder")).hasPart(dataId)); + assertNotNull(crate.getEntityById(dataId)); + } + + @Test + public void givenCrateWithFolderWithFile_whenReadingFromDisk_thenConnectionExists( + @TempDir Path path + ) throws IOException { + // Given crate from disk + String folderId = "./folder/"; + this.builder.addDataEntity( + new DataSetEntity.DataSetBuilder() + .setId(folderId) + .build() + ); + + + // When adding entity to folder + String dataId = "d"; + FileEntity d = new FileEntity.FileEntityBuilder() + .setId(dataId) + .build(); + this.builder.addDataEntity(d, folderId); + + Writers.newFolderWriter().save(this.builder.build(), path.toString()); + Crate read = Readers.newFolderReader().readCrate(path.toString()); + + // Then this connection exists + // Note how the types are loaded when deserializing. Alternatively, you can find them in their properties. + assertTrue(read.getDataEntityById(folderId).getTypes().contains("Dataset")); + // Note how you can cast an entity to a dataSetEntity. 
+ assertTrue(read.getDataSetById(folderId).orElseThrow().hasPart(dataId)); + } + } +} \ No newline at end of file diff --git a/src/test/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntityTest.java b/src/test/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntityTest.java index b6d51243..5ae4142d 100644 --- a/src/test/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntityTest.java +++ b/src/test/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntityTest.java @@ -72,7 +72,7 @@ void testDirWithHasPartDeserialization() throws IOException { .addToHasPart(second_content) .build(); - assertTrue(dir.hasInHasPart(id)); + assertTrue(dir.hasPart(id)); HelpFunctions.compareEntityWithFile(dir, "/json/entities/data/directoryWeb.json"); } diff --git a/src/test/java/edu/kit/datamanager/ro_crate/entities/data/RootDataEntityTest.java b/src/test/java/edu/kit/datamanager/ro_crate/entities/data/RootDataEntityTest.java index b846ef87..76033f70 100644 --- a/src/test/java/edu/kit/datamanager/ro_crate/entities/data/RootDataEntityTest.java +++ b/src/test/java/edu/kit/datamanager/ro_crate/entities/data/RootDataEntityTest.java @@ -46,8 +46,8 @@ void testSerialization() throws IOException { .addAuthor("a2") .build(); - assertTrue(rootDataEntity.hasInHasPart(id1)); - assertTrue(rootDataEntity.hasInHasPart(id2)); + assertTrue(rootDataEntity.hasPart(id1)); + assertTrue(rootDataEntity.hasPart(id2)); HelpFunctions.compareEntityWithFile(rootDataEntity, "/json/entities/data/root.json"); } diff --git a/src/test/java/edu/kit/datamanager/ro_crate/hierarchy/HierarchyRecognitionTest.java b/src/test/java/edu/kit/datamanager/ro_crate/hierarchy/HierarchyRecognitionTest.java new file mode 100644 index 00000000..e4c82e36 --- /dev/null +++ b/src/test/java/edu/kit/datamanager/ro_crate/hierarchy/HierarchyRecognitionTest.java @@ -0,0 +1,366 @@ +package edu.kit.datamanager.ro_crate.hierarchy; + +import static org.junit.jupiter.api.Assertions.*; + +import 
edu.kit.datamanager.ro_crate.RoCrate; +import edu.kit.datamanager.ro_crate.entities.data.DataEntity; +import edu.kit.datamanager.ro_crate.entities.data.DataSetEntity; +import edu.kit.datamanager.ro_crate.entities.data.FileEntity; +import java.nio.file.Paths; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** + * Automatic Hierarchy Recognition - API Tests & Usage Examples + */ +public class HierarchyRecognitionTest { + + private RoCrate crate; + + @BeforeEach + void setUp() { + crate = new RoCrate.RoCrateBuilder( + "Test Crate", + "A crate for testing hierarchy recognition", + "2024", + "https://creativecommons.org/licenses/by/4.0/" + ).build(); + } + + /** + * One-directional recognition in simple hierarchy. + */ + @Test + void givenFilesInFolderHierarchy_whenRecognizeStructure_thenEstablishesParentChildRelations() { + // Given: A crate with files and folders in a hierarchy + FileEntity file1 = new FileEntity.FileEntityBuilder() + .setId("data/raw/experiment1.csv") + .setLocationWithExceptions(Paths.get("test1.csv")) + .build(); + + FileEntity file2 = new FileEntity.FileEntityBuilder() + .setId("data/processed/results.txt") + .setLocationWithExceptions(Paths.get("test2.txt")) + .build(); + + DataSetEntity dataFolder = new DataSetEntity.DataSetBuilder() + .setId("data/") + .addProperty("name", "Data Directory") + .build(); + + DataSetEntity rawFolder = new DataSetEntity.DataSetBuilder() + .setId("data/raw/") + .addProperty("name", "Raw Data") + .build(); + + DataSetEntity processedFolder = new DataSetEntity.DataSetBuilder() + .setId("data/processed/") + .addProperty("name", "Processed Data") + .build(); + + crate.addDataEntity(file1); + crate.addDataEntity(file2); + crate.addDataEntity(dataFolder); + crate.addDataEntity(rawFolder); + crate.addDataEntity(processedFolder); + + // When: We automatically recognize hierarchy + crate.createDataEntityFileStructure(false); + + // Then: Hierarchy should be established + 
assertTrue(dataFolder.hasPart("data/raw/")); + assertTrue(dataFolder.hasPart("data/processed/")); + assertTrue(rawFolder.hasPart("data/raw/experiment1.csv")); + assertTrue(processedFolder.hasPart("data/processed/results.txt")); + + // Root should only contain top-level entities + var root = crate.getRootDataEntity(); + assertTrue(root.hasPart("data/")); + assertEquals(1, root.hasPart.size()); + } + + /** + * Adding bidirectional relationships. + */ + @Test + void givenFileInFolder_whenRecognizeWithIsPartOf_thenCreatesBidirectionalRelations() { + FileEntity file = new FileEntity.FileEntityBuilder() + .setId("folder/file.txt") + .setLocationWithExceptions(Paths.get("test.txt")) + .build(); + + DataSetEntity folder = new DataSetEntity.DataSetBuilder() + .setId("folder/") + .build(); + + crate.addDataEntity(file); + crate.addDataEntity(folder); + + // When: We enable isPartOf relationships + crate.createDataEntityFileStructure(true); + + // Then: Both hasPart and isPartOf should be set + assertTrue(folder.hasPart("folder/file.txt")); + assertEquals("folder/", file.getProperties().get("isPartOf").asText()); + // same for root! + var root = crate.getRootDataEntity(); + assertTrue(root.hasPart("folder/")); + assertEquals(1, root.hasPart.size()); + assertEquals(root.getId(), folder.getProperties().get("isPartOf").asText("")); + } + + /** + * Advanced configuration with missing-folder-creation enabled. 
+ */ + @Test + void givenDeepNestedPathWithMissingIntermediates_whenRecognizeWithCreateMissing_thenCreatesAllIntermediateEntities() { + FileEntity file = new FileEntity.FileEntityBuilder() + .setId("data/deep/nested/file.txt") + .setLocationWithExceptions(Paths.get("test.txt")) + .build(); + + crate.addDataEntity(file); + + // When: We configure to create missing intermediate entities + HierarchyRecognitionConfig config = new HierarchyRecognitionConfig() + .withCreateMissingIntermediateEntities(true) + .withSetInverseRelationships(true) + .withRemoveExistingConnections(true); + + HierarchyRecognitionResult result = crate.createDataEntityFileStructure( + config + ); + + // Then: Missing intermediate entities should be created + assertTrue(result.isSuccessful()); + assertNotNull(crate.getDataEntityById("data/")); + assertNotNull(crate.getDataEntityById("data/deep/")); + assertNotNull(crate.getDataEntityById("data/deep/nested/")); + + // And hierarchy should be established + assertTrue( + ((DataSetEntity) crate.getDataEntityById("data/")).hasPart( + "data/deep/" + ) + ); + assertTrue( + ((DataSetEntity) crate.getDataEntityById("data/deep/")).hasPart( + "data/deep/nested/" + ) + ); + assertTrue( + ((DataSetEntity) crate.getDataEntityById( + "data/deep/nested/" + )).hasPart("data/deep/nested/file.txt") + ); + } + + /** + * Removing existing manual relationships. 
+ */ + @Test + void givenFolderWithExistingRelations_whenRecognizeWithRemoveExisting_thenKeepsOnlyNewRelations() { + FileEntity file1 = new FileEntity.FileEntityBuilder() + .setId("folder/file1.txt") + .setLocationWithExceptions(Paths.get("test1.txt")) + .build(); + + FileEntity file2 = new FileEntity.FileEntityBuilder() + .setId("folder/file2.txt") + .setLocationWithExceptions(Paths.get("test2.txt")) + .build(); + + DataSetEntity folder = new DataSetEntity.DataSetBuilder() + .setId("folder/") + .addToHasPart("manually-added-entity") + .build(); + + crate.addDataEntity(file1); + crate.addDataEntity(file2); + crate.addDataEntity(folder); + + // When: We configure recognition to remove existing relationships first + HierarchyRecognitionConfig config = new HierarchyRecognitionConfig() + .withRemoveExistingConnections(true); + + crate.createDataEntityFileStructure(config); + + // Then: The manual relationship is gone and only the recognized ones exist + assertFalse(folder.hasPart("manually-added-entity")); + assertTrue(folder.hasPart("folder/file1.txt")); + assertTrue(folder.hasPart("folder/file2.txt")); + } + + /** + * Default behavior keeps existing relationships.
+ */ + @Test + void givenFolderWithExistingRelations_whenRecognizeWithDefaultBehavior_thenKeepsExistingRelations() { + FileEntity file = new FileEntity.FileEntityBuilder() + .setId("folder/file.txt") + .setLocationWithExceptions(Paths.get("test.txt")) + .build(); + + DataSetEntity folder = new DataSetEntity.DataSetBuilder() + .setId("folder/") + .addToHasPart("manually-added-entity") + .build(); + + crate.addDataEntity(file); + crate.addDataEntity(folder); + + // When: We use default behavior (keep existing) + crate.createDataEntityFileStructure(false); + + // Then: Both the existing and the newly recognized relationships should exist + assertTrue(folder.hasPart("manually-added-entity")); + assertTrue(folder.hasPart("folder/file.txt")); + } + + /** + * Test skipping non-file-path IDs + */ + @Test + void givenMixOfFilePathsUrlsAndDois_whenRecognizeStructure_thenProcessesOnlyFilePaths() { + FileEntity localFile = new FileEntity.FileEntityBuilder() + .setId("folder/file.txt") + .setLocationWithExceptions(Paths.get("test.txt")) + .build(); + + DataEntity remoteEntity = new DataEntity.DataEntityBuilder() + .setId("https://example.com/remote-file.txt") + .addType("File") + .build(); + + DataEntity doiEntity = new DataEntity.DataEntityBuilder() + .setId("doi:10.1234/example") + .addType("CreativeWork") + .build(); + + DataSetEntity folder = new DataSetEntity.DataSetBuilder() + .setId("folder/") + .build(); + + crate.addDataEntity(localFile); + crate.addDataEntity(remoteEntity); + crate.addDataEntity(doiEntity); + crate.addDataEntity(folder); + + // When: We recognize hierarchy + crate.createDataEntityFileStructure(false); + + // Then: Only local file paths should be processed + assertTrue(folder.hasPart("folder/file.txt")); + assertFalse(folder.hasPart("https://example.com/remote-file.txt")); + assertFalse(folder.hasPart("doi:10.1234/example")); + + // Remote and DOI entities should remain in root + assertTrue( + crate + .getRootDataEntity() + .hasPart("https://example.com/remote-file.txt") + ); +
assertTrue(crate.getRootDataEntity().hasPart("doi:10.1234/example")); + } + + /** + * Test error handling with circular references + */ + @Test + void givenEntitiesWithCircularPathReferences_whenRecognizeStructure_thenHandlesGracefullyWithoutException() { + // This would be a malformed crate, but we should handle it gracefully + DataSetEntity folder1 = new DataSetEntity.DataSetBuilder() + .setId("folder1/") + .build(); + + DataSetEntity folder2 = new DataSetEntity.DataSetBuilder() + .setId("folder1/folder2/") + .build(); + + // Manually create circular reference in IDs (this is contrived but tests the logic) + DataEntity circularEntity = new DataEntity.DataEntityBuilder() + .setId("folder1/folder2/../../../folder1/file.txt") // resolves to folder1/file.txt + .addType("File") + .build(); + + crate.addDataEntity(folder1); + crate.addDataEntity(folder2); + crate.addDataEntity(circularEntity); + + // When/Then: Should handle gracefully + assertDoesNotThrow(() -> { + // When: Default configuration for hierarchy recognition + HierarchyRecognitionResult result = + crate.createDataEntityFileStructure( + new HierarchyRecognitionConfig() + ); + // Then: Does not throw exception or error. 
+ assertTrue(result.isSuccessful()); + }); + } + + /** + * Test validation before any changes are made + */ + @Test + void givenInvalidEntityData_whenRecognizeStructure_thenFailsWithoutMakingChanges() { + // Given: A file appears to be inside another file (invalid hierarchy) + FileEntity parentFile = new FileEntity.FileEntityBuilder() + .setId("document.pdf") + .setLocationWithExceptions(Paths.get("document.pdf")) + .build(); + + FileEntity childFile = new FileEntity.FileEntityBuilder() + .setId("document.pdf/embedded_data.txt") // Invalid: file inside a file + .setLocationWithExceptions(Paths.get("embedded.txt")) + .build(); + + crate.addDataEntity(parentFile); + crate.addDataEntity(childFile); + + // When: We try to recognize hierarchy + HierarchyRecognitionConfig config = new HierarchyRecognitionConfig(); + HierarchyRecognitionResult result = crate.createDataEntityFileStructure( + config + ); + + // Then: Should fail without making any changes + assertFalse(result.isSuccessful()); + + // Original state should be preserved + assertTrue(crate.getRootDataEntity().hasPart("document.pdf")); + assertTrue( + crate.getRootDataEntity().hasPart("document.pdf/embedded_data.txt") + ); + } + + /** + * Test result object provides useful information + */ + @Test + void givenFileRequiringIntermediateCreation_whenRecognizeStructure_thenReturnsDetailedOperationInfo() { + FileEntity file = new FileEntity.FileEntityBuilder() + .setId("folder/file.txt") + .setLocationWithExceptions(Paths.get("test.txt")) + .build(); + + crate.addDataEntity(file); + + HierarchyRecognitionConfig config = new HierarchyRecognitionConfig() + .withCreateMissingIntermediateEntities(true); + + // When: We recognize hierarchy + HierarchyRecognitionResult result = crate.createDataEntityFileStructure( + config + ); + + // Then: Result should provide useful information + assertTrue(result.isSuccessful()); + + assertEquals(1, result.createdEntities().size()); // "folder/" was created + assertEquals(2, 
result.processedRelationships().size()); // root -> folder -> file relationship + assertTrue(result.skippedEntities().isEmpty()); // no entities skipped + assertTrue(result.warnings().isEmpty()); // no warnings + } +}