diff --git a/.github/workflows/gradle.yml b/.github/workflows/gradle.yml index ba8a9308..fe8a5ce3 100644 --- a/.github/workflows/gradle.yml +++ b/.github/workflows/gradle.yml @@ -11,7 +11,6 @@ on: push: branches: [ main, development ] pull_request: - branches: [ main, development ] workflow_dispatch: env: diff --git a/README.md b/README.md index e5613a23..6d896722 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ and avoiding crates which do not fully comply to the specification, at the same - [Instructions for your build manager (e.g., Gradle, Maven, etc.)](https://central.sonatype.com/artifact/edu.kit.datamanager/ro-crate-java) - [Quick-Start](#quick-start) -- [Adapting Specification Examples](#adapting-the-specification-examples) +- [JavaDoc Documentation](https://javadoc.io/doc/edu.kit.datamanager/ro-crate-java) - [Related Publications](https://publikationen.bibliothek.kit.edu/publikationslisten/get.php?referencing=all&external_publications=kit&lang=de&format=html&style=kit-3lines-title_b-authors-other&consider_suborganizations=true&order=desc%20year&contributors=%5B%5B%5B%5D%2C%5B%22p20751.105%22%5D%5D%5D&title_contains=crate) ## Build the library / documentation @@ -31,687 +31,34 @@ On Windows, replace `./gradlew` with `gradlew.bat`. 
## RO-Crate Specification Compatibility -- ✅ Version 1.1 +- ✅ [Version 1.1](https://www.researchobject.org/ro-crate/1.1/) ([Extracted examples as well-described unit tests/guide](src/test/java/edu/kit/datamanager/ro_crate/examples/ExamplesOfSpecificationV1p1Test.java)) - 🛠️ Version 1.2-DRAFT - ✅ Reading and writing crates with additional profiles or specifications ([examples for reading](src/test/java/edu/kit/datamanager/ro_crate/reader/RoCrateReaderSpec12Test.java), [examples for writing](src/test/java/edu/kit/datamanager/ro_crate/writer/RoCrateWriterSpec12Test.java)) - ✅ Adding profiles or other specifications to a crate ([examples](src/test/java/edu/kit/datamanager/ro_crate/crate/BuilderSpec12Test.java)) ## Quick-start -### Example for a basic crate from [RO-Crate website](https://www.researchobject.org/ro-crate/1.1/root-data-entity.html#ro-crate-metadata-file-descriptor) -```java -RoCrate roCrate = new RoCrateBuilder("name", "description", "datePublished", "licenseIdentifier").build(); -``` -### Example adding a File (Data Entity) and a context pair +` ro-crate-java` makes use of the builder pattern to guide the user to create a valid RO-Crate, similar to: + ```java -RoCrate roCrate = new RoCrateBuilder("name", "description", "datePublished", "licenseIdentifier") - .addValuePairToContext("Station", "www.station.com") - .addUrlToContext("contextUrl") +RoCrate myFirstCrate = STARTER_CRATE .addDataEntity( - new FileEntity.FileEntityBuilder() - .setId("survey-responses-2019.csv") - .addProperty("name", "Survey responses") - .addProperty("contentSize", "26452") - .addProperty("encodingFormat", "text/csv") - .build() + new FileEntity.FileEntityBuilder() + .setId("path/within/crate/survey-responses-2019.csv") + .setLocation(Paths.get("path/to/current/location/experiment.csv")) + .addProperty("name", "Survey responses") + .addProperty("contentSize", "26452") + .addProperty("encodingFormat", "text/csv") + .build() ) - .addDataEntity(...) - ... 
- .addContextualEntity(...) - ... - .build(); -``` - -The library currently comes with three specialized DataEntities: - -1. `DataSetEntity` -2. `FileEntity` (used in the example above) -3. `WorkflowEntity` - -If another type of `DataEntity` is required, the base class `DataEntity` can be used. Example: -```java -new DataEntity.DataEntityBuilder() - .addType("CreativeWork") - .setId("ID") - .addProperty("property from schema.org/Creativework", "value") - .build(); -``` -Note that here you are supposed to add the type of your `DataEntity` because it is not known. - -A `DataEntity` and its subclasses can have a file located on the web. Example: - -Example adding file: -```java -new FileEntity.FileEntityBuilder() - .addContent(URI.create("https://github.com/kit-data-manager/ro-crate-java/issues/5")) - .addProperty("description", "my new file that I added") - .build(); -``` - -A `DataEntity` and its subclasses can have a local file associated with them, -instead of one located on the web (which link is the ID of the data entity). Example: - -Example adding file: -```java -new FileEntity.FileEntityBuilder() - .addContent(Paths.get("file"), "new_file.txt") - .addProperty("description", "my new local file that I added") - .build(); -``` - -### Contextual Entities - -Contextual entities cannot be associated with a file (they are pure metadata). - -To add a contextual entity to a crate you use the function `.addContextualEntity(ContextualEntity entity)`. -Some types of derived/specializes entities are: -1. `OrganizationEntity` -2. `PersonEntity` -3. `PlaceEntity` - -If you need another type of contextual entity, use the base class `ContextualEntity`. - -The library provides a way to automatically create contextual entities from external providers. Currently, support for [ORCID](https://orcid.org/) and [ROR](https://ror.org/) is implemented. 
Example: -```java -PersonEntity person = ORCIDProvider.getPerson("https://orcid.org/*") -OrganizationEntity organization = RORProvider.getOrganization("https://ror.org/*"); -``` - -### Writing Crate to folder, zip file, or zip stream - -Writing to folder: -```java -RoCrateWriter folderRoCrateWriter = new RoCrateWriter(new FolderWriter()); -folderRoCrateWriter.save(roCrate, "destinationFolder"); -``` - -Writing to zip file: -```java -RoCrateWriter roCrateZipWriter = new RoCrateWriter(new ZipWriter()); -roCrateZipWriter.save(roCrate, "destinationFolder"); -``` - -Writing to zip stream: -```java -RoCrateWriter roCrateZipStreamWriter = new RoCrateWriter(new ZipStreamWriter()); -roCrateZipStreamWriter.save(roCrate, outputStream); -``` - -More writing strategies can be implemented, if required. - -### Reading / importing Crate from folder or zip - -Reading from folder: -```java -RoCrateReader roCrateFolderReader = new RoCrateReader(new FolderReader()); -RoCrate res = roCrateFolderReader.readCrate("destinationFolder"); -``` - -Reading from zip file: -```java -RoCrateReader roCrateFolderReader = new RoCrateReader(new ZipReader()); -RoCrate crate = roCrateFolderReader.readCrate("sourceZipFile"); -``` - -Reading from zip stream: -```java -RoCrateReader roCrateFolderReader = new RoCrateReader(new ZipStreamReader()); -RoCrate crate = roCrateFolderReader.readCrate(inputStream); -``` - -### RO-Crate Website (HTML preview file) -ro-crate-java offers tree different kinds of previews: - -* AutomaticPreview: Uses third-party library [ro-crate-html-js](https://www.npmjs.com/package/ro-crate-html-js), which must be installed separately. -* CustomPreview: Pure Java-based preview using an included template processed by the FreeMarker template engine. At the same time, CustomPreview is the fallback for AutomaticPreview if ro-crate-html-js is not installed. 
-* StaticPreview: Allows to provide a static HTML page (including additional dependencies, e.g., CSS, JS) which is then shipped with the RO-Crate. - -When creating a new RO-Crate using the builder, the default setting is to use CustomPreview. If you want to change this behaviour, thr preview method is set as follows: - -```java -RoCrate roCrate = new RoCrateBuilder("name", "description", "datePublished", "licenseIdentifier") - .setPreview(new AutomaticPreview()) + .addDataEntity(/*...*/) + .addContextualEntity(/*...*/) .build(); ``` -Keep in mind that, if you want to use AutomaticPreview, you have to install ro-crate-html-js via `npm install --global ro-crate-html-js` first. +A built or imported crate can of course also be modified afterwards. Take a look at our further documentation: -For StaticPreview, the constuctor is a bit different, such that it looks as follows: - -```java -File pathToMainPreviewHtml = new File("localPath"); -File pathToAdditionalFiles = new File("localFolder"); -RoCrate roCrate = new RoCrateBuilder("name", "description", "datePublished", "licenseIdentifier") - .setPreview(new StaticPreview(pathToMainPreviewHtml, pathToAdditionalFiles)) - .build(); -``` - -### RO-Crate validation (machine-readable crate profiles) -Right now, the only implemented way of validating a RO-crate is to use a [JSON-Schema](https://json-schema.org/) that the crates metadata JSON file should match. JSON-Schema is an established standard and therefore a good choice for a crate profile. Example: - -```java -Validator validator = new Validator(new JsonSchemaValidation("./schema.json")); -boolean valid = validator.validate(crate); -``` - -## Adapting the specification examples - -This section describes how to generate the [official specifications examples](https://www.researchobject.org/ro-crate/1.1/root-data-entity.html#minimal-example-of-ro-crate). Each example first shows the ro-crate-metadata.json and, below that, the required Java code to generate it. 
- -### [Minimal example](https://www.researchobject.org/ro-crate/1.1/root-data-entity.html#minimal-example-of-ro-crate) - -```json -{ "@context": "https://w3id.org/ro/crate/1.1/context", - "@graph": [ - - { - "@type": "CreativeWork", - "@id": "ro-crate-metadata.json", - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"}, - "about": {"@id": "./"} - }, - { - "@id": "./", - "identifier": "https://doi.org/10.4225/59/59672c09f4a4b", - "@type": "Dataset", - "datePublished": "2017", - "name": "Data files associated with the manuscript:Effects of facilitated family case conferencing for ...", - "description": "Palliative care planning for nursing home residents with advanced dementia ...", - "license": {"@id": "https://creativecommons.org/licenses/by-nc-sa/3.0/au/"} - }, - { - "@id": "https://creativecommons.org/licenses/by-nc-sa/3.0/au/", - "@type": "CreativeWork", - "description": "This work is licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Australia License. To view a copy of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/au/ or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.", - "identifier": "https://creativecommons.org/licenses/by-nc-sa/3.0/au/", - "name": "Attribution-NonCommercial-ShareAlike 3.0 Australia (CC BY-NC-SA 3.0 AU)" - } - ] -} -``` - -Here, everything is created manually. -For the following examples, more convenient creation methods are used. - -```java - RoCrate crate = new RoCrate(); - - ContextualEntity license = new ContextualEntity.ContextualEntityBuilder() - .addType("CreativeWork") - .setId("https://creativecommons.org/licenses/by-nc-sa/3.0/au/") - .addProperty("description", "This work is licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Australia License. 
To view a copy of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/au/ or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.") - .addProperty("identifier", "https://creativecommons.org/licenses/by-nc-sa/3.0/au/") - .addProperty("name", "Attribution-NonCommercial-ShareAlike 3.0 Australia (CC BY-NC-SA 3.0 AU)") - .build(); - - crate.setRootDataEntity(new RootDataEntity.RootDataEntityBuilder() - .addProperty("identifier", "https://doi.org/10.4225/59/59672c09f4a4b") - .addProperty("datePublished", "2017") - .addProperty("name", "Data files associated with the manuscript:Effects of facilitated family case conferencing for ...") - .addProperty("description", "Palliative care planning for nursing home residents with advanced dementia ...") - .setLicense(license) - .build()); - - crate.setJsonDescriptor(new ContextualEntity.ContextualEntityBuilder() - .setId("ro-crate-metadata.json") - .addType("CreativeWork") - .addIdProperty("about", "./") - .addIdProperty("conformsTo", "https://w3id.org/ro/crate/1.1") - .build() - ); - crate.addContextualEntity(license); -``` - -### [Example with files](https://www.researchobject.org/ro-crate/1.1/data-entities.html#example-linking-to-a-file-and-folders) - -```json -{ "@context": "https://w3id.org/ro/crate/1.1/context", - "@graph": [ - { - "@type": "CreativeWork", - "@id": "ro-crate-metadata.json", - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"}, - "about": {"@id": "./"} - }, - { - "@id": "./", - "@type": [ - "Dataset" - ], - "hasPart": [ - { - "@id": "cp7glop.ai" - }, - { - "@id": "lots_of_little_files/" - } - ] - }, - { - "@id": "cp7glop.ai", - "@type": "File", - "name": "Diagram showing trend to increase", - "contentSize": "383766", - "description": "Illustrator file for Glop Pot", - "encodingFormat": "application/pdf" - }, - { - "@id": "lots_of_little_files/", - "@type": "Dataset", - "name": "Too many files", - "description": "This directory contains many small files, that we're 
not going to describe in detail." - } - ] -} -``` - -Here we use the inner builder classes for the construction of the crate. -Doing so, the Metadata File Descriptor and the Root Data Entity entities are added automatically. -`setSource()` is used to provide the actual location of these Data Entities (if they are not remote). -The Data Entity file in the crate will have the name of the entity's ID. - -```java - RoCrate crate = new RoCrate.RoCrateBuilder() - .addDataEntity( - new FileEntity.FileEntityBuilder() - .addContent (Paths.get("path to file"), "cp7glop.ai") - .addProperty("name", "Diagram showing trend to increase") - .addProperty("contentSize", "383766") - .addProperty("description", "Illustrator file for Glop Pot") - .setEncodingFormat("application/pdf") - .build() - ) - .addDataEntity( - new DataSetEntity.DataSetBuilder() - .addContent (Paths.get("path_to_files"), "lots_of_little_files/") - .addProperty("name", "Too many files") - .addProperty("description", "This directory contains many small files, that we're not going to describe in detail.") - .build() - ) - .build(); -``` - -### [Example with web resources](https://www.researchobject.org/ro-crate/1.1/data-entities.html#web-based-data-entities) - -```json -{ "@context": "https://w3id.org/ro/crate/1.1/context", - "@graph": [ - { - "@type": "CreativeWork", - "@id": "ro-crate-metadata.json", - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"}, - "about": {"@id": "./"} - }, - { - "@id": "./", - "@type": [ - "Dataset" - ], - "hasPart": [ - { - "@id": "survey-responses-2019.csv" - }, - { - "@id": "https://zenodo.org/record/3541888/files/ro-crate-1.0.0.pdf" - }, - ] - }, - { - "@id": "survey-responses-2019.csv", - "@type": "File", - "name": "Survey responses", - "contentSize": "26452", - "encodingFormat": "text/csv" - }, - { - "@id": "https://zenodo.org/record/3541888/files/ro-crate-1.0.0.pdf", - "@type": "File", - "name": "RO-Crate specification", - "contentSize": "310691", - "description": "RO-Crate 
specification", - "encodingFormat": "application/pdf" - } -] -} -``` - -The web resource does not use `.setSource()`, but uses the ID to indicate the file's location. - -```java - RoCrate crate = new RoCrate.RoCrateBuilder() - .addDataEntity( - new FileEntity.FileEntityBuilder() - .addContent (Paths.get("README.md"), "survey-responses-2019.csv") - .addProperty("name", "Survey responses") - .addProperty("contentSize", "26452") - .setEncodingFormat("text/csv") - .build() - ) - .addDataEntity( - new FileEntity.FileEntityBuilder() - .addContent(URI.create("https://zenodo.org/record/3541888/files/ro-crate-1.0.0.pdf")) - .addProperty("name", "RO-Crate specification") - .addProperty("contentSize", "310691") - .addProperty("description", "RO-Crate specification") - .setEncodingFormat("application/pdf") - .build() - ) - .build(); -``` - -### [Example with file, author, location](https://www.researchobject.org/ro-crate/1.1/appendix/jsonld.html) - -```json -{ "@context": "https://w3id.org/ro/crate/1.1/context", - "@graph": [ - - { - "@type": "CreativeWork", - "@id": "ro-crate-metadata.json", - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"}, - "about": {"@id": "./"}, - "description": "RO-Crate Metadata File Descriptor (this file)" - }, - { - "@id": "./", - "@type": "Dataset", - "name": "Example RO-Crate", - "description": "The RO-Crate Root Data Entity", - "datePublished": "2020", - "license": {"@id": "https://spdx.org/licenses/CC-BY-NC-SA-4.0"}, - "hasPart": [ - {"@id": "data1.txt"}, - {"@id": "data2.txt"} - ] - }, - { - "@id": "data1.txt", - "@type": "File", - "description": "One of hopefully many Data Entities", - "author": {"@id": "#alice"}, - "contentLocation": {"@id": "http://sws.geonames.org/8152662/"} - }, - { - "@id": "data2.txt", - "@type": "File" - }, - - { - "@id": "#alice", - "@type": "Person", - "name": "Alice", - "description": "One of hopefully many Contextual Entities" - }, - { - "@id": "http://sws.geonames.org/8152662/", - "@type": "Place", - "name": 
"Catalina Park" - } - ] -} -``` - -If there is no special method for including relative entities (ID properties) one can use `.addIdProperty("key","value")`. - -```java - PersonEntity alice = new PersonEntity.PersonEntityBuilder() - .setId("#alice") - .addProperty("name", "Alice") - .addProperty("description", "One of hopefully many Contextual Entities") - .build(); - PlaceEntity park = new PlaceEntity.PlaceEntityBuilder() - .addContent(URI.create("http://sws.geonames.org/8152662/")) - .addProperty("name", "Catalina Park") - .build(); - - RoCrate crate = new RoCrate.RoCrateBuilder("Example RO-Crate", "The RO-Crate Root Data Entity", "2020", "https://spdx.org/licenses/CC-BY-NC-SA-4.0") - .addContextualEntity(park) - .addContextualEntity(alice) - .addDataEntity( - new FileEntity.FileEntityBuilder() - .addContent(Paths.get("......."), "data2.txt") - .build() - ) - .addDataEntity( - new FileEntity.FileEntityBuilder() - .addContent(Paths.get("......."), "data1.txt") - .addProperty("description", "One of hopefully many Data Entities") - .addAuthor(alice.getId()) - .addIdProperty("contentLocation", park) - .build() - ) - .build(); - -``` -### [Example with computational workflow](https://www.researchobject.org/ro-crate/1.1/workflows.html#complete-workflow-example) - -```json -{ "@context": "https://w3id.org/ro/crate/1.1/context", - "@graph": [ - { - "@type": "CreativeWork", - "@id": "ro-crate-metadata.json", - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"}, - "about": {"@id": "./"} - }, - { - "@id": "./", - "@type": "Dataset", - "name": "Example RO-Crate", - "description": "The RO-Crate Root Data Entity", - "datePublished": "2020", - "license": {"@id": "https://spdx.org/licenses/CC-BY-NC-SA-4.0"}, - "hasPart": [ - { "@id": "workflow/alignment.knime" } - ] - }, - { - "@id": "workflow/alignment.knime", - "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], - "conformsTo": - {"@id": 
"https://bioschemas.org/profiles/ComputationalWorkflow/0.5-DRAFT-2020_07_21/"}, - "name": "Sequence alignment workflow", - "programmingLanguage": {"@id": "#knime"}, - "creator": {"@id": "#alice"}, - "dateCreated": "2020-05-23", - "license": { "@id": "https://spdx.org/licenses/CC-BY-NC-SA-4.0"}, - "input": [ - { "@id": "#36aadbd4-4a2d-4e33-83b4-0cbf6a6a8c5b"} - ], - "output": [ - { "@id": "#6c703fee-6af7-4fdb-a57d-9e8bc4486044"}, - { "@id": "#2f32b861-e43c-401f-8c42-04fd84273bdf"} - ], - "sdPublisher": {"@id": "#workflow-hub"}, - "url": "http://example.com/workflows/alignment", - "version": "0.5.0" - }, - { - "@id": "#36aadbd4-4a2d-4e33-83b4-0cbf6a6a8c5b", - "@type": "FormalParameter", - "conformsTo": {"@id": "https://bioschemas.org/profiles/FormalParameter/0.1-DRAFT-2020_07_21/"}, - "name": "genome_sequence", - "valueRequired": true, - "additionalType": {"@id": "http://edamontology.org/data_2977"}, - "format": {"@id": "http://edamontology.org/format_1929"} - }, - { - "@id": "#6c703fee-6af7-4fdb-a57d-9e8bc4486044", - "@type": "FormalParameter", - "conformsTo": {"@id": "https://bioschemas.org/profiles/FormalParameter/0.1-DRAFT-2020_07_21/"}, - "name": "cleaned_sequence", - "additionalType": {"@id": "http://edamontology.org/data_2977"}, - "encodingFormat": {"@id": "http://edamontology.org/format_2572"} - }, - { - "@id": "#2f32b861-e43c-401f-8c42-04fd84273bdf", - "@type": "FormalParameter", - "conformsTo": {"@id": "https://bioschemas.org/profiles/FormalParameter/0.1-DRAFT-2020_07_21/"}, - "name": "sequence_alignment", - "additionalType": {"@id": "http://edamontology.org/data_1383"}, - "encodingFormat": {"@id": "http://edamontology.org/format_1982"} - }, - { - "@id": "https://spdx.org/licenses/CC-BY-NC-SA-4.0", - "@type": "CreativeWork", - "name": "Creative Commons Attribution Non Commercial Share Alike 4.0 International", - "alternateName": "CC-BY-NC-SA-4.0" - }, - { - "@id": "#knime", - "@type": "ProgrammingLanguage", - "name": "KNIME Analytics Platform", - 
"alternateName": "KNIME", - "url": "https://www.knime.com/whats-new-in-knime-41", - "version": "4.1.3" - }, - { - "@id": "#alice", - "@type": "Person", - "name": "Alice Brown" - }, - { - "@id": "#workflow-hub", - "@type": "Organization", - "name": "Example Workflow Hub", - "url":"http://example.com/workflows/" - }, - { - "@id": "http://edamontology.org/format_1929", - "@type": "Thing", - "name": "FASTA sequence format" - }, - { - "@id": "http://edamontology.org/format_1982", - "@type": "Thing", - "name": "ClustalW alignment format" - }, - { - "@id": "http://edamontology.org/format_2572", - "@type": "Thing", - "name": "BAM format" - }, - { - "@id": "http://edamontology.org/data_2977", - "@type": "Thing", - "name": "Nucleic acid sequence" - }, - { - "@id": "http://edamontology.org/data_1383", - "@type": "Thing", - "name": "Nucleic acid sequence alignment" - } - ] -} -``` - - -```java - ContextualEntity license = new ContextualEntity.ContextualEntityBuilder() - .addType("CreativeWork") - .setId("https://spdx.org/licenses/CC-BY-NC-SA-4.0") - .addProperty("name", "Creative Commons Attribution Non Commercial Share Alike 4.0 International") - .addProperty("alternateName", "CC-BY-NC-SA-4.0") - .build(); - ContextualEntity knime = new ContextualEntity.ContextualEntityBuilder() - .setId("#knime") - .addType("ProgrammingLanguage") - .addProperty("name", "KNIME Analytics Platform") - .addProperty("alternateName", "KNIME") - .addProperty("url", "https://www.knime.com/whats-new-in-knime-41") - .addProperty("version", "4.1.3") - .build(); - OrganizationEntity workflowHub = new OrganizationEntity.OrganizationEntityBuilder() - .setId("#workflow-hub") - .addProperty("name", "Example Workflow Hub") - .addProperty("url", "http://example.com/workflows/") - .build(); - ContextualEntity fasta = new ContextualEntity.ContextualEntityBuilder() - .setId("http://edamontology.org/format_1929") - .addType("Thing") - .addProperty("name", "FASTA sequence format") - .build(); - ContextualEntity 
clustalW = new ContextualEntity.ContextualEntityBuilder() - .setId("http://edamontology.org/format_1982") - .addType("Thing") - .addProperty("name", "ClustalW alignment format") - .build(); - ContextualEntity ban = new ContextualEntity.ContextualEntityBuilder() - .setId("http://edamontology.org/format_2572") - .addType("Thing") - .addProperty("name", "BAM format") - .build(); - ContextualEntity nucSec = new ContextualEntity.ContextualEntityBuilder() - .setId("http://edamontology.org/data_2977") - .addType("Thing") - .addProperty("name", "Nucleic acid sequence") - .build(); - ContextualEntity nucAlign = new ContextualEntity.ContextualEntityBuilder() - .setId("http://edamontology.org/data_1383") - .addType("Thing") - .addProperty("name", "Nucleic acid sequence alignment") - .build(); - PersonEntity alice = new PersonEntity.PersonEntityBuilder() - .setId("#alice") - .addProperty("name", "Alice Brown") - .build(); - ContextualEntity requiredParam = new ContextualEntity.ContextualEntityBuilder() - .addType("FormalParameter") - .setId("#36aadbd4-4a2d-4e33-83b4-0cbf6a6a8c5b") - .addProperty("name", "genome_sequence") - .addProperty("valueRequired", true) - .addIdProperty("conformsTo", "https://bioschemas.org/profiles/FormalParameter/0.1-DRAFT-2020_07_21/") - .addIdProperty("additionalType", nucSec) - .addIdProperty("encodingFormat", fasta) - .build(); - ContextualEntity clnParam = new ContextualEntity.ContextualEntityBuilder() - .addType("FormalParameter") - .setId("#6c703fee-6af7-4fdb-a57d-9e8bc4486044") - .addProperty("name", "cleaned_sequence") - .addIdProperty("conformsTo", "https://bioschemas.org/profiles/FormalParameter/0.1-DRAFT-2020_07_21/") - .addIdProperty("additionalType", nucSec) - .addIdProperty("encodingFormat", ban) - .build(); - ContextualEntity alignParam = new ContextualEntity.ContextualEntityBuilder() - .addType("FormalParameter") - .setId("#2f32b861-e43c-401f-8c42-04fd84273bdf") - .addProperty("name", "sequence_alignment") - 
.addIdProperty("conformsTo", "https://bioschemas.org/profiles/FormalParameter/0.1-DRAFT-2020_07_21/") - .addIdProperty("additionalType", nucAlign) - .addIdProperty("encodingFormat", clustalW) - .build(); - - RoCrate crate = new RoCrate.RoCrateBuilder("Example RO-Crate", "The RO-Crate Root Data Entity", "2020", "https://spdx.org/licenses/CC-BY-NC-SA-4.0") - .addContextualEntity(license) - .addContextualEntity(knime) - .addContextualEntity(workflowHub) - .addContextualEntity(fasta) - .addContextualEntity(clustalW) - .addContextualEntity(ban) - .addContextualEntity(nucSec) - .addContextualEntity(nucAlign) - .addContextualEntity(alice) - .addContextualEntity(requiredParam) - .addContextualEntity(clnParam) - .addContextualEntity(alignParam) - .addDataEntity( - new WorkflowEntity.WorkflowEntityBuilder() - .setId("workflow/alignment.knime") - .setSource(new File("src")) - .addIdProperty("conformsTo", "https://bioschemas.org/profiles/ComputationalWorkflow/0.5-DRAFT-2020_07_21/") - .addProperty("name", "Sequence alignment workflow") - .addIdProperty("programmingLanguage", "#knime") - .addAuthor("#alice") - .addProperty("dateCreated", "2020-05-23") - .setLicense("https://spdx.org/licenses/CC-BY-NC-SA-4.0") - .addInput("#36aadbd4-4a2d-4e33-83b4-0cbf6a6a8c5b") - .addOutput("#6c703fee-6af7-4fdb-a57d-9e8bc4486044") - .addOutput("#2f32b861-e43c-401f-8c42-04fd84273bdf") - .addProperty("url", "http://example.com/workflows/alignment") - .addProperty("version", "0.5.0") - .addIdProperty("sdPublisher", "#workflow-hub") - .build() - - ) - .build(); -``` +- **There is a well-documented example-driven guide in [LearnByExampleTest.java](src/test/java/edu/kit/datamanager/ro_crate/examples/LearnByExampleTest.java) to help you get started.** +- You may also be interested in the examples we extracted from the [specification in version 1.1](https://www.researchobject.org/ro-crate/1.1/), which are available in 
[ExamplesOfSpecificationV1p1Test.java](src/test/java/edu/kit/datamanager/ro_crate/examples/ExamplesOfSpecificationV1p1Test.java). +- There is a [module with all well-described guiding tests](src/test/java/edu/kit/datamanager/ro_crate/examples/) available. +- The [JavaDoc Documentation](https://javadoc.io/doc/edu.kit.datamanager/ro-crate-java) is also available online. diff --git a/build.gradle b/build.gradle index cd6c189f..f7fcd613 100644 --- a/build.gradle +++ b/build.gradle @@ -38,7 +38,7 @@ repositories { } ext { - jacksonVersion = '2.18.3' + jacksonVersion = '2.19.0' } dependencies { @@ -67,10 +67,15 @@ dependencies { implementation group: "com.networknt", name: "json-schema-validator", version: "1.5.6" implementation 'org.glassfish:jakarta.json:2.0.1' //JTE for template processing - implementation('gg.jte:jte:3.2.0') + implementation('gg.jte:jte:3.2.1') implementation("org.freemarker:freemarker:2.3.34") } +// enable -Xlint:deprecation +tasks.withType(JavaCompile).configureEach { + options.compilerArgs << "-Xlint:deprecation" +} + logging.captureStandardOutput LogLevel.INFO def signingTasks = tasks.withType(Sign) diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar index 9bbc975c..1b33c55b 100644 Binary files a/gradle/wrapper/gradle-wrapper.jar and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index 37f853b1..ca025c83 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,6 +1,6 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-8.13-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-8.14-bin.zip networkTimeout=10000 validateDistributionUrl=true zipStoreBase=GRADLE_USER_HOME diff --git a/gradlew b/gradlew index faf93008..23d15a93 100755 --- a/gradlew +++ b/gradlew @@ -114,7 +114,7 
@@ case "$( uname )" in #( NONSTOP* ) nonstop=true ;; esac -CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar +CLASSPATH="\\\"\\\"" # Determine the Java command to use to start the JVM. @@ -213,7 +213,7 @@ DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' set -- \ "-Dorg.gradle.appname=$APP_BASE_NAME" \ -classpath "$CLASSPATH" \ - org.gradle.wrapper.GradleWrapperMain \ + -jar "$APP_HOME/gradle/wrapper/gradle-wrapper.jar" \ "$@" # Stop when "xargs" is not available. diff --git a/gradlew.bat b/gradlew.bat index 9b42019c..5eed7ee8 100644 --- a/gradlew.bat +++ b/gradlew.bat @@ -70,11 +70,11 @@ goto fail :execute @rem Setup the command line -set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar +set CLASSPATH= @rem Execute Gradle -"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" -jar "%APP_HOME%\gradle\wrapper\gradle-wrapper.jar" %* :end @rem End local scope for the variables with windows NT shell diff --git a/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java b/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java index 330135b7..356b0159 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java @@ -10,13 +10,10 @@ import edu.kit.datamanager.ro_crate.entities.AbstractEntity; import edu.kit.datamanager.ro_crate.entities.contextual.ContextualEntity; import edu.kit.datamanager.ro_crate.entities.contextual.JsonDescriptor; -import edu.kit.datamanager.ro_crate.entities.contextual.OrganizationEntity; import edu.kit.datamanager.ro_crate.entities.data.DataEntity; -import edu.kit.datamanager.ro_crate.entities.data.DataEntity.DataEntityBuilder; -import edu.kit.datamanager.ro_crate.entities.data.FileEntity; + import 
edu.kit.datamanager.ro_crate.entities.data.RootDataEntity; import edu.kit.datamanager.ro_crate.externalproviders.dataentities.ImportFromDataCite; -import edu.kit.datamanager.ro_crate.externalproviders.organizationprovider.RorProvider; import edu.kit.datamanager.ro_crate.objectmapper.MyObjectMapper; import edu.kit.datamanager.ro_crate.payload.CratePayload; import edu.kit.datamanager.ro_crate.payload.RoCratePayload; @@ -26,12 +23,9 @@ import edu.kit.datamanager.ro_crate.special.JsonUtilFunctions; import edu.kit.datamanager.ro_crate.validation.JsonSchemaValidation; import edu.kit.datamanager.ro_crate.validation.Validator; -import edu.kit.datamanager.ro_crate.writer.FolderWriter; -import edu.kit.datamanager.ro_crate.writer.RoCrateWriter; import java.io.File; import java.net.URI; -import java.nio.file.Paths; import java.util.*; import java.util.stream.Collectors; import java.util.stream.StreamSupport; @@ -354,6 +348,18 @@ public RoCrateBuilder addName(String name) { return this; } + /** + * Adds an "identifier" property to the root data entity. + *
+ * This is useful, e.g., to assign a DOI to this crate.
+ * @param identifier the identifier to add.
+ * @return this builder.
+ */
+ public RoCrateBuilder addIdentifier(String identifier) {
+ this.rootDataEntity.addProperty("identifier", identifier.strip()); // strip() drops leading/trailing whitespace; NOTE(review): throws NullPointerException if identifier is null
+ return this; // return the builder itself so further builder calls can be chained
+ }
+
public RoCrateBuilder addDescription(String description) {
this.rootDataEntity.addProperty(PROPERTY_DESCRIPTION, description);
return this;
diff --git a/src/main/java/edu/kit/datamanager/ro_crate/context/RoCrateMetadataContext.java b/src/main/java/edu/kit/datamanager/ro_crate/context/RoCrateMetadataContext.java
index 731a4c1a..cce6a7e9 100644
--- a/src/main/java/edu/kit/datamanager/ro_crate/context/RoCrateMetadataContext.java
+++ b/src/main/java/edu/kit/datamanager/ro_crate/context/RoCrateMetadataContext.java
@@ -113,9 +113,10 @@ public boolean checkEntity(AbstractEntity entity) {
node.remove("@id");
node.remove("@type");
- Set
+ * The current value can be null without errors.
+ * Only the id will be considered in this case.
+ *
+ * If the id is null-ish, it will not be added, similar to a null-ish value.
+ * If the id is already present, nothing will be done.
+ * If it is not an array and the id is not present, an array will be applied.
+ *
+ * @param id the id to add.
+ * @param currentValue the current value of the property.
+ * @return The updated value of the property.
+ * Empty if value does not change!
+ */
+ protected static Optional
+ * NOTE: IDs are not just names! The ID may have effects
+ * on parts of your crate! For example: If the entity represents a
+ * file which will be copied into the crate, writers must use the
+ * ID as filename.
*
* @param id the String representing the id.
* @return the generic builder.
@@ -486,11 +519,11 @@ public T addProperty(String key, boolean value) {
* @return the generic builder
*/
public T addIdProperty(String name, String id) {
- JsonNode jsonNode = AbstractEntity.addToIdProperty(name, id, this.properties.get(name));
- if (jsonNode != null) {
- this.properties.set(name, jsonNode);
- this.relatedItems.add(id);
- }
+ AbstractEntity.mergeIdIntoValue(id, this.properties.get(name))
+ .ifPresent(newValue -> {
+ this.properties.set(name, newValue);
+ this.relatedItems.add(id);
+ });
return self();
}
@@ -526,13 +559,37 @@ public T addIdFromCollectionOfEntities(String name, Collection
+ * Valid means here that the json object needs to be flat as specified
+ * in the RO-Crate specification. In principle, this means that
+ * primitives and objects referencing an ID are allowed,
+ * as well as arrays of these.
+ *
+ * @param properties the Json representing all the properties.
+ * @return the generic builder, either including all given properties
+ * or unchanged.
+ */
+ public T setAllIfValid(ObjectNode properties) {
if (AbstractEntity.entityValidation.entityValidation(properties)) {
this.properties = properties;
this.relatedItems.addAll(JsonUtilFunctions.getIdPropertiesFromJsonNode(properties));
@@ -540,6 +597,24 @@ public T setAll(ObjectNode properties) {
return self();
}
+ /**
+ * This sets everything from a json object to the property. Can be
+ * useful when the entity is already available somewhere.
+ *
+ * Errors on validation are printed, but everything will be added.
+ * For more about validation, see {@link #setAllIfValid(ObjectNode)}.
+ *
+ * @param properties the Json representing all the properties.
+ * @return the generic builder with all properties added.
+ */
+ public T setAllUnsafe(ObjectNode properties) {
+ // This will currently only print errors.
+ AbstractEntity.entityValidation.entityValidation(properties);
+ this.properties = properties;
+ this.relatedItems.addAll(JsonUtilFunctions.getIdPropertiesFromJsonNode(properties));
+ return self();
+ }
+
public abstract T self();
public abstract AbstractEntity build();
diff --git a/src/main/java/edu/kit/datamanager/ro_crate/entities/contextual/JsonDescriptor.java b/src/main/java/edu/kit/datamanager/ro_crate/entities/contextual/JsonDescriptor.java
index 8bb91294..88aaf89e 100644
--- a/src/main/java/edu/kit/datamanager/ro_crate/entities/contextual/JsonDescriptor.java
+++ b/src/main/java/edu/kit/datamanager/ro_crate/entities/contextual/JsonDescriptor.java
@@ -13,8 +13,8 @@
public class JsonDescriptor extends ContextualEntity {
- private static final String CONFORMS_TO = "conformsTo";
- protected static final String ID = "ro-crate-metadata.json";
+ protected static final String CONFORMS_TO = "conformsTo";
+ public static final String ID = "ro-crate-metadata.json";
/**
* Returns a JsonDescriptor with the conformsTo value set to the latest stable
@@ -39,7 +39,7 @@ private JsonDescriptor(ContextualEntityBuilder builder) {
/**
* Builder for the JsonDescriptor.
- *
+ *
* Defaults to the latest stable crate version and no other conformsTo values.
*/
public static final class Builder {
diff --git a/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataEntity.java b/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataEntity.java
index e6e28f8f..864d3044 100644
--- a/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataEntity.java
+++ b/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataEntity.java
@@ -5,19 +5,11 @@
import edu.kit.datamanager.ro_crate.entities.AbstractEntity;
import edu.kit.datamanager.ro_crate.entities.contextual.ContextualEntity;
import static edu.kit.datamanager.ro_crate.special.IdentifierUtils.isUrl;
-import edu.kit.datamanager.ro_crate.util.ZipUtil;
-import java.io.File;
-import java.io.IOException;
import java.net.URI;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
-import net.lingala.zip4j.ZipFile;
-import net.lingala.zip4j.exception.ZipException;
-import net.lingala.zip4j.io.outputstream.ZipOutputStream;
-import net.lingala.zip4j.model.ZipParameters;
-import org.apache.commons.io.FileUtils;
/**
* The base class of every data entity.
@@ -56,54 +48,6 @@ public void addAuthorId(String id) {
this.addIdProperty("author", id);
}
- /**
- * If the data entity contains a physical file. This method will write it
- * when the crate is being written to a zip archive.
- *
- * @param zipFile the zipFile where it should be written.
- * @throws ZipException when something goes wrong with the writing to the
- * zip file.
- */
- public void saveToZip(ZipFile zipFile) throws ZipException {
- if (this.path != null) {
- ZipParameters zipParameters = new ZipParameters();
- zipParameters.setFileNameInZip(this.getId());
- zipFile.addFile(this.path.toFile(), zipParameters);
- }
- }
-
- /**
- * If the data entity contains a physical file. This method will write it
- * when the crate is being written to a zip archive.
- *
- * @param zipStream The zip output stream where it should be written.
- * @throws ZipException when something goes wrong with the writing to the
- * zip file.
- * @throws IOException If opening the file input stream fails.
- */
- public void saveToStream(ZipOutputStream zipStream) throws ZipException, IOException {
- if (this.path != null) {
- ZipUtil.addFileToZipStream(zipStream, this.path.toFile(), this.getId());
- }
- }
-
- /**
- * If the data entity contains a physical file. This method will write it
- * when the crate is being written to a folder.
- *
- * @param file the folder location where the entity should be written.
- * @throws IOException if something goes wrong with the writing.
- */
- public void savetoFile(File file) throws IOException {
- if (this.getPath() != null) {
- if (this.getPath().toFile().isDirectory()) {
- FileUtils.copyDirectory(this.getPath().toFile(), file.toPath().resolve(this.getId()).toFile());
- } else {
- FileUtils.copyFile(this.getPath().toFile(), file.toPath().resolve(this.getId()).toFile());
- }
- }
- }
-
@JsonIgnore
public Path getPath() {
return path;
diff --git a/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java b/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java
index 832d9819..2ef078ff 100644
--- a/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java
+++ b/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java
@@ -4,15 +4,9 @@
import com.fasterxml.jackson.databind.annotation.JsonSerialize;
import edu.kit.datamanager.ro_crate.entities.serializers.HasPartSerializer;
-import edu.kit.datamanager.ro_crate.util.ZipUtil;
-import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
-import net.lingala.zip4j.ZipFile;
-import net.lingala.zip4j.exception.ZipException;
-import net.lingala.zip4j.io.outputstream.ZipOutputStream;
-import net.lingala.zip4j.model.ZipParameters;
/**
* A helping class for the creating of Data entities of type Dataset.
@@ -43,26 +37,6 @@ public void removeFromHasPart(String str) {
this.hasPart.remove(str);
}
- @Override
- public void saveToZip(ZipFile zipFile) throws ZipException {
- if (this.getPath() != null) {
- ZipParameters parameters = new ZipParameters();
- parameters.setRootFolderNameInZip(this.getId());
- parameters.setIncludeRootFolder(false);
- zipFile.addFolder(this.getPath().toFile(), parameters);
- }
- }
-
- @Override
- public void saveToStream(ZipOutputStream zipOutputStream) throws IOException {
- if (this.getPath() != null) {
- ZipUtil.addFolderToZipStream(
- zipOutputStream,
- this.getPath().toAbsolutePath().toString(),
- this.getId());
- }
- }
-
public void addToHasPart(String id) {
this.hasPart.add(id);
}
diff --git a/src/main/java/edu/kit/datamanager/ro_crate/entities/validation/JsonSchemaValidation.java b/src/main/java/edu/kit/datamanager/ro_crate/entities/validation/JsonSchemaValidation.java
index 067f5e2e..18e9624a 100644
--- a/src/main/java/edu/kit/datamanager/ro_crate/entities/validation/JsonSchemaValidation.java
+++ b/src/main/java/edu/kit/datamanager/ro_crate/entities/validation/JsonSchemaValidation.java
@@ -60,6 +60,7 @@ public boolean validateEntity(JsonNode entity) {
Set
+ * IMPORTANT NOTE: This method currently has a default implementation that relies
+ * on deprecated methods. In the future, you will have to implement this method directly.
+ *
+ * @param crate the crate to generate a preview for.
+ * @param targetDir the target directory to store the preview in,
+ * owned by the caller.
+ * @throws IOException if an error occurs while generating the preview.
+ */
+ default void generate(Crate crate, File targetDir) throws IOException {
+ // disable preview generation to avoid recursion,
+ // as this is usually called in the process of writing a crate
+ // (including preview)
+ new CrateWriter<>(new WriteFolderStrategy().disablePreview())
+ .save(crate, targetDir.getAbsolutePath());
+ this.saveAllToFolder(targetDir);
+ try (var stream = Files.list(targetDir.toPath())) {
+ stream
+ .filter(path -> !path.getFileName().toString().equals("ro-crate-preview.html"))
+ .filter(path -> !path.getFileName().toString().equals("ro-crate-preview_files"))
+ .forEach(path -> {
+ try {
+ if (Files.isDirectory(path)) {
+ FileUtils.deleteDirectory(path.toFile());
+ } else {
+ Files.delete(path);
+ }
+ } catch (IOException e) {
+ // Deletion errors are logged, but do not abort the cleanup
+ LoggerFactory.getLogger(CratePreview.class)
+ .error("Failed to delete temporary file {}", path, e);
+ }
+ });
+ }
+ }
+
+ /**
+ * Takes a crate in form of a zip file and generates a preview of it,
+ * which will be stored within the crate.
+ *
+ * @param zipFile the zip file with the crate, which should receive a preview.
+ * @throws IOException if an error occurs while saving the preview
+ *
+ * @deprecated Use {@link #generate(Crate, File)} instead.
+ */
+ @Deprecated(since = "2.1.0", forRemoval = true)
void saveAllToZip(ZipFile zipFile) throws IOException;
+ /**
+ * Saves the preview, given by the folder, into the given folder.
+ *
+ * @param folder the folder (containing a crate) to save the preview in.
+ * @throws IOException if an error occurs while saving the preview.
+ *
+ * @deprecated Use {@link #generate(Crate, File)} instead.
+ */
+ @Deprecated(since = "2.1.0", forRemoval = true)
void saveAllToFolder(File folder) throws IOException;
-
+
+ /**
+ * Saves the preview, given by the metadata, into the given stream.
+ *
+ * @param metadata the metadata of the crate to save the preview in.
+ * @param stream the stream to save the preview in.
+ * @throws IOException if an error occurs while saving the preview.
+ *
+ * @deprecated Use {@link #generate(Crate, File)} instead.
+ */
+ @Deprecated(since = "2.1.0", forRemoval = true)
void saveAllToStream(String metadata, ZipOutputStream stream) throws IOException;
}
diff --git a/src/main/java/edu/kit/datamanager/ro_crate/preview/CustomPreview.java b/src/main/java/edu/kit/datamanager/ro_crate/preview/CustomPreview.java
index 5c8c9fa2..77300a87 100644
--- a/src/main/java/edu/kit/datamanager/ro_crate/preview/CustomPreview.java
+++ b/src/main/java/edu/kit/datamanager/ro_crate/preview/CustomPreview.java
@@ -2,7 +2,7 @@
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
-import edu.kit.datamanager.ro_crate.util.ZipUtil;
+import edu.kit.datamanager.ro_crate.util.ZipStreamUtil;
import freemarker.template.Configuration;
import freemarker.template.Template;
import freemarker.template.TemplateException;
@@ -53,7 +53,7 @@ public CustomPreview() {
private CustomPreviewModel mapFromJson(String metadata) throws IOException {
ObjectMapper mapper = new ObjectMapper();
- JsonNode root = (JsonNode) mapper.readValue(metadata, JsonNode.class);
+ JsonNode root = mapper.readValue(metadata, JsonNode.class);
JsonNode graph = root.get("@graph");
CustomPreviewModel.ROCrate crate = new CustomPreviewModel.ROCrate();
List
- * The reader consideres "hasPart" and "isPartOf" properties and considers all
+ * The reader considers "hasPart" and "isPartOf" properties and considers all
* entities (in-)directly connected to the root entity ("./") as DataEntities.
*
* @param
- * May be used as a dependency for CrateReader. It will unzip
- * the ZipFile in a path relative to the directory this application runs in.
- * By default, it will be `./.tmp/ro-crate-java/zipReader/$UUID/`.
+ * This class handles reading and extraction of RO-Crate content from ZIP archives
+ * into a temporary directory structure on the file system,
+ * which allows accessing the contained files.
+ *
+ * Supports ELN-Style crates,
+ * meaning the crate may be either in the zip archive directly or in a single,
+ * direct subfolder beneath the root folder (/folder).
+ *
+ * Note: This implementation checks for up to 50 subdirectories if multiple are present.
+ * This is to avoid zip bombs, which may contain a lot of subdirectories,
+ * and at the same time gracefully handle valid crates with hidden subdirectories
+ * (for example, thumbnails).
*
* NOTE: The resulting crate may refer to these temporary files. Therefore,
* these files are only being deleted before the JVM exits. If you need to free
@@ -27,16 +39,19 @@
* persistent location and possibly read it from there, if required. Or use
* the ZipWriter to write it back to its source.
*/
-public class ZipStrategy implements GenericReaderStrategy
+ * The default configuration is to extract the ZipFile to
+ * `./.tmp/ro-crate-java/zipReader/$UUID/`.
*/
- public ZipStrategy() {}
+ public ReadZipStrategy() {}
/**
* Creates a ZipReader which will extract the contents temporary
@@ -49,7 +64,7 @@ public ZipStrategy() {}
* directory. These subdirectories
* will have UUIDs as their names.
*/
- public ZipStrategy(Path folderPath, boolean shallAddUuidSubfolder) {
+ public ReadZipStrategy(Path folderPath, boolean shallAddUuidSubfolder) {
if (shallAddUuidSubfolder) {
this.temporaryFolder = folderPath.resolve(ID);
} else {
@@ -78,46 +93,46 @@ public boolean isExtracted() {
return isExtracted;
}
- private void readCrate(String location) {
- try {
- File folder = temporaryFolder.toFile();
- // ensure the directory is clean
- if (folder.isDirectory()) {
- FileUtils.cleanDirectory(folder);
- } else if (folder.isFile()) {
- FileUtils.delete(folder);
- }
- // extract
- try (ZipFile zf = new ZipFile(location)) {
- zf.extractAll(temporaryFolder.toAbsolutePath().toString());
- this.isExtracted = true;
- }
- // register deletion on exit
- FileUtils.forceDeleteOnExit(folder);
- } catch (IOException e) {
- e.printStackTrace();
+ private void readCrate(String location) throws IOException {
+ File folder = temporaryFolder.toFile();
+ FileSystemUtil.mkdirOrDeleteContent(folder);
+ // extract
+ try (ZipFile zf = new ZipFile(location)) {
+ zf.extractAll(temporaryFolder.toAbsolutePath().toString());
+ this.isExtracted = true;
}
+ // register deletion on exit
+ FileUtils.forceDeleteOnExit(folder);
}
@Override
- public ObjectNode readMetadataJson(String location) {
+ public ObjectNode readMetadataJson(String location) throws IOException {
if (!isExtracted) {
this.readCrate(location);
}
ObjectMapper objectMapper = MyObjectMapper.getMapper();
- File jsonMetadata = temporaryFolder.resolve("ro-crate-metadata.json").toFile();
-
- try {
- return objectMapper.readTree(jsonMetadata).deepCopy();
- } catch (IOException e) {
- e.printStackTrace();
- return null;
+ File jsonMetadata = this.temporaryFolder.resolve(JsonDescriptor.ID).toFile();
+ if (!jsonMetadata.isFile()) {
+ // Try to find the metadata file in subdirectories
+ File firstSubdir = FileUtils.listFilesAndDirs(
+ temporaryFolder.toFile(),
+ FileFilterUtils.directoryFileFilter(),
+ null // not recursive
+ )
+ .stream()
+ .limit(50)
+ .filter(file -> file.toPath().toAbsolutePath().resolve(JsonDescriptor.ID).toFile().isFile())
+ .findFirst()
+ .orElseThrow(() -> new IllegalStateException("No %s found in zip file".formatted(JsonDescriptor.ID)));
+ jsonMetadata = firstSubdir.toPath().resolve(JsonDescriptor.ID).toFile();
}
+
+ return objectMapper.readTree(jsonMetadata).deepCopy();
}
@Override
- public File readContent(String location) {
+ public File readContent(String location) throws IOException {
if (!isExtracted) {
this.readCrate(location);
}
diff --git a/src/main/java/edu/kit/datamanager/ro_crate/reader/ReadZipStreamStrategy.java b/src/main/java/edu/kit/datamanager/ro_crate/reader/ReadZipStreamStrategy.java
new file mode 100644
index 00000000..3cc41086
--- /dev/null
+++ b/src/main/java/edu/kit/datamanager/ro_crate/reader/ReadZipStreamStrategy.java
@@ -0,0 +1,176 @@
+package edu.kit.datamanager.ro_crate.reader;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import edu.kit.datamanager.ro_crate.entities.contextual.JsonDescriptor;
+import edu.kit.datamanager.ro_crate.objectmapper.MyObjectMapper;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.file.Path;
+import java.util.UUID;
+
+import edu.kit.datamanager.ro_crate.util.FileSystemUtil;
+import net.lingala.zip4j.io.inputstream.ZipInputStream;
+import net.lingala.zip4j.model.LocalFileHeader;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.filefilter.FileFilterUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Reads a crate from a streamed ZIP archive.
+ *
+ * This class handles reading and extraction of RO-Crate content from ZIP archives
+ * into a temporary directory structure on the file system,
+ * which allows accessing the contained files.
+ *
+ * Supports ELN-Style crates,
+ * meaning the crate may be either in the zip archive directly or in a single,
+ * direct subfolder beneath the root folder (/folder).
+ *
+ * Note: This implementation checks for up to 50 subdirectories if multiple are present.
+ * This is to avoid zip bombs, which may contain a lot of subdirectories,
+ * and at the same time gracefully handle valid crates with hidden subdirectories
+ * (for example, thumbnails).
+ *
+ * NOTE: The resulting crate may refer to these temporary files. Therefore,
+ * these files are only being deleted before the JVM exits. If you need to free
+ * space because your application is long-running or creates a lot of
+ * crates, you may use the getters to retrieve information which will help
+ * you to clean up manually. Keep in mind that crates may refer to this
+ * folder after extraction. Use RoCrateWriter to export it to some
+ * persistent location and possibly read it from there, if required. Or use
+ * the ZipWriter to write it back to its source.
+ *
+ * @author jejkal
+ */
+public class ReadZipStreamStrategy implements GenericReaderStrategy
+ * The default configuration is to extract the ZipFile to
+ * `./.tmp/ro-crate-java/zipStreamReader/$UUID/`.
+ */
+ public ReadZipStreamStrategy() {}
+
+ /**
+ * Creates a ZipStreamReader which will extract the contents temporarily to
+ * the given location instead of the default location.
+ *
+ * @param folderPath the custom directory to extract content to for
+ * temporary access.
+ * @param shallAddUuidSubfolder if true, the reader will extract into
+ * subdirectories of the given directory. These subdirectories will have
+ * UUIDs as their names.
+ */
+ public ReadZipStreamStrategy(Path folderPath, boolean shallAddUuidSubfolder) {
+ if (shallAddUuidSubfolder) {
+ this.temporaryFolder = folderPath.resolve(ID);
+ } else {
+ this.temporaryFolder = folderPath;
+ }
+ }
+
+ /**
+ * @return the identifier which may be used as the name for a subfolder in
+ * the temporary directory.
+ */
+ public String getID() {
+ return ID;
+ }
+
+ /**
+ * @return the folder (considered temporary) where the zipped crate will be
+ * or has been extracted to.
+ */
+ public Path getTemporaryFolder() {
+ return temporaryFolder;
+ }
+
+ /**
+ * @return whether the crate has already been extracted into the temporary
+ * folder.
+ */
+ public boolean isExtracted() {
+ return isExtracted;
+ }
+
+ /** Reads the crate metadata and content from the provided input stream.
+ *
+ * @param stream The input stream.
+ */
+ private void readCrate(InputStream stream) throws IOException {
+ File folder = temporaryFolder.toFile();
+ FileSystemUtil.mkdirOrDeleteContent(folder);
+
+ LocalFileHeader localFileHeader;
+ int readLen;
+ byte[] readBuffer = new byte[4096];
+
+ try (ZipInputStream zipInputStream = new ZipInputStream(stream)) {
+ while ((localFileHeader = zipInputStream.getNextEntry()) != null) {
+ String fileName = localFileHeader.getFileName();
+ File extractedFile = new File(folder, fileName).getCanonicalFile();
+ Path targetRoot = folder.toPath().toRealPath();
+ if (!extractedFile.toPath().startsWith(targetRoot)) {
+ throw new IOException("Entry is outside of target directory: " + fileName);
+ }
+ if (localFileHeader.isDirectory()) {
+ FileUtils.forceMkdir(extractedFile);
+ continue;
+ }
+ FileUtils.forceMkdir(extractedFile.getParentFile());
+ try (OutputStream outputStream = new FileOutputStream(extractedFile)) {
+ while ((readLen = zipInputStream.read(readBuffer)) != -1) {
+ outputStream.write(readBuffer, 0, readLen);
+ }
+ }
+ }
+ }
+ this.isExtracted = true;
+ // register deletion on exit
+ FileUtils.forceDeleteOnExit(folder);
+ }
+
+ @Override
+ public ObjectNode readMetadataJson(InputStream stream) throws IOException {
+ if (!isExtracted) {
+ this.readCrate(stream);
+ }
+
+ ObjectMapper objectMapper = MyObjectMapper.getMapper();
+ File jsonMetadata = temporaryFolder.resolve(JsonDescriptor.ID).toFile();
+ if (!jsonMetadata.isFile()) {
+ // Try to find the metadata file in subdirectories
+ File firstSubdir = FileUtils.listFilesAndDirs(
+ temporaryFolder.toFile(),
+ FileFilterUtils.directoryFileFilter(),
+ null
+ )
+ .stream()
+ .limit(50)
+ .filter(file -> file.toPath().toAbsolutePath().resolve(JsonDescriptor.ID).toFile().isFile())
+ .findFirst()
+ .orElseThrow(() -> new IllegalStateException("No %s found in zip file".formatted(JsonDescriptor.ID)));
+ jsonMetadata = firstSubdir.toPath().resolve(JsonDescriptor.ID).toFile();
+ }
+ return objectMapper.readTree(jsonMetadata).deepCopy();
+ }
+
+ @Override
+ public File readContent(InputStream stream) throws IOException {
+ if (!isExtracted) {
+ this.readCrate(stream);
+ }
+ return temporaryFolder.toFile();
+ }
+}
diff --git a/src/main/java/edu/kit/datamanager/ro_crate/reader/Readers.java b/src/main/java/edu/kit/datamanager/ro_crate/reader/Readers.java
index 49e09cb1..83a43701 100644
--- a/src/main/java/edu/kit/datamanager/ro_crate/reader/Readers.java
+++ b/src/main/java/edu/kit/datamanager/ro_crate/reader/Readers.java
@@ -19,10 +19,10 @@ private Readers() {}
*
* @return A reader configured for ZIP files
*
- * @see ZipStreamStrategy#ZipStreamStrategy()
+ * @see ReadZipStreamStrategy#ReadZipStreamStrategy()
*/
public static CrateReader
+ * Example:
+ * filterExtensionsFromFileName("test.eln", Set.of("ELN")) -> "test"
+ *
+ * @param filename the file name to filter (must not be null)
+ * @param extensionsToRemove the extensions to remove (must not be null)
+ * @return the filtered file name
+ * @throws NullPointerException if any parameter is null
+ */
+ public static String filterExtensionsFromFileName(String filename, Collection
+ * This is supposed to serve both as a user guide and as a test for the implementation.
+ * Executing a test may also print some interesting information to the console.
+ */
+public class ExamplesOfSpecificationV1p1Test {
+
+ /**
+ * From:
+ * Minimal Example
+ * (location in repo)
+ *
+ * This example produces a minimal crate with a
+ * name, description, date, license and identifier.
+ *
+ * This example produces the same result as
+ * {@link #testMinimalCrateWithoutCrateBuilder()}, but using more convenient APIs.
+ */
+ @Test
+ void testMinimalCrateConvenient() {
+ String licenseID = "https://creativecommons.org/licenses/by-nc-sa/3.0/au/";
+ RoCrate minimal = new RoCrate.RoCrateBuilder(
+ "Data files associated with the manuscript:Effects of facilitated family case conferencing for ...",
+ "Palliative care planning for nursing home residents with advanced dementia ...",
+ "2017",
+ licenseID
+ )
+ // We already had to set the license ID in the builder,
+ // but we can override it with more details to fit the example:
+ .setLicense( new ContextualEntity.ContextualEntityBuilder()
+ .addType("CreativeWork")
+ .setId(licenseID)
+ .addProperty("description", "This work is licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Australia License. To view a copy of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/au/ or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.")
+ .addProperty("identifier", licenseID)
+ .addProperty("name", "Attribution-NonCommercial-ShareAlike 3.0 Australia (CC BY-NC-SA 3.0 AU)")
+ .build()
+ )
+ .addIdentifier("https://doi.org/10.4225/59/59672c09f4a4b")
+ .build();
+
+ printAndAssertEquals(minimal, "/spec-v1.1-example-json-files/minimal.json");
+ }
+
+ /**
+ * From:
+ * Minimal Example
+ * (location in repo)
+ *
+ * In this example, the minimal crate is created without the builder.
+ * This should only be done if necessary: Use the builder if possible.
+ * This example produces the same result as {@link #testMinimalCrateConvenient()}.
+ */
+ @Test
+ void testMinimalCrateWithoutCrateBuilder() {
+ RoCrate minimal = new RoCrate();
+
+ ContextualEntity license = new ContextualEntity.ContextualEntityBuilder()
+ .addType("CreativeWork")
+ .setId("https://creativecommons.org/licenses/by-nc-sa/3.0/au/")
+ .addProperty("description", "This work is licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Australia License. To view a copy of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/au/ or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.")
+ .addProperty("identifier", "https://creativecommons.org/licenses/by-nc-sa/3.0/au/")
+ .addProperty("name", "Attribution-NonCommercial-ShareAlike 3.0 Australia (CC BY-NC-SA 3.0 AU)")
+ .build();
+
+ minimal.setRootDataEntity(new RootDataEntity.RootDataEntityBuilder()
+ .addProperty("identifier", "https://doi.org/10.4225/59/59672c09f4a4b")
+ .addProperty("datePublished", "2017")
+ .addProperty("name", "Data files associated with the manuscript:Effects of facilitated family case conferencing for ...")
+ .addProperty("description", "Palliative care planning for nursing home residents with advanced dementia ...")
+ .setLicense(license)
+ .build());
+
+ // This is pretty low-level. We are considering hiding/replacing this detailed API in major versions,
+ // so tell us (for example, open an issue) if you have a use case for it!
+ minimal.setJsonDescriptor(new ContextualEntity.ContextualEntityBuilder()
+ .setId("ro-crate-metadata.json")
+ .addType("CreativeWork")
+ .addIdProperty("about", "./")
+ .addIdProperty("conformsTo", "https://w3id.org/ro/crate/1.1")
+ .build()
+ );
+ minimal.addContextualEntity(license);
+
+ printAndAssertEquals(minimal, "/spec-v1.1-example-json-files/minimal.json");
+ }
+
+ // https://www.researchobject.org/ro-crate/specification/1.1/data-entities.html#example-linking-to-a-file-and-folders
+
+ /**
+ * From:
+ * "Example linking to a file and folders"
+ * (location in repo)
+ *
+ * This example adds a File(Entity) and a DataSet(Entity) to the crate.
+ * The file and the folder are referenced by their location. This way
+ * they will be copied to the crate when writing it using a
+ * {@link CrateWriter}.
+ * The name of the file and the folder will be implicitly set to the
+ * ID of the respective entity in order to conform to the specification.
+ *
+ * Here we use the inner builder classes for the construction of the
+ * crate. In contrast to {@link #testMinimalCrateWithoutCrateBuilder()},
+ * we do not have to care about specification details.
+ */
+ @Test
+ void testLinkingToFileAndFolders() {
+ RoCrate crate = new RoCrate.RoCrateBuilder()
+ .addDataEntity(
+ new FileEntity.FileEntityBuilder()
+ // This will tell us where the file is located. It will be copied to the crate.
+ .setLocation(Paths.get("path to file"))
+ // If no ID is given explicitly, the ID will be set to the filename.
+ // Changing the ID also changes the file name within the crate!
+ .setId("cp7glop.ai")
+ .addProperty("name", "Diagram showing trend to increase")
+ .addProperty("contentSize", "383766")
+ .addProperty("description", "Illustrator file for Glop Pot")
+ .setEncodingFormat("application/pdf")
+ .build()
+ )
+ .addDataEntity(
+ new DataSetEntity.DataSetBuilder()
+ .setLocation(Paths.get("path_to_files"))
+ .setId("lots_of_little_files/")
+ .addProperty("name", "Too many files")
+ .addProperty("description", "This directory contains many small files, that we're not going to describe in detail.")
+ .build()
+ )
+ .build();
+
+ printAndAssertEquals(crate, "/spec-v1.1-example-json-files/files-and-folders.json");
+ }
+
+ /**
+ * From:
+ * Example with web-based data entities
+ * (location in repo)
+ *
+ * This example adds two FileEntities to the crate.
+ * One is a local file, the other one is located in the web
+ * and will not be copied to the crate.
+ */
+ @Test
+ void testWebBasedDataEntities() {
+ RoCrate crate = new RoCrate.RoCrateBuilder()
+ .addDataEntity(
+ new FileEntity.FileEntityBuilder()
+ .setLocation(Paths.get("README.md"))
+ .setId("survey-responses-2019.csv")
+ .addProperty("name", "Survey responses")
+ .addProperty("contentSize", "26452")
+ .setEncodingFormat("text/csv")
+ .build()
+ )
+ .addDataEntity(
+ new FileEntity.FileEntityBuilder()
+ .setLocation(URI.create("https://zenodo.org/record/3541888/files/ro-crate-1.0.0.pdf"))
+ .addProperty("name", "RO-Crate specification")
+ .addProperty("contentSize", "310691")
+ .addProperty("description", "RO-Crate specification")
+ .setEncodingFormat("application/pdf")
+ .build()
+ )
+ .build();
+
+ printAndAssertEquals(crate, "/spec-v1.1-example-json-files/web-based-data-entities.json");
+ }
+
+ /**
+ * From:
+ * Example with file, author, and location
+ * (location in repo)
+ *
+ * This example shows how to connect entities. If there is no specific method like
+ * {@link DataEntity.DataEntityBuilder#addAuthor(String)} for referencing other
+ * entities, one can use the more generic
+ * {@link AbstractEntity.AbstractEntityBuilder#addIdProperty(String, AbstractEntity)}
+ * or {@link AbstractEntity.AbstractEntityBuilder#addIdProperty(String, String)}.
+ *
+ * Important Note! If you connect entities, make sure all entities are being
+ * added to the crate. We can't enforce this properly yet.
+ */
+ @Test
+ void testWithFileAuthorLocation() {
+ // These two entities will be connected to others later on. Therefore, we keep
+ // them in variables so they are easy to reference. Referencing can be done
+ // using the whole entity or its ID.
+ final PersonEntity alice = new PersonEntity.PersonEntityBuilder()
+ .setId("#alice")
+ .addProperty("name", "Alice")
+ .addProperty("description", "One of hopefully many Contextual Entities")
+ .build();
+ final PlaceEntity park = new PlaceEntity.PlaceEntityBuilder()
+ .setId(URI.create("http://sws.geonames.org/8152662/").toString())
+ .addProperty("name", "Catalina Park")
+ .build();
+ final String licenseId = "https://spdx.org/licenses/CC-BY-NC-SA-4.0";
+
+ final RoCrate crate = new RoCrate.RoCrateBuilder(
+ "Example RO-Crate",
+ "The RO-Crate Root Data Entity",
+ "2020",
+ licenseId
+ )
+ .addContextualEntity(park)
+ .addContextualEntity(alice)
+ .addDataEntity(
+ new FileEntity.FileEntityBuilder()
+ .setLocation(Paths.get("......."))
+ .setId("data2.txt")
+ .build()
+ )
+ .addDataEntity(
+ new FileEntity.FileEntityBuilder()
+ .setLocation(Paths.get("......."))
+ .setId("data1.txt")
+ .addProperty("description", "One of hopefully many Data Entities")
+ // ↓ This is the specific way to add an author
+ .addAuthor(alice.getId())
+ // ↓ This is the generic way to add a location or other relations
+ .addIdProperty("contentLocation", park)
+ .build()
+ )
+ .build();
+
+ /*
+ The builder requires a license and a publishing date,
+ but the example does not have them. So we have to remove them below:
+ */
+
+ // **Note**: When you add a license, even if only by a string, the crate will
+ // implicitly also get a small ContextualEntity for this license. When we remove
+ // this (or any) entity, all references to it will be removed as well to ensure
+ // consistency within the crate. Therefore, there will be no trace left of
+ // the license.
+ crate.deleteEntityById(licenseId);
+
+ // The datePublished property is a simple property and simple to remove without
+ // any further internal checks.
+ crate.getRootDataEntity().removeProperty("datePublished");
+
+ printAndAssertEquals(crate, "/spec-v1.1-example-json-files/file-author-location.json");
+ }
+
+ /**
+ * From:
+ * Example with complete workflow
+ * (location in repo)
+ *
+ * This example describes a workflow file together with its formal input and
+ * output parameters, its language, its creator, and its publisher, and shows
+ * how to connect all of these entities. If there is no specific method like
+ * {@link DataEntity.DataEntityBuilder#addAuthor(String)} for referencing other
+ * entities, one can use the more generic
+ * {@link AbstractEntity.AbstractEntityBuilder#addIdProperty(String, AbstractEntity)}
+ * or {@link AbstractEntity.AbstractEntityBuilder#addIdProperty(String, String)}.
+ *
+ * Important Note! If you connect entities, make sure all entities are being
+ * added to the crate. We can't enforce this properly yet.
+ */
+ @Test
+ void testCompleteWorkflowExample() {
+ final String licenseId = "https://spdx.org/licenses/CC-BY-NC-SA-4.0";
+ ContextualEntity license = new ContextualEntity.ContextualEntityBuilder()
+ .addType("CreativeWork")
+ .setId(licenseId)
+ .addProperty("name", "Creative Commons Attribution Non Commercial Share Alike 4.0 International")
+ .addProperty("alternateName", "CC-BY-NC-SA-4.0")
+ .build();
+ ContextualEntity knime = new ContextualEntity.ContextualEntityBuilder()
+ .setId("#knime")
+ .addType("ComputerLanguage")
+ .addProperty("name", "KNIME Analytics Platform")
+ .addProperty("alternateName", "KNIME")
+ .addProperty("url", "https://www.knime.com/whats-new-in-knime-41")
+ .addProperty("version", "4.1.3")
+ .build();
+ OrganizationEntity workflowHub = new OrganizationEntity.OrganizationEntityBuilder()
+ .setId("#workflow-hub")
+ .addProperty("name", "Example Workflow Hub")
+ .addProperty("url", "http://example.com/workflows/")
+ .build();
+ ContextualEntity fasta = new ContextualEntity.ContextualEntityBuilder()
+ .setId("http://edamontology.org/format_1929")
+ .addType("Thing")
+ .addProperty("name", "FASTA sequence format")
+ .build();
+ ContextualEntity clustalW = new ContextualEntity.ContextualEntityBuilder()
+ .setId("http://edamontology.org/format_1982")
+ .addType("Thing")
+ .addProperty("name", "ClustalW alignment format")
+ .build();
+ ContextualEntity bam = new ContextualEntity.ContextualEntityBuilder()
+ .setId("http://edamontology.org/format_2572")
+ .addType("Thing")
+ .addProperty("name", "BAM format")
+ .build();
+ ContextualEntity nucSeq = new ContextualEntity.ContextualEntityBuilder()
+ .setId("http://edamontology.org/data_2977")
+ .addType("Thing")
+ .addProperty("name", "Nucleic acid sequence")
+ .build();
+ ContextualEntity nucAlign = new ContextualEntity.ContextualEntityBuilder()
+ .setId("http://edamontology.org/data_1383")
+ .addType("Thing")
+ .addProperty("name", "Nucleic acid sequence alignment")
+ .build();
+ PersonEntity alice = new PersonEntity.PersonEntityBuilder()
+ .setId("#alice")
+ .addProperty("name", "Alice Brown")
+ .build();
+ ContextualEntity requiredParam = new ContextualEntity.ContextualEntityBuilder()
+ .addType("FormalParameter")
+ .setId("#36aadbd4-4a2d-4e33-83b4-0cbf6a6a8c5b")
+ .addProperty("name", "genome_sequence")
+ .addProperty("valueRequired", true)
+ .addIdProperty("conformsTo", "https://bioschemas.org/profiles/FormalParameter/0.1-DRAFT-2020_07_21/")
+ .addIdProperty("additionalType", nucSeq)
+ .addIdProperty("format", fasta)
+ .build();
+ ContextualEntity clnParam = new ContextualEntity.ContextualEntityBuilder()
+ .addType("FormalParameter")
+ .setId("#6c703fee-6af7-4fdb-a57d-9e8bc4486044")
+ .addProperty("name", "cleaned_sequence")
+ .addIdProperty("conformsTo", "https://bioschemas.org/profiles/FormalParameter/0.1-DRAFT-2020_07_21/")
+ .addIdProperty("additionalType", nucSeq)
+ .addIdProperty("encodingFormat", bam)
+ .build();
+ ContextualEntity alignParam = new ContextualEntity.ContextualEntityBuilder()
+ .addType("FormalParameter")
+ .setId("#2f32b861-e43c-401f-8c42-04fd84273bdf")
+ .addProperty("name", "sequence_alignment")
+ .addIdProperty("conformsTo", "https://bioschemas.org/profiles/FormalParameter/0.1-DRAFT-2020_07_21/")
+ .addIdProperty("additionalType", nucAlign)
+ .addIdProperty("encodingFormat", clustalW)
+ .build();
+
+ RoCrate crate = new RoCrate.RoCrateBuilder(
+ "Example RO-Crate",
+ "The RO-Crate Root Data Entity",
+ "2020",
+ licenseId
+ )
+ .setLicense(license)
+ .addContextualEntity(knime)
+ .addContextualEntity(workflowHub)
+ .addContextualEntity(fasta)
+ .addContextualEntity(clustalW)
+ .addContextualEntity(bam)
+ .addContextualEntity(nucSeq)
+ .addContextualEntity(nucAlign)
+ .addContextualEntity(alice)
+ .addContextualEntity(requiredParam)
+ .addContextualEntity(clnParam)
+ .addContextualEntity(alignParam)
+ .addDataEntity(
+ new WorkflowEntity.WorkflowEntityBuilder()
+ .setId("workflow/alignment.knime")
+ .setLocation(Paths.get("src"))
+ .addIdProperty("conformsTo", "https://bioschemas.org/profiles/ComputationalWorkflow/0.5-DRAFT-2020_07_21/")
+ .addProperty("name", "Sequence alignment workflow")
+ .addIdProperty("programmingLanguage", "#knime")
+ // This example does not use the term "author"...
+ //.addAuthor("#alice")
+ // instead, it uses "creator":
+ .addIdProperty("creator", "#alice")
+ .addProperty("dateCreated", "2020-05-23")
+ .setLicense(licenseId)
+ // Take the parameter IDs from the entities defined above,
+ // so the IDs are not repeated as string literals.
+ .addInput(requiredParam.getId())
+ .addOutput(clnParam.getId())
+ .addOutput(alignParam.getId())
+ .addProperty("url", "http://example.com/workflows/alignment")
+ .addProperty("version", "0.5.0")
+ .addIdProperty("sdPublisher", "#workflow-hub")
+ .build()
+ )
+ .build();
+
+ // Similar to the previous example, this example from the specification
+ // left out some details we now need to remove.
+ // Here we do not want to remove the license, only the reference from our root data entity.
+ // This is because (the way we constructed the crate) other entities use the license as well.
+ crate.getRootDataEntity().removeProperty("license");
+ crate.getRootDataEntity().removeProperty("datePublished");
+ crate.getRootDataEntity().removeProperty("name");
+ crate.getRootDataEntity().removeProperty("description");
+
+ printAndAssertEquals(crate, "/spec-v1.1-example-json-files/complete-workflow-example.json");
+ }
+}
diff --git a/src/test/java/edu/kit/datamanager/ro_crate/examples/LearnByExampleTest.java b/src/test/java/edu/kit/datamanager/ro_crate/examples/LearnByExampleTest.java
new file mode 100644
index 00000000..89c5a8f8
--- /dev/null
+++ b/src/test/java/edu/kit/datamanager/ro_crate/examples/LearnByExampleTest.java
@@ -0,0 +1,421 @@
+package edu.kit.datamanager.ro_crate.examples;
+
+import edu.kit.datamanager.ro_crate.HelpFunctions;
+import edu.kit.datamanager.ro_crate.RoCrate;
+import edu.kit.datamanager.ro_crate.entities.contextual.ContextualEntity;
+import edu.kit.datamanager.ro_crate.entities.contextual.OrganizationEntity;
+import edu.kit.datamanager.ro_crate.entities.contextual.PersonEntity;
+import edu.kit.datamanager.ro_crate.entities.data.DataEntity;
+import edu.kit.datamanager.ro_crate.entities.data.FileEntity;
+import edu.kit.datamanager.ro_crate.externalproviders.organizationprovider.RorProvider;
+import edu.kit.datamanager.ro_crate.externalproviders.personprovider.OrcidProvider;
+import edu.kit.datamanager.ro_crate.preview.AutomaticPreview;
+import edu.kit.datamanager.ro_crate.preview.StaticPreview;
+import edu.kit.datamanager.ro_crate.reader.CrateReader;
+import edu.kit.datamanager.ro_crate.reader.GenericReaderStrategy;
+import edu.kit.datamanager.ro_crate.reader.Readers;
+import edu.kit.datamanager.ro_crate.validation.JsonSchemaValidation;
+import edu.kit.datamanager.ro_crate.validation.Validator;
+import edu.kit.datamanager.ro_crate.writer.CrateWriter;
+import edu.kit.datamanager.ro_crate.writer.WriteFolderStrategy;
+import edu.kit.datamanager.ro_crate.writer.GenericWriterStrategy;
+import edu.kit.datamanager.ro_crate.writer.Writers;
+import org.apache.commons.io.FileUtils;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+import java.io.*;
+import java.net.URI;
+import java.net.URL;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Objects;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+/**
+ * This class is meant to be a small example-driven introduction to the ro-crate-java library.
+ * It is meant to be read from top to bottom.
+ */
+public class LearnByExampleTest {
+
+ /**
+ * Creates a builder for a valid, otherwise empty RO-Crate. Every call returns a new builder instance.
+ */
+ static RoCrate.RoCrateBuilder NEW_STARTER_CRATE() {
+ return new RoCrate.RoCrateBuilder(
+ "name",
+ "description",
+ "2025",
+ "licenseIdentifier"
+ );
+ }
+
+ /**
+ * Calling the `build()` method on the builder creates a valid RO-Crate.
+ * Run this test to view the JSON metadata of the NEW_STARTER_CRATE() crate in the console.
+ */
+ @Test
+ void aSimpleCrate() {
+ RoCrate almostEmptyCrate = NEW_STARTER_CRATE().build();
+ assertNotNull(almostEmptyCrate);
+ HelpFunctions.prettyPrintJsonString(almostEmptyCrate.getJsonMetadata());
+ }
+
+ /**
+ * This is how we can add things to a crate.
+ *
+ * Note that methods starting with `add` can be used multiple times to add more.
+ * For example, we can add multiple files or multiple contexts.
+ *
+ * On the other hand, methods starting with `set` will override previous calls.
+ *
+ * There may still be inconsistencies, which are tracked in issue #242.
+ */
+ @Test
+ void addingYourFirstEntity() {
+ RoCrate myFirstCrate = NEW_STARTER_CRATE()
+ // We can add new terms to our crate. The terms we can use are called "context".
+ .addValuePairToContext("Station", "www.station.com")
+ // We can also add whole contexts to our crate.
+ .addUrlToContext("contextUrl")
+ // Let's add a file to our crate.
+ .addDataEntity(
+ new FileEntity.FileEntityBuilder()
+ // For files (or folders, which are DataSetEntities),
+ // the ID determines the file name in the crate.
+ .setId("survey-responses-2019.csv")
+ // This is where we get the file from. The path will not be part of the metadata.
+ .setLocation(Paths.get("copy/from/this/file-and-rename-it.csv"))
+ // And now, the remaining metadata.
+ // Note that "name", "contentSize", and "encodingFormat"
+ // are already defined in our default context.
+ .addProperty("name", "Survey responses")
+ .addProperty("contentSize", "26452")
+ .addProperty("encodingFormat", "text/csv")
+ .build()
+ )
+ // We could add more, but let's keep it simple for now.
+ //.addDataEntity(...)
+ //.addContextualEntity(...)
+ //...
+ .build();
+
+ assertNotNull(myFirstCrate);
+ HelpFunctions.prettyPrintJsonString(myFirstCrate.getJsonMetadata());
+ }
+
+ /**
+ * The library currently comes with three specialized DataEntities:
+ *
+ * 1. `DataSetEntity`
+ * 2. `FileEntity` (used in the example above)
+ * 3. `WorkflowEntity`
+ *
+ * If another type of `DataEntity` is required,
+ * the base class `DataEntity` can be used. Example:
+ */
+ @Test
+ void specializingYourFirstEntity() {
+ RoCrate crate = NEW_STARTER_CRATE()
+ .addDataEntity(
+ // Let's do something custom:
+ new DataEntity.DataEntityBuilder()
+ // You need to add the type of your `DataEntity` yourself,
+ // because the generic DataEntity has no default type.
+ .addType("CreativeWork")
+ .setId("myEntityInstance")
+ // Now that we are a CreativeWork instance,
+ // it is fine to use some of its properties (here given as a full URL).
+ .addProperty("https://schema.org/award", "Wow-award")
+ .build()
+ )
+ .build();
+
+ assertNotNull(crate);
+ HelpFunctions.prettyPrintJsonString(crate.getJsonMetadata());
+ }
+
+ /**
+ * A `DataEntity` and its subclasses can have a file located on the web.
+ * In this case, it does not need to reside in a crate's folder.
+ * This can be useful for large, publicly available files,
+ * or in order to reuse or share files.
+ *
+ * Note: Technically, an entity pointing to a file on the web is just an entity
+ * that uses the URL as an ID.
+ */
+ @Test
+ void referencingFilesOnTheWeb() {
+ // Let's say this is the file we would like to point at with an entity.
+ String lovelyFile = "https://github.com/kit-data-manager/ro-crate-java/issues/5";
+
+ RoCrate crate = NEW_STARTER_CRATE()
+ .addDataEntity(
+ // Build our entity to point to the file:
+ new FileEntity.FileEntityBuilder()
+ // Make it point to an external file.
+ .setLocation(URI.create(lovelyFile))
+ // Using the URL as the ID would do the same:
+ .setId(lovelyFile)
+ // Don't forget to add metadata!
+ .addProperty("description", "my new file that I added")
+ .build()
+ )
+ .build();
+
+ assertNotNull(crate);
+ HelpFunctions.prettyPrintJsonString(crate.getJsonMetadata());
+ }
+
+ /**
+ * A `DataEntity` and its subclasses can have a local file associated with them,
+ * instead of one located on the web.
+ *
+ * @param tempDir We'll use this to create a temporary folder for our crate.
+ * @throws IOException If the file cannot be created or written to.
+ */
+ @Test
+ void includingFilesIntoTheCrateFolder(@TempDir Path tempDir) throws IOException {
+ // Let's say this is the file we would like to point at with an entity.
+ String lovelyFile = tempDir.resolve("my/experiment.csv").toString();
+ {
+ // (Let's quickly create a dummy file, but the rest will not make use of this knowledge.)
+ File lovelyFilePointer = new File(lovelyFile);
+ FileUtils.touch(lovelyFilePointer);
+ FileUtils.write(lovelyFilePointer, "My great experiment 001", "UTF-8");
+ }
+
+ // But in the crate, we want it to be stored under this path and name:
+ String seriousExperimentFile = "fantastic-experiment/2025-01-01.csv";
+
+ RoCrate crate = NEW_STARTER_CRATE()
+ .addDataEntity(
+ // Build our entity to point to the file:
+ new FileEntity.FileEntityBuilder()
+ // Let's tell the library where to find and copy the file from.
+ .setLocation(Paths.get(lovelyFile))
+ // Let's tell it to adjust the file name and path in the crate.
+ .setId(seriousExperimentFile)
+ .addProperty("description", "my new local file that I added")
+ .build()
+ )
+ .build();
+
+ assertNotNull(crate);
+ HelpFunctions.prettyPrintJsonString(crate.getJsonMetadata());
+
+ // Let's write it to disk and see if the file is there!
+ // (We'll discuss writing and reading crates later on.)
+ Path crateFolder = tempDir.resolve("myCrate");
+ Writers.newFolderWriter().save(crate, crateFolder.toString());
+ assertTrue(crateFolder.resolve(seriousExperimentFile).toFile().exists());
+ }
+
+ /**
+ * Contextual entities cannot be associated with a file: they are pure metadata.
+ * To add a contextual entity to a crate, use the method
+ * {@link RoCrate.RoCrateBuilder#addContextualEntity(ContextualEntity)}.
+ *
+ * Some types of derived/specialized entities are:
+ *
+ * 1. `OrganizationEntity`
+ * 2. `PersonEntity`
+ * 3. `PlaceEntity`
+ *
+ * If you need another type of contextual entity, use the base class
+ * {@link ContextualEntity}, similar to how we did it in
+ * {@link #specializingYourFirstEntity()}.
+ *
+ * The library provides a way to automatically create contextual entities from
+ * external providers. Currently, support for [ORCID](https://orcid.org/) and
+ * [ROR](https://ror.org/) is implemented.
+ * Check the module {@link edu.kit.datamanager.ro_crate.externalproviders} for
+ * more implementations.
+ */
+ @Test
+ void addingContextualEntities() {
+ PersonEntity person = OrcidProvider.getPerson("https://orcid.org/0000-0001-6575-1022");
+ OrganizationEntity organization = RorProvider.getOrganization("https://ror.org/04t3en479");
+
+ RoCrate crate = NEW_STARTER_CRATE()
+ .addContextualEntity(person)
+ .addContextualEntity(organization)
+ .build();
+
+ assertNotNull(crate);
+ HelpFunctions.prettyPrintJsonString(crate.getJsonMetadata());
+ }
+
+ /**
+ * RO-Crates are file based, but in your application you may want to create a crate
+ * on the fly and directly send it somewhere else without storing it on disk.
+ * This is why we can write not only to a folder or a zip file, but also to a stream
+ * (containing the zip file).
+ *
+ * There is a generic interface to implement Writers (and Readers), so even more
+ * exotic use cases should be possible. The readers work the same way.
+ *
+ * - {@link GenericWriterStrategy}
+ * - {@link GenericReaderStrategy}
+ */
+ @Test
+ void writingAndReadingCrates(@TempDir Path tempDir) throws IOException {
+ // OK, let's make a small, but not fully boring crate.
+ PersonEntity person = OrcidProvider.getPerson("https://orcid.org/0000-0001-6575-1022");
+ OrganizationEntity organization = RorProvider.getOrganization("https://ror.org/04t3en479");
+
+ RoCrate crate = NEW_STARTER_CRATE()
+ .addContextualEntity(person)
+ .addContextualEntity(organization)
+ .build();
+
+ assertNotNull(crate);
+ HelpFunctions.prettyPrintJsonString(crate.getJsonMetadata());
+
+ {
+ // Now, let's write it to a folder.
+ Path folder = tempDir.resolve("folderCrate");
+ Writers.newFolderWriter()
+ .save(crate, folder.toString());
+ // and read it back.
+ RoCrate read = Readers.newFolderReader()
+ .readCrate(folder.toAbsolutePath().toString());
+
+ HelpFunctions.compareTwoCrateJson(crate, read);
+ }
+
+ {
+ // Now, let's write it to a zip file.
+ Path zipFile = tempDir.resolve("zipCrate.zip");
+ Writers.newZipPathWriter()
+ .save(crate, zipFile.toString());
+ // and read it back.
+ RoCrate read = Readers.newZipPathReader()
+ .readCrate(zipFile.toAbsolutePath().toString());
+
+ HelpFunctions.compareTwoCrateJson(crate, read);
+ }
+
+ {
+ // Now, let's write it to a zip stream.
+ Path zipStreamFile = tempDir.resolve("zipStreamCrate.zip");
+ try (OutputStream outputStream = new FileOutputStream(zipStreamFile.toFile())) {
+ Writers.newZipStreamWriter().save(crate, outputStream);
+ }
+ // and read it back.
+ try (InputStream inputStream = new FileInputStream(zipStreamFile.toFile())) {
+ RoCrate read = Readers.newZipStreamReader()
+ .readCrate(inputStream);
+
+ HelpFunctions.compareTwoCrateJson(crate, read);
+ }
+ }
+ }
+
+ /**
+ * In {@link #writingAndReadingCrates(Path)} we already saw how to write or read
+ * a crate. We used the Readers and Writers classes to get the available options.
+ * But what if you want to write your own reader or writer strategy?
+ *
+ * Let's see how you can make a reader or writer, manually configuring the strategy.
+ */
+ @Test
+ void writingAndReadingStrategies(@TempDir Path tempDir) throws IOException {
+ // OK, let's make a small, but not fully boring crate.
+ PersonEntity person = OrcidProvider.getPerson("https://orcid.org/0000-0001-6575-1022");
+ OrganizationEntity organization = RorProvider.getOrganization("https://ror.org/04t3en479");
+
+ RoCrate crate = NEW_STARTER_CRATE()
+ .addContextualEntity(person)
+ .addContextualEntity(organization)
+ .build();
+
+ assertNotNull(crate);
+ HelpFunctions.prettyPrintJsonString(crate.getJsonMetadata());
+
+ // Now, let's write it to a folder. Note that the strategy used here could be replaced with your own.
+ Path folder = tempDir.resolve("folderCrate");
+ new CrateWriter<>(new WriteFolderStrategy())
+ .save(crate, folder.toString());
+ // and read it back.
+ RoCrate read = new CrateReader<>(
+ // Note: The reader-side counterpart of WriteFolderStrategy is ReadFolderStrategy.
+ // It is not imported in this file, so we use its fully qualified name here.
+ new edu.kit.datamanager.ro_crate.reader.ReadFolderStrategy()
+ )
+ .readCrate(folder.toAbsolutePath().toString());
+
+ HelpFunctions.compareTwoCrateJson(crate, read);
+ }
+
+ /**
+ * The RO-Crate specification states that there should be a human-readable preview of the crate.
+ * This is an HTML file that can be opened in a browser.
+ * ro-crate-java offers three different ways to create this file:
+ *
+ * - AutomaticPreview: Uses the third-party library
+ * ro-crate-html-js,
+ * which must be installed separately via `npm install --global ro-crate-html-js`.
+ *
+ * - CustomPreview: Pure Java-based preview using an included template processed by
+ * the FreeMarker template engine. At the same time, CustomPreview is the fallback
+ * for AutomaticPreview if ro-crate-html-js is not installed.
+ *
+ * - StaticPreview: Allows providing a static HTML page (including additional
+ * dependencies, e.g., CSS, JS) which is then shipped with the RO-Crate.
+ *
+ * When creating a new RO-Crate using the builder, the default setting is to use
+ * CustomPreview. This example shows you how to change it.
+ */
+ @Test
+ void humanReadableContent() {
+ RoCrate crate = NEW_STARTER_CRATE()
+ .setPreview(new AutomaticPreview())
+ .build();
+
+ assertNotNull(crate);
+ }
+
+ /**
+ * A static preview means you'll just add your own HTML file to the crate.
+ * Therefore, the constructor takes the main HTML file and a directory for additional files.
+ */
+ @Test
+ void staticPreview(@TempDir Path tempDir) throws IOException {
+ File mainPreviewHtml = tempDir.resolve("mainPreview.html").toFile();
+ File additionalFilesDirectory = tempDir.resolve("additionalFiles").toFile();
+ FileUtils.forceMkdir(additionalFilesDirectory);
+ FileUtils.touch(mainPreviewHtml);
+
+ RoCrate crate = NEW_STARTER_CRATE()
+ .setPreview(new StaticPreview(mainPreviewHtml, additionalFilesDirectory))
+ .build();
+
+ assertNotNull(crate);
+ }
+
+ /**
+ * Crates can be validated.
+ * Right now, the only implemented way of validating an RO-Crate is to use a
+ * [JSON-Schema](https://json-schema.org/) that the crate's metadata JSON file should
+ * match. JSON-Schema is an established standard and therefore a good choice for a
+ * crate profile. This example shows how to use it.
+ *
+ * Note: If you happen to implement your own validator anyway, please consider
+ * contributing your code!
+ */
+ @Test
+ void validation() {
+ // Let's find a schema file in the resources folder.
+ URL schemaUrl = Objects.requireNonNull(this.getClass().getResource("/crates/validation/workflowschema.json"));
+ String schemaPath = schemaUrl.getPath();
+
+ // This crate for sure is not a workflow, so validation will fail.
+ RoCrate crate = NEW_STARTER_CRATE().build();
+
+ // And now do the validation.
+ Validator validator = new Validator(new JsonSchemaValidation(schemaPath));
+ assertFalse(validator.validate(crate));
+ }
+}
diff --git a/src/test/java/edu/kit/datamanager/ro_crate/preview/PreviewTest.java b/src/test/java/edu/kit/datamanager/ro_crate/preview/PreviewTest.java
index 2db987b3..788e0121 100644
--- a/src/test/java/edu/kit/datamanager/ro_crate/preview/PreviewTest.java
+++ b/src/test/java/edu/kit/datamanager/ro_crate/preview/PreviewTest.java
@@ -1,13 +1,13 @@
package edu.kit.datamanager.ro_crate.preview;
import net.lingala.zip4j.ZipFile;
+import net.lingala.zip4j.io.outputstream.ZipOutputStream;
import net.lingala.zip4j.model.ZipParameters;
import org.apache.commons.io.FileUtils;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
-import java.io.IOException;
-import java.io.InputStream;
+import java.io.*;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
@@ -75,15 +75,52 @@ void staticPreviewSaveToZip(@TempDir Path dir) throws IOException {
assertTrue(FileUtils.contentEqualsIgnoreEOL(roDirFile.toFile(), fileInDir.toFile(), String.valueOf(Charset.defaultCharset())));
}
+ @Test
+ void staticPreviewSaveToZipStream(@TempDir Path dir) throws IOException {
+ // The main preview page; its content only has to be recognizable later on.
+ var file1 = dir.resolve("file.html");
+ FileUtils.writeStringToFile(file1.toFile(), "random html, does not need to be valid for this test", Charset.defaultCharset());
+
+ // A directory with one extra file that is handed to the preview together with the page.
+ var file2 = dir.resolve("directory");
+ var fileInDir = file2.resolve("fileInDir.html");
+ FileUtils.writeStringToFile(fileInDir.toFile(), "dajkdlfjdsklafj alksfjdalk fjl", Charset.defaultCharset());
+ StaticPreview preview = new StaticPreview(file1.toFile(), file2.toFile());
+
+ // Let the preview write itself into a new zip archive through a stream.
+ try (ZipOutputStream stream = new ZipOutputStream(new FileOutputStream(dir.resolve("destination.zip").toFile()))) {
+ preview.saveAllToStream(
+ null, // static preview does not need metadata
+ stream);
+ stream.flush();
+ }
+
+ // Extract the archive again so that its content can be inspected.
+ try (ZipFile zf = new ZipFile(dir.resolve("destination.zip").toFile())) {
+ zf.extractAll(dir.resolve("extracted").toAbsolutePath().toString());
+ }
+
+ // The archive is expected to contain the preview page and a folder with the extra file.
+ var e = dir.resolve("extracted");
+ var roPreview = e.resolve("ro-crate-preview.html");
+ var roDir = e.resolve("ro-crate-preview_files");
+ var roDirFile = roDir.resolve("fileInDir.html");
+ assertTrue(Files.isRegularFile(roPreview));
+ assertTrue(Files.isDirectory(roDir));
+ assertTrue(Files.isRegularFile(roDirFile));
+
+ // The preview page must equal the main page and must not be mixed up with the extra file.
+ assertTrue(FileUtils.contentEqualsIgnoreEOL(roPreview.toFile(), file1.toFile(), String.valueOf(Charset.defaultCharset())));
+ assertFalse(FileUtils.contentEqualsIgnoreEOL(roPreview.toFile(), fileInDir.toFile(), String.valueOf(Charset.defaultCharset())));
+
+ assertTrue(FileUtils.contentEqualsIgnoreEOL(roDirFile.toFile(), fileInDir.toFile(), String.valueOf(Charset.defaultCharset())));
+ }
+
@Test
void testAutomaticPreviewAddToFolder(@TempDir Path dir) throws IOException {
AutomaticPreview automaticPreview = new AutomaticPreview();
- InputStream crateJson = PreviewTest.class.getResourceAsStream("/crates/other/idrc_project/ro-crate-metadata.json");
Path crate = dir.resolve("crate");
// this crate will not have a json file
FileUtils.forceMkdir(crate.toFile());
- FileUtils.copyInputStreamToFile(crateJson, crate.resolve("ro-crate-metadata.json").toFile());
+ try (InputStream crateJson = PreviewTest.class.getResourceAsStream("/crates/other/idrc_project/ro-crate-metadata.json")) {
+ Assertions.assertNotNull(crateJson);
+ FileUtils.copyInputStreamToFile(crateJson, crate.resolve("ro-crate-metadata.json").toFile());
+ }
automaticPreview.saveAllToFolder(crate.toFile());
// there should be a html file generated
@@ -93,14 +130,14 @@ void testAutomaticPreviewAddToFolder(@TempDir Path dir) throws IOException {
@Test
void testAutomaticPreviewZip(@TempDir Path dir) throws IOException {
AutomaticPreview automaticPreview = new AutomaticPreview();
- InputStream crateJson = PreviewTest.class.getResourceAsStream("/crates/other/idrc_project/ro-crate-metadata.json");
Path crate = dir.resolve("crate");
ZipParameters zipParameters = new ZipParameters();
zipParameters.setFileNameInZip("ro-crate-metadata.json");
ZipFile zipFile = new ZipFile(dir.resolve("test.zip").toFile());
- zipFile.addStream(crateJson, zipParameters);
- crateJson.close();
+ try (InputStream crateJson = PreviewTest.class.getResourceAsStream("/crates/other/idrc_project/ro-crate-metadata.json")) {
+ zipFile.addStream(crateJson, zipParameters);
+ }
automaticPreview.saveAllToZip(zipFile);
@@ -116,55 +153,136 @@ void testAutomaticPreviewZip(@TempDir Path dir) throws IOException {
assertTrue(Files.isRegularFile(crate.resolve("ro-crate-preview.html")));
}
+ @Test
+ void testAutomaticPreviewZipStream(@TempDir Path dir) throws IOException {
+ // Checks that AutomaticPreview can write its preview into a zip stream and that
+ // saveAllToZip fails with an IOException when given an invalid zip file.
+ AutomaticPreview preview = new AutomaticPreview();
+ String metadataPath = "/crates/other/idrc_project/ro-crate-metadata.json";
+ Path crate = dir.resolve("crate");
+
+ File zipFile = dir.resolve("test.zip").toFile();
+ try (
+ ZipFile zip = new ZipFile(zipFile);
+ InputStream crateJson = PreviewTest.class.getResourceAsStream(metadataPath)
+ ) {
+ // Fail with a clear message if the test resource is missing.
+ Assertions.assertNotNull(crateJson);
+ ZipParameters zipParameters = new ZipParameters();
+ zipParameters.setFileNameInZip("ro-crate-metadata.json");
+ zip.addStream(crateJson, zipParameters);
+ }
+
+ String metadata;
+ try (InputStream metadataStream = PreviewTest.class.getResourceAsStream(metadataPath)) {
+ Assertions.assertNotNull(metadataStream);
+ // Decode explicitly as UTF-8 instead of relying on the platform default charset.
+ metadata = new String(metadataStream.readAllBytes(), java.nio.charset.StandardCharsets.UTF_8);
+ }
+
+ // Note: FileOutputStream truncates test.zip, so the stream output replaces the archive written above.
+ try (ZipOutputStream stream = new ZipOutputStream(new FileOutputStream(zipFile))) {
+ preview.saveAllToStream(metadata, stream);
+ stream.flush();
+ }
+
+ // This should throw an exception but not stop the execution.
+ // try-with-resources releases the ZipFile handle even when the call fails.
+ try (ZipFile randomZipFile = new ZipFile(dir.resolve("dddd.zip").toFile())) {
+ preview.saveAllToZip(randomZipFile);
+ Assertions.fail("Expected IOException when providing invalid ZIP file for preview.");
+ } catch (IOException ex) {
+ //ok
+ }
+
+ try (ZipFile zipReader = new ZipFile(zipFile)) {
+ zipReader.extractAll(crate.toString());
+ }
+ assertTrue(Files.isRegularFile(crate.resolve("ro-crate-preview.html")));
+ }
+
@Test
void testCustomPreviewAddToFolder(@TempDir Path dir) throws IOException {
CustomPreview customPreview = new CustomPreview();
-
- InputStream crateJson = PreviewTest.class.getResourceAsStream("/crates/other/idrc_project/ro-crate-metadata.json");
Path crate = dir.resolve("crate");
- // this crate will not have a json file
Path fakeCrate = dir.resolve("fakeCrate");
FileUtils.forceMkdir(crate.toFile());
- FileUtils.copyInputStreamToFile(crateJson, crate.resolve("ro-crate-metadata.json").toFile());
+
+ try (InputStream crateJson = PreviewTest.class.getResourceAsStream("/crates/other/idrc_project/ro-crate-metadata.json")) {
+ Assertions.assertNotNull(crateJson);
+ FileUtils.copyInputStreamToFile(crateJson, crate.resolve("ro-crate-metadata.json").toFile());
+ }
customPreview.saveAllToFolder(crate.toFile());
- try {
- // this should trow an exception but not stop the execution
+ try {
+ // this should throw an exception but not stop the execution
customPreview.saveAllToFolder(fakeCrate.toFile());
Assertions.fail("Expected IOException when providing invalid ZIP file for preview.");
} catch (IOException ex) {
//ok
}
- // there should be a html file generated
assertTrue(Files.isRegularFile(crate.resolve("ro-crate-preview.html")));
}
@Test
void testCustomPreviewZip(@TempDir Path tmp) throws IOException {
+ // Checks that CustomPreview adds ro-crate-preview.html to a zip file containing crate
+ // metadata, and that saveAllToZip fails with an IOException for an invalid zip file.
CustomPreview customPreview = new CustomPreview();
- InputStream crateJson = PreviewTest.class.getResourceAsStream("/crates/other/idrc_project/ro-crate-metadata.json");
Path crate = tmp.resolve("crate");
ZipParameters zipParameters = new ZipParameters();
zipParameters.setFileNameInZip("ro-crate-metadata.json");
- ZipFile zipFile = new ZipFile(tmp.resolve("test.zip").toFile());
- zipFile.addStream(crateJson, zipParameters);
- crateJson.close();
+ try (ZipFile zipFile = new ZipFile(tmp.resolve("test.zip").toFile());
+ InputStream crateJson = PreviewTest.class.getResourceAsStream("/crates/other/idrc_project/ro-crate-metadata.json")) {
+ // Fail with a clear message if the test resource is missing.
+ Assertions.assertNotNull(crateJson);
+ zipFile.addStream(crateJson, zipParameters);
+ customPreview.saveAllToZip(zipFile);
+
+ // This should throw an exception but not stop the execution.
+ // try-with-resources releases the ZipFile handle even when the call fails.
+ try (ZipFile randomZipFile = new ZipFile(tmp.resolve("dddd.zip").toFile())) {
+ customPreview.saveAllToZip(randomZipFile);
+ Assertions.fail("Expected IOException when providing invalid input to preview.");
+ } catch (IOException ex) {
+ //ok
+ }
+ zipFile.extractAll(crate.toString());
+ }
+ assertTrue(Files.isRegularFile(crate.resolve("ro-crate-preview.html")));
+ }
+
+ @Test
+ void testCustomPreviewZipStream(@TempDir Path tmp) throws IOException {
+ CustomPreview preview = new CustomPreview();
+ String metadataPath = "/crates/other/idrc_project/ro-crate-metadata.json";
+ Path crate = tmp.resolve("crate");
+ File zipFile = tmp.resolve("test.zip").toFile();
+
+ try (ZipFile zip = new ZipFile(zipFile);
+ InputStream crateJson = PreviewTest.class.getResourceAsStream(metadataPath)) {
+ ZipParameters zipParameters = new ZipParameters();
+ zipParameters.setFileNameInZip("ro-crate-metadata.json");
+ zip.addStream(crateJson, zipParameters);
+ }
+
+ String metadata;
+ try (InputStream metadataStream = PreviewTest.class.getResourceAsStream(metadataPath)) {
+ Assertions.assertNotNull(metadataStream);
+ metadata = new String(metadataStream.readAllBytes());
+ }
- customPreview.saveAllToZip(zipFile);
+ try (ZipOutputStream stream = new ZipOutputStream(new FileOutputStream(zipFile))) {
+ preview.saveAllToStream(metadata, stream);
+ stream.flush();
+ }
try {
- // this should trow an exception but not stop the execution
+ // this should throw an exception but not stop the execution
ZipFile randomZipFile = new ZipFile(tmp.resolve("dddd.zip").toFile());
- customPreview.saveAllToZip(randomZipFile);
+ preview.saveAllToZip(randomZipFile);
Assertions.fail("Expected IOException when providing invalid input to preview.");
} catch (IOException ex) {
//ok
}
- zipFile.extractAll(crate.toString());
+
+ try (ZipFile zipReader = new ZipFile(zipFile)) {
+ zipReader.extractAll(crate.toString());
+ }
assertTrue(Files.isRegularFile(crate.resolve("ro-crate-preview.html")));
}
-
}
+
diff --git a/src/test/java/edu/kit/datamanager/ro_crate/reader/CrateReaderTest.java b/src/test/java/edu/kit/datamanager/ro_crate/reader/CommonReaderTest.java
similarity index 78%
rename from src/test/java/edu/kit/datamanager/ro_crate/reader/CrateReaderTest.java
rename to src/test/java/edu/kit/datamanager/ro_crate/reader/CommonReaderTest.java
index bcf8948d..1e162a1c 100644
--- a/src/test/java/edu/kit/datamanager/ro_crate/reader/CrateReaderTest.java
+++ b/src/test/java/edu/kit/datamanager/ro_crate/reader/CommonReaderTest.java
@@ -27,9 +27,13 @@
* This parameter is only required to satisfy the generic reader strategy.
* @param
+ * An implementation test may use this method to provide a subset of the
+ * test cases where an IOException is expected.
+ *
+ * @param input the input to test for presence in the blacklist
+ * @return true if the input is in the blacklist, false otherwise
+ */
+ default boolean isInBlacklist(String input) {
+ return false;
+ }
+
+ /**
+ * ELN Crates are zip files not fully compatible with the RO-Crate standard
+ * in the sense that they must contain a single subfolder in the zip file
+ * which then contains a crate as specified by the RO-Crate standard.
+ *
+ * Here we test if we can read them using our ZipReader.
+ *
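+ * @see <a href="https://github.com/TheELNConsortium/TheELNFileFormat">The ELN File Format (TheELNConsortium)</a>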
+ */
+ @ParameterizedTest
+ @ValueSource(strings = {
+ "https://github.com/TheELNConsortium/TheELNFileFormat/raw/refs/heads/master/examples/AI4Green/Export%20workbook-2024-08-27-export.eln",
+ "https://github.com/TheELNConsortium/TheELNFileFormat/raw/refs/heads/master/examples/OpenSemanticLab/MinimalExample.osl.eln",
+ "https://github.com/TheELNConsortium/TheELNFileFormat/raw/refs/heads/master/examples/PASTA/PASTA.eln",
+ "https://github.com/TheELNConsortium/TheELNFileFormat/raw/refs/heads/master/examples/RSpace/RSpace-2023-12-08-14-44-xml-SELECTION-c0bEtpHcnNe-HA.eln",
+ "https://github.com/TheELNConsortium/TheELNFileFormat/raw/refs/heads/master/examples/SampleDB/sampledb_export.eln",
+ "https://github.com/TheELNConsortium/TheELNFileFormat/raw/refs/heads/master/examples/elabftw/export.eln",
+ "https://github.com/TheELNConsortium/TheELNFileFormat/raw/refs/heads/master/examples/kadi4mat/records-example.eln",
+ "https://github.com/TheELNConsortium/TheELNFileFormat/raw/refs/heads/master/examples/kadi4mat/collections-example.eln"
+ })
+ default void testReadElnCrates(String urlStr, @TempDir Path tmp) throws IOException {
+ // Download the ELN file
+ URL url = URI.create(urlStr).toURL();
+ Path elnFile = tmp.resolve("downloaded.eln");
+ FileUtils.copyURLToFile(url, elnFile.toFile(), 20000, 20000);
+ assertTrue(elnFile.toFile().exists());
+
+ if (!isInBlacklist(urlStr)) {
+ // Read the crate from the downloaded file
+ Crate read = this.readCrate(elnFile);
+ assertNotNull(read);
+ assertFalse(read.getAllDataEntities().isEmpty());
+ } else {
+ // If the file is in the blacklist, we expect an IOException
+ assertThrows(IOException.class, () -> this.readCrate(elnFile));
+ }
+ }
+}
diff --git a/src/test/java/edu/kit/datamanager/ro_crate/reader/FolderReaderTest.java b/src/test/java/edu/kit/datamanager/ro_crate/reader/FolderReaderTest.java
index 21850edd..83bf4f78 100644
--- a/src/test/java/edu/kit/datamanager/ro_crate/reader/FolderReaderTest.java
+++ b/src/test/java/edu/kit/datamanager/ro_crate/reader/FolderReaderTest.java
@@ -17,29 +17,29 @@
* @author Nikola Tzotchev on 9.2.2022 г.
* @version 1
*/
-class FolderReaderTest extends CrateReaderTest