diff --git a/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test.java b/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test.java
index c20294cd85b..c1a7d37b986 100644
--- a/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test.java
+++ b/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test.java
@@ -19,38 +19,152 @@
 
 package org.apache.sysds.test.functions.io.hdf5;
 
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
 import java.util.HashMap;
+import java.util.List;
+import java.util.stream.Collectors;
 
 import org.apache.sysds.api.DMLScript;
 import org.apache.sysds.common.Types.ExecMode;
+import org.apache.sysds.common.Types.FileFormat;
+import org.apache.sysds.common.Types.ValueType;
 import org.apache.sysds.conf.CompilerConfig;
 import org.apache.sysds.runtime.matrix.data.MatrixValue;
+import org.apache.sysds.runtime.meta.MatrixCharacteristics;
+import org.apache.sysds.runtime.util.HDFSTool;
 import org.apache.sysds.test.TestConfiguration;
 import org.apache.sysds.test.TestUtils;
+import org.junit.Assert;
+import org.junit.BeforeClass;
 import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
 
-public abstract class ReadHDF5Test extends ReadHDF5TestBase {
+/**
+ * Parameterized read test for HDF5 inputs. Each {@link Hdf5TestCase} names an HDF5 file, a dataset in it
+ * and the DML script used to read it; every case runs in single-node mode, once with
+ * {@code FLAG_PARREADWRITE_TEXT} disabled and once with it enabled.
+ */
+@RunWith(Parameterized.class)
+public class ReadHDF5Test extends ReadHDF5TestBase {
 
-	protected abstract int getId();
+	private static final double eps = 1e-9;
+	private static final String TEST_NAME = "ReadHDF5Test";
 
-	protected String getInputHDF5FileName() {
-		return "transfusion_" + getId() + ".h5";
+	private static final int S_2D_ROWS = 200;
+	private static final int S_2D_COLS = 40;
+	private static final int S_ARRAY_LENGTH = 30;
+	private static final int MATRIX_3D_ROWS = 15;
+	private static final int MATRIX_3D_FLATTENED_COLS = 15 * 5;
+	private static final int MULTI_TENSOR_SAMPLES = 120;
+	private static final int MULTI_TENSOR_LABEL_FEATURES = 12;
+	private static final int MULTI_TENSOR_SEN1_FLATTENED_COLS = 16 * 16 * 4;
+
+	/** File/dataset combinations under test; rows and cols feed the generated .mtd metadata files. */
+	private static final List<Hdf5TestCase> TEST_CASES = Collections.unmodifiableList(Arrays.asList(
+		new Hdf5TestCase(
+			"test_single_dataset.h5", "data", DmlVariant.FORMAT_AND_DATASET, S_2D_ROWS, S_2D_COLS),
+		new Hdf5TestCase(
+			"test_multiple_datasets.h5", "matrix_2d", DmlVariant.DATASET_ONLY, S_2D_ROWS, S_2D_COLS),
+		new Hdf5TestCase(
+			"test_multiple_datasets.h5", "matrix_3d", DmlVariant.DATASET_ONLY, MATRIX_3D_ROWS, MATRIX_3D_FLATTENED_COLS),
+		new Hdf5TestCase(
+			"test_different_dtypes.h5", "double_primary", DmlVariant.DATASET_ONLY, S_2D_ROWS, S_2D_COLS),
+		new Hdf5TestCase(
+			"test_chunked.h5", "chunked_data", DmlVariant.FORMAT_AND_DATASET, S_2D_ROWS, S_2D_COLS),
+		new Hdf5TestCase(
+			"test_compressed.h5", "gzip_compressed_9", DmlVariant.DATASET_ONLY, S_2D_ROWS, S_2D_COLS),
+		new Hdf5TestCase(
+			"test_multi_tensor_samples.h5", "label", DmlVariant.DATASET_ONLY, MULTI_TENSOR_SAMPLES, MULTI_TENSOR_LABEL_FEATURES),
+		new Hdf5TestCase(
+			"test_multi_tensor_samples.h5", "sen1", DmlVariant.DATASET_ONLY, MULTI_TENSOR_SAMPLES, MULTI_TENSOR_SEN1_FLATTENED_COLS),
+		new Hdf5TestCase(
+			"test_nested_groups.h5", "group1/subgroup/data2", DmlVariant.FORMAT_AND_DATASET, S_2D_ROWS, S_2D_COLS),
+		new Hdf5TestCase(
+			"test_with_attributes.h5", "data", DmlVariant.DATASET_ONLY, S_2D_ROWS, S_2D_COLS),
+		new Hdf5TestCase(
+			"test_empty_datasets.h5", "empty", DmlVariant.FORMAT_AND_DATASET, 0, S_2D_COLS),
+		new Hdf5TestCase(
+			"test_string_datasets.h5", "string_array", DmlVariant.DATASET_ONLY, S_ARRAY_LENGTH, 1)
+	));
+
+	private final Hdf5TestCase testCase;
+
+	public ReadHDF5Test(Hdf5TestCase testCase) {
+		this.testCase = testCase;
 	}
 
-	private final static double eps = 1e-9;
+	/**
+	 * Ensures all HDF5 inputs exist before any test runs. If a file is missing the R generator script is
+	 * executed and the check repeated; metadata files are then written for the cases that lack one.
+	 */
+	@BeforeClass
+	public static void ensureHdf5DataGenerated() {
+		Path scriptDir = Paths.get(SCRIPT_DIR, TEST_DIR);
+		Path inputDir = scriptDir.resolve(INPUT_DIR);
+		boolean missingFiles = TEST_CASES.stream()
+			.anyMatch(tc -> Files.notExists(inputDir.resolve(tc.hdf5File)));
+		if(!missingFiles)
+			ensureMetadataFiles(inputDir);
+		else {
+			generateHdf5Data(scriptDir);
 
-	@Test
-	public void testHDF51_Seq_CP() {
-		runReadHDF5Test(getId(), ExecMode.SINGLE_NODE, false);
+			boolean stillMissing = TEST_CASES.stream()
+				.anyMatch(tc -> Files.notExists(inputDir.resolve(tc.hdf5File)));
+			if(stillMissing)
+				Assert.fail("Failed to generate required HDF5 files for ReadHDF5 tests.");
+
+			ensureMetadataFiles(inputDir);
+		}
+	}
+
+	/** @return one single-element argument array per entry of {@code TEST_CASES} */
+	@Parameters(name = "{0}")
+	public static Collection<Object[]> data() {
+		return TEST_CASES.stream()
+			.map(tc -> new Object[] {tc})
+			.collect(Collectors.toList());
+	}
+
+	@Override
+	protected String getTestName() {
+		return TEST_NAME;
+	}
+
+	@Override
+	protected String getTestClassDir() {
+		return TEST_CLASS_DIR;
 	}
 
 	@Test
-	public void testHDF51_Parallel_CP() {
-		runReadHDF5Test(getId(), ExecMode.SINGLE_NODE, true);
+	public void testReadSequential() {
+		runReadHDF5Test(testCase, ExecMode.SINGLE_NODE, false);
 	}
 
-	protected void runReadHDF5Test(int testNumber, ExecMode platform, boolean parallel) {
+	@Test
+	public void testReadSequentialParallelIO() {
+		runReadHDF5Test(testCase, ExecMode.SINGLE_NODE, true);
+	}
 
+	/**
+	 * Reads one dataset via the DML script of the given test case and runs the R verification script on
+	 * the same file and dataset.
+	 *
+	 * @param testCase HDF5 file, dataset and DML script variant to use
+	 * @param platform execution mode to run under
+	 * @param parallel value assigned to {@code CompilerConfig.FLAG_PARREADWRITE_TEXT} for the run
+	 */
+	protected void runReadHDF5Test(Hdf5TestCase testCase, ExecMode platform, boolean parallel) {
 		ExecMode oldPlatform = rtplatform;
 		rtplatform = platform;
 
@@ -61,21 +175,19 @@ protected void runReadHDF5Test(int testNumber, ExecMode platform, boolean parall
 		boolean oldpar = CompilerConfig.FLAG_PARREADWRITE_TEXT;
 
 		try {
-
 			CompilerConfig.FLAG_PARREADWRITE_TEXT = parallel;
 
 			TestConfiguration config = getTestConfiguration(getTestName());
 			loadTestConfiguration(config);
 
 			String HOME = SCRIPT_DIR + TEST_DIR;
-			String inputMatrixName = HOME + INPUT_DIR + getInputHDF5FileName(); // always read the same data
-			String datasetName = "DATASET_1";
+			String inputMatrixName = HOME + INPUT_DIR + testCase.hdf5File;
 
-			fullDMLScriptName = HOME + getTestName() + "_" + testNumber + ".dml";
-			programArgs = new String[] {"-args", inputMatrixName, datasetName, output("Y")};
+			fullDMLScriptName = HOME + testCase.variant.getScriptName();
+			programArgs = new String[] {"-args", inputMatrixName, testCase.dataset, output("Y")};
 
 			fullRScriptName = HOME + "ReadHDF5_Verify.R";
-			rCmd = "Rscript" + " " + fullRScriptName + " " + inputMatrixName + " " + datasetName + " " + expectedDir();
+			rCmd = "Rscript" + " " + fullRScriptName + " " + inputMatrixName + " " + testCase.dataset + " " + expectedDir();
 
 			runTest(true, false, null, -1);
 			runRScript(true);
@@ -90,4 +202,111 @@ protected void runReadHDF5Test(int testNumber, ExecMode platform, boolean parall
 			DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
 		}
 	}
+
+	/**
+	 * Runs {@code Rscript gen_HDF5_testdata.R} in the given directory and fails if the process cannot
+	 * be started, is interrupted, or exits with a non-zero status.
+	 *
+	 * @param scriptDir working directory for the R process
+	 */
+	private static void generateHdf5Data(Path scriptDir) {
+		ProcessBuilder processBuilder = new ProcessBuilder("Rscript", "gen_HDF5_testdata.R");
+		processBuilder.directory(scriptDir.toFile());
+		processBuilder.redirectErrorStream(true);
+
+		try {
+			Process process = processBuilder.start();
+			StringBuilder output = new StringBuilder();
+			// Drain the merged stdout/stderr before waitFor() so the child cannot block on a full pipe.
+			try(BufferedReader reader = new BufferedReader(
+				new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) {
+				reader.lines().forEach(line -> output.append(line).append(System.lineSeparator()));
+			}
+			int exitCode = process.waitFor();
+			if(exitCode != 0)
+				Assert.fail("Failed to execute gen_HDF5_testdata.R (exit " + exitCode + "):\n" + output);
+		}
+		catch(IOException e) {
+			throw new AssertionError("Unable to execute gen_HDF5_testdata.R: " + e.getMessage(), e);
+		}
+		catch(InterruptedException e) {
+			Thread.currentThread().interrupt();
+			throw new AssertionError("Interrupted while generating HDF5 test data.", e);
+		}
+	}
+
+	/**
+	 * Writes a {@code <file>.mtd} metadata file (FP64, HDF5 format) for every test case lacking one.
+	 * NOTE(review): the metadata name is derived from the HDF5 file name only, so cases that share a
+	 * file (e.g. matrix_2d and matrix_3d) reuse the dimensions of the first case - confirm that the
+	 * reader does not depend on them.
+	 *
+	 * @param inputDir directory holding the HDF5 inputs; created if it does not exist
+	 */
+	private static void ensureMetadataFiles(Path inputDir) {
+		try {
+			Files.createDirectories(inputDir);
+			for(Hdf5TestCase tc : TEST_CASES) {
+				Path mtdPath = inputDir.resolve(tc.getMtdFileName());
+				if(Files.exists(mtdPath))
+					continue;
+
+				MatrixCharacteristics mc = new MatrixCharacteristics(tc.rows, tc.cols, tc.getNonZeros());
+				HDFSTool.writeMetaDataFile(mtdPath.toString(), ValueType.FP64, mc, FileFormat.HDF5);
+			}
+		}
+		catch(IOException e) {
+			throw new AssertionError("Unable to create HDF5 metadata files: " + e.getMessage(), e);
+		}
+	}
+
+	/** DML read scripts a test case can select, identified by script file name. */
+	private enum DmlVariant {
+		FORMAT_AND_DATASET("ReadHDF5_WithFormatAndDataset.dml"),
+		DATASET_ONLY("ReadHDF5_WithDataset.dml"),
+		DEFAULT("ReadHDF5_Default.dml");
+
+		private final String scriptName;
+
+		DmlVariant(String scriptName) {
+			this.scriptName = scriptName;
+		}
+
+		public String getScriptName() {
+			return scriptName;
+		}
+	}
+
+	/** Immutable description of one read scenario: input file, dataset path, script and declared shape. */
+	private static final class Hdf5TestCase {
+		private final String hdf5File;
+		private final String dataset;
+		private final DmlVariant variant;
+		private final long rows;
+		private final long cols;
+
+		private Hdf5TestCase(String hdf5File, String dataset, DmlVariant variant, long rows, long cols) {
+			this.hdf5File = hdf5File;
+			this.dataset = dataset;
+			this.variant = variant;
+			this.rows = rows;
+			this.cols = cols;
+		}
+
+		private String getMtdFileName() {
+			return hdf5File + ".mtd";
+		}
+
+		/** @return {@code rows * cols} (the matrix is declared fully dense), or 0 if a dimension is 0 */
+		private long getNonZeros() {
+			if(rows == 0 || cols == 0)
+				return 0;
+			return rows * cols;
+		}
+
+		@Override
+		public String toString() {
+			return hdf5File + "::" + dataset;
+		}
+	}
 }
diff --git a/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test1.java b/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test1.java
deleted file mode 100644
index b0fff7a6391..00000000000
--- a/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test1.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysds.test.functions.io.hdf5; - -public class ReadHDF5Test1 extends ReadHDF5Test { - - private final static String TEST_NAME = "ReadHDF5Test"; - public final static String TEST_CLASS_DIR = TEST_DIR + ReadHDF5Test1.class.getSimpleName() + "/"; - - protected String getTestName() { - return TEST_NAME; - } - - protected String getTestClassDir() { - return TEST_CLASS_DIR; - } - - protected int getId() { - return 1; - } -} diff --git a/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test2.java b/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test2.java deleted file mode 100644 index d6a4c763c34..00000000000 --- a/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test2.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysds.test.functions.io.hdf5; - -public class ReadHDF5Test2 extends ReadHDF5Test { - - private final static String TEST_NAME = "ReadHDF5Test"; - private final static String TEST_CLASS_DIR = TEST_DIR + ReadHDF5Test2.class.getSimpleName() + "/"; - - protected String getTestName() { - return TEST_NAME; - } - - protected String getTestClassDir() { - return TEST_CLASS_DIR; - } - - protected int getId() { - return 2; - } -} diff --git a/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test3.java b/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test3.java deleted file mode 100644 index 71a6b1762ec..00000000000 --- a/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test3.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysds.test.functions.io.hdf5;
-
-public class ReadHDF5Test3 extends ReadHDF5Test {
-
-	private final static String TEST_NAME = "ReadHDF5Test";
-	private final static String TEST_CLASS_DIR = TEST_DIR + ReadHDF5Test3.class.getSimpleName() + "/";
-
-	protected String getTestName() {
-		return TEST_NAME;
-	}
-
-	protected String getTestClassDir() {
-		return TEST_CLASS_DIR;
-	}
-
-	protected int getId() {
-		return 3;
-	}
-}
diff --git a/src/test/scripts/functions/io/hdf5/ReadHDF5Test_3.dml b/src/test/scripts/functions/io/hdf5/ReadHDF5_Default.dml
similarity index 100%
rename from src/test/scripts/functions/io/hdf5/ReadHDF5Test_3.dml
rename to src/test/scripts/functions/io/hdf5/ReadHDF5_Default.dml
diff --git a/src/test/scripts/functions/io/hdf5/ReadHDF5Test_2.dml b/src/test/scripts/functions/io/hdf5/ReadHDF5_WithDataset.dml
similarity index 100%
rename from src/test/scripts/functions/io/hdf5/ReadHDF5Test_2.dml
rename to src/test/scripts/functions/io/hdf5/ReadHDF5_WithDataset.dml
diff --git a/src/test/scripts/functions/io/hdf5/ReadHDF5Test_1.dml b/src/test/scripts/functions/io/hdf5/ReadHDF5_WithFormatAndDataset.dml
similarity index 100%
rename from src/test/scripts/functions/io/hdf5/ReadHDF5Test_1.dml
rename to src/test/scripts/functions/io/hdf5/ReadHDF5_WithFormatAndDataset.dml
diff --git a/src/test/scripts/functions/io/hdf5/gen_HDF5_testdata.R b/src/test/scripts/functions/io/hdf5/gen_HDF5_testdata.R
new file mode 100644
index 00000000000..a1f5c284270
--- /dev/null
+++ b/src/test/scripts/functions/io/hdf5/gen_HDF5_testdata.R
@@ -0,0 +1,249 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+# Generate various HDF5 test files with different formats.
+# Creates test files in the 'in' directory.
+
+if (!require("rhdf5", quietly = TRUE)) {
+  cat("Error: rhdf5 is not installed.\n")
+  quit(status = 1)
+}
+
+SMALL_MATRIX_2D <- c(200, 40)
+SMALL_MATRIX_3D <- c(15, 15, 5)
+SMALL_TENSOR_4D_A <- c(120, 16, 16, 4)
+SMALL_TENSOR_4D_B <- c(120, 16, 16, 5)
+SMALL_LABEL_MATRIX <- c(120, 12)
+
+VECTOR_LENGTH <- 200
+STRING_ARRAY_LENGTH <- 30
+
+CHUNK_SHAPE <- c(100, 20)
+
+# Writes an array of the given shape, filled by generator(n), as an uncompressed dataset.
+# The file must already exist: every caller in this script runs h5createFile first.
+write_matrix <- function(file_path, dataset_name, shape, generator = function(n) rnorm(n)) {
+  values <- generator(prod(shape))
+  # Create dataset without compression, filters, or chunking to avoid message type 11 (Filter Pipeline)
+  # filter = "NONE": explicitly disable compression filters
+  # level = 0: no compression
+  # shuffle = FALSE: no shuffle filter
+  # chunk = dims: single chunk matching dataset size (effectively contiguous for small datasets)
+  h5createDataset(file_path, dataset_name, dims = shape,
+                  filter = "NONE", level = 0, shuffle = FALSE, chunk = shape)
+  h5write(array(values, dim = shape), file_path, dataset_name)
+}
+
+generate_test_file_single_dataset <- function(dir) {
+  file_path <- file.path(dir, "test_single_dataset.h5")
+  h5createFile(file_path)
+  write_matrix(file_path, "data", SMALL_MATRIX_2D)
+  cat("Created test_single_dataset.h5 (single 2D dataset)\n")
+}
+
+generate_test_file_multiple_datasets <- function(dir) {
+  file_path <- file.path(dir, "test_multiple_datasets.h5")
+  h5createFile(file_path)
+  write_matrix(file_path, "matrix_2d", SMALL_MATRIX_2D)
+  # Create 1D vector without compression/filters
+  h5createDataset(file_path, "vector_1d", dims = VECTOR_LENGTH,
+                  filter = "NONE", level = 0, shuffle = FALSE, chunk = VECTOR_LENGTH)
+  h5write(rnorm(VECTOR_LENGTH), file_path, "vector_1d")
+  write_matrix(file_path, "matrix_3d", SMALL_MATRIX_3D)
+  cat("Created test_multiple_datasets.h5 (1D/2D/3D datasets)\n")
+}
+
+generate_test_file_different_dtypes <- function(dir) {
+  file_path <- file.path(dir, "test_different_dtypes.h5")
+  h5createFile(file_path)
+  write_matrix(file_path, "double_primary", SMALL_MATRIX_2D)
+  write_matrix(file_path, "double_secondary", SMALL_MATRIX_2D)
+  write_matrix(
+    file_path,
+    "int32",
+    SMALL_MATRIX_2D,
+    generator = function(n) as.integer(sample(-100:100, n, replace = TRUE))
+  )
+  write_matrix(
+    file_path,
+    "int32_alt",
+    SMALL_MATRIX_2D,
+    generator = function(n) as.integer(sample(-100:100, n, replace = TRUE))
+  )
+  cat("Created test_different_dtypes.h5 (double/int datasets)\n")
+}
+
+# https://support.hdfgroup.org/documentation/hdf5-docs/advanced_topics/chunking_in_hdf5.html
+generate_test_file_chunked <- function(dir) {
+  file_path <- file.path(dir, "test_chunked.h5")
+  h5createFile(file_path)
+
+  data <- array(rnorm(prod(SMALL_MATRIX_2D)), dim = SMALL_MATRIX_2D)
+  # Chunked dataset without compression/filters (chunking is intentional for this test)
+  h5createDataset(file_path, "chunked_data", dims = SMALL_MATRIX_2D, chunk = CHUNK_SHAPE,
+                  filter = "NONE", level = 0, shuffle = FALSE)
+  h5write(data, file_path, "chunked_data")
+
+  write_matrix(file_path, "non_chunked_data", SMALL_MATRIX_2D)
+  cat("Created test_chunked.h5 (chunked dataset)\n")
+}
+
+generate_test_file_compressed <- function(dir) {
+  file_path <- file.path(dir, "test_compressed.h5")
+  h5createFile(file_path)
+  data <- array(rnorm(prod(SMALL_MATRIX_2D)), dim = SMALL_MATRIX_2D)
+  h5createDataset(file_path, "gzip_compressed_9", dims = SMALL_MATRIX_2D,
+                  chunk = SMALL_MATRIX_2D, level = 9)
+  h5write(data, file_path, "gzip_compressed_9")
+  h5createDataset(file_path, "gzip_compressed_1", dims = SMALL_MATRIX_2D,
+                  chunk = SMALL_MATRIX_2D, level = 1)
+  h5write(data, file_path, "gzip_compressed_1")
+  cat("Created test_compressed.h5 (gzip compression)\n")
+}
+
+generate_test_file_multi_tensor_samples <- function(dir) {
+  file_path <- file.path(dir, "test_multi_tensor_samples.h5")
+  h5createFile(file_path)
+  write_matrix(
+    file_path,
+    "sen1",
+    SMALL_TENSOR_4D_A
+  )
+  write_matrix(
+    file_path,
+    "sen2",
+    SMALL_TENSOR_4D_B
+  )
+  write_matrix(
+    file_path,
+    "label",
+    SMALL_LABEL_MATRIX,
+    generator = function(n) as.integer(sample(0:1, n, replace = TRUE))
+  )
+  cat("Created test_multi_tensor_samples.h5 (multi-input tensors)\n")
+}
+
+generate_test_file_nested_groups <- function(dir) {
+  file_path <- file.path(dir, "test_nested_groups.h5")
+  h5createFile(file_path)
+  write_matrix(file_path, "root_data", SMALL_MATRIX_2D)
+  h5createGroup(file_path, "group1")
+  write_matrix(file_path, "group1/data1", SMALL_MATRIX_2D)
+  h5createGroup(file_path, "group1/subgroup")
+  write_matrix(file_path, "group1/subgroup/data2", SMALL_MATRIX_2D)
+  cat("Created test_nested_groups.h5 (nested group hierarchy)\n")
+}
+
+generate_test_file_with_attributes <- function(dir) {
+  file_path <- file.path(dir, "test_with_attributes.h5")
+  h5createFile(file_path)
+  write_matrix(file_path, "data", SMALL_MATRIX_2D)
+
+  fid <- H5Fopen(file_path)
+  did <- H5Dopen(fid, "data")
+  h5writeAttribute("Test dataset with attributes", did, "description")
+  h5writeAttribute(1.0, did, "version")
+  h5writeAttribute(SMALL_MATRIX_2D, did, "shape")
+  H5Dclose(did)
+
+  h5writeAttribute("2025-11-26", fid, "file_created")
+  h5writeAttribute("attributes", fid, "test_type")
+  H5Fclose(fid)
+  cat("Created test_with_attributes.h5 (dataset + file attributes)\n")
+}
+
+generate_test_file_empty_datasets <- function(dir) {
+  file_path <- file.path(dir, "test_empty_datasets.h5")
+  h5createFile(file_path)
+  h5createDataset(file_path, "empty", dims = c(0, SMALL_MATRIX_2D[2]),
+                  filter = "NONE", level = 0, shuffle = FALSE)
+
+  # Create scalar and vector without compression/filters
+  h5createDataset(file_path, "scalar", dims = 1,
+                  filter = "NONE", level = 0, shuffle = FALSE, chunk = 1)
+  h5write(1.0, file_path, "scalar")
+  h5createDataset(file_path, "vector", dims = VECTOR_LENGTH,
+                  filter = "NONE", level = 0, shuffle = FALSE, chunk = VECTOR_LENGTH)
+  h5write(rnorm(VECTOR_LENGTH), file_path, "vector")
+  cat("Created test_empty_datasets.h5 (empty/scalar/vector)\n")
+}
+
+generate_test_file_string_datasets <- function(dir) {
+  file_path <- file.path(dir, "test_string_datasets.h5")
+  h5createFile(file_path)
+  strings <- paste0("string_", 0:(STRING_ARRAY_LENGTH - 1))
+  # Create string dataset without compression/filters
+  h5createDataset(file_path, "string_array", dims = STRING_ARRAY_LENGTH,
+                  storage.mode = "character", filter = "NONE", level = 0,
+                  shuffle = FALSE, chunk = STRING_ARRAY_LENGTH)
+  h5write(strings, file_path, "string_array")
+  cat("Created test_string_datasets.h5 (string datasets)\n")
+}
+
+# Generates all test files into ./in. Returns TRUE (invisibly) if every generator succeeded.
+main <- function() {
+  # Check if working directory is "hdf5". Quit if not.
+  if (basename(getwd()) != "hdf5") {
+    cat("You must execute this script from the 'hdf5' directory!\n")
+    quit(status = 1)
+  }
+
+  testdir <- "in"
+  if (!dir.exists(testdir)) {
+    dir.create(testdir)
+  }
+
+  test_functions <- list(
+    generate_test_file_single_dataset,
+    generate_test_file_multiple_datasets,
+    generate_test_file_different_dtypes,
+    generate_test_file_chunked,
+    generate_test_file_compressed,
+    generate_test_file_multi_tensor_samples,
+    generate_test_file_nested_groups,
+    generate_test_file_with_attributes,
+    generate_test_file_empty_datasets,
+    generate_test_file_string_datasets
+  )
+
+  # Keep going after a failure so a single run reports every broken generator, but count
+  # the failures so the exit status can signal that the generated data is incomplete.
+  failures <- 0
+  for (test_func in test_functions) {
+    tryCatch({
+      test_func(testdir)
+    }, error = function(e) {
+      cat(sprintf(" ✗ Error: %s\n", conditionMessage(e)))
+      failures <<- failures + 1
+    })
+  }
+
+  files <- sort(list.files(testdir, pattern = "\\.h5$", full.names = TRUE))
+  cat(sprintf("\nGenerated %d HDF5 test files in %s\n", length(files), normalizePath(testdir)))
+  invisible(failures == 0)
+}
+
+if (!interactive()) {
+  if (!main()) {
+    quit(status = 1)
+  }
+}