From 99c36a5910276e60b4345322ab7d44d22d93706c Mon Sep 17 00:00:00 2001 From: Matt Gilman Date: Fri, 5 Jun 2026 15:06:16 -0400 Subject: [PATCH] NIFI-16000: Allow spaces in FileUtils.getSanitizedFilename Remove the space character from the invalid-character set so spaces are preserved rather than replaced with an underscore. No other normalization is performed, so leading, trailing, repeated, and interior spaces are kept exactly as supplied. This lets the asset-upload callers accept common valid filenames such as "driver (1).jar" that were previously rejected, while remaining backward compatible for names that contain no spaces. Add TestFileUtils covering the sanitization contract. --- .../org/apache/nifi/util/file/FileUtils.java | 9 ++-- .../apache/nifi/util/file/TestFileUtils.java | 53 +++++++++++++++++++ 2 files changed, 59 insertions(+), 3 deletions(-) create mode 100644 nifi-commons/nifi-utils/src/test/java/org/apache/nifi/util/file/TestFileUtils.java diff --git a/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/util/file/FileUtils.java b/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/util/file/FileUtils.java index 93890c1d339e..3babb6e41188 100644 --- a/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/util/file/FileUtils.java +++ b/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/util/file/FileUtils.java @@ -581,8 +581,10 @@ public static long getContainerUsableSpace(final Path path) { // The invalid character list is derived from this Stackoverflow page. // https://stackoverflow.com/questions/1155107/is-there-a-cross-platform-java-method-to-remove-filename-special-chars + // The space character (32) is intentionally omitted: spaces are legal on every major file system, so they are + // preserved rather than replaced. 
private static final int[] INVALID_CHARS = {34, 60, 62, 124, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 58, 42, 63, 92, 47, 32}; + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 58, 42, 63, 92, 47}; static { Arrays.sort(INVALID_CHARS); @@ -590,6 +592,7 @@ public static long getContainerUsableSpace(final Path path) { /** * Replaces invalid characters for a file system name within a given filename string to underscore '_'. + * Spaces are permitted and preserved. * Be careful not to pass a file path as this method replaces path delimiter characters (i.e forward/back slashes). * @param filename The filename to clean * @return sanitized filename @@ -602,10 +605,10 @@ public static String getSanitizedFilename(String filename) { return ""; } - int codePointCount = filename.codePointCount(0, filename.length()); + final int codePointCount = filename.codePointCount(0, filename.length()); final StringBuilder cleanName = new StringBuilder(); for (int i = 0; i < codePointCount; i++) { - int c = filename.codePointAt(i); + final int c = filename.codePointAt(i); if (Arrays.binarySearch(INVALID_CHARS, c) < 0) { cleanName.appendCodePoint(c); } else { diff --git a/nifi-commons/nifi-utils/src/test/java/org/apache/nifi/util/file/TestFileUtils.java b/nifi-commons/nifi-utils/src/test/java/org/apache/nifi/util/file/TestFileUtils.java new file mode 100644 index 000000000000..31c574e18092 --- /dev/null +++ b/nifi-commons/nifi-utils/src/test/java/org/apache/nifi/util/file/TestFileUtils.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.util.file;
+
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNull;
+
+public class TestFileUtils {
+
+    @Test
+    public void testGetSanitizedFilenameNullAndEmpty() {
+        assertNull(FileUtils.getSanitizedFilename(null)); // null input is returned as null
+        assertEquals("", FileUtils.getSanitizedFilename("")); // empty input stays empty
+    }
+
+    @Test
+    public void testGetSanitizedFilenameReplacesInvalidCharacters() {
+        assertEquals("a_b_c", FileUtils.getSanitizedFilename("a/b\\c")); // forward slash and backslash
+        assertEquals("name_", FileUtils.getSanitizedFilename("name:")); // colon
+        assertEquals("a_b", FileUtils.getSanitizedFilename("a\tb")); // control character (tab)
+        assertEquals("_", FileUtils.getSanitizedFilename("*")); // wildcard as the only character
+    }
+
+    @Test
+    public void testGetSanitizedFilenamePreservesSpaces() {
+        assertEquals("driver (1).jar", FileUtils.getSanitizedFilename("driver (1).jar")); // single interior space
+        assertEquals("my report.txt", FileUtils.getSanitizedFilename("my report.txt")); // space between words
+        assertEquals("driver  (1).jar", FileUtils.getSanitizedFilename("driver  (1).jar")); // repeated spaces are not collapsed
+        assertEquals(" driver (1).jar ", FileUtils.getSanitizedFilename(" driver (1).jar ")); // leading and trailing spaces are not trimmed
+    }
+
+    @Test
+    public void testGetSanitizedFilenamePreservesDots() {
+        assertEquals("report...", FileUtils.getSanitizedFilename("report...")); // trailing dots are kept
+        assertEquals(".env", FileUtils.getSanitizedFilename(".env")); // leading dot is kept
+    }
+}