From 262bc9f236e61ce1d1e0fef9266615fbbb3dd308 Mon Sep 17 00:00:00 2001 From: mkriskovic Date: Sat, 30 May 2026 18:17:44 +0200 Subject: [PATCH 1/4] 4000 added names from files to imported files --- .../cluster/KnowledgeBaseVectorStore.java | 6 +++--- .../reader/cluster/AbstractDocumentReader.java | 17 +++++++++++++++++ .../reader/cluster/HtmlJsoupDocumentReader.java | 5 ++++- .../reader/cluster/JsonDocumentReader.java | 7 +++++-- .../reader/cluster/PagePdfDocumentReader.java | 7 +++++-- .../reader/cluster/TikaDocumentReader.java | 5 ++++- 6 files changed, 38 insertions(+), 9 deletions(-) diff --git a/server/libs/modules/components/ai/vectorstore/knowledgebase/src/main/java/com/bytechef/component/ai/vectorstore/knowledgebase/cluster/KnowledgeBaseVectorStore.java b/server/libs/modules/components/ai/vectorstore/knowledgebase/src/main/java/com/bytechef/component/ai/vectorstore/knowledgebase/cluster/KnowledgeBaseVectorStore.java index 128c614171a..75df7ebe1d7 100644 --- a/server/libs/modules/components/ai/vectorstore/knowledgebase/src/main/java/com/bytechef/component/ai/vectorstore/knowledgebase/cluster/KnowledgeBaseVectorStore.java +++ b/server/libs/modules/components/ai/vectorstore/knowledgebase/src/main/java/com/bytechef/component/ai/vectorstore/knowledgebase/cluster/KnowledgeBaseVectorStore.java @@ -117,10 +117,10 @@ private static String deriveDocumentName(List documents) { Map metadata = document.getMetadata(); - Object source = metadata.get("source"); + Object filename = metadata.get("filename"); - if (source != null) { - return source.toString(); + if (filename != null) { + return filename.toString(); } } diff --git a/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/AbstractDocumentReader.java b/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/AbstractDocumentReader.java index 231ea91f8c9..ec9e6b7bd55 100644 --- a/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/AbstractDocumentReader.java +++ b/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/AbstractDocumentReader.java @@ -21,6 +21,10 @@ import com.bytechef.component.definition.FileEntry; import com.bytechef.component.definition.Parameters; import java.io.File; +import java.util.HashMap; +import java.util.Map; +import org.springframework.ai.document.Document; +import org.springframework.ai.document.DocumentReader; import org.springframework.core.io.FileSystemResource; /** @@ -38,6 +42,19 @@ protected static FileResult getFile(Parameters inputParameters, Context context) return new FileResult(fileEntry, fileSystemResource); } + protected static DocumentReader withFilename(DocumentReader reader, String filename) { + return () -> reader.read() + .stream() + .map(document -> { + Map metadata = new HashMap<>(document.getMetadata()); + + metadata.put("filename", filename); + + return new Document(document.getId(), document.getText(), metadata); + }) + .toList(); + } + protected record FileResult(FileEntry fileEntry, FileSystemResource fileSystemResource) { } } diff --git a/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/HtmlJsoupDocumentReader.java b/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/HtmlJsoupDocumentReader.java index 995e1b6970f..2bda090e4b5 100644 --- a/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/HtmlJsoupDocumentReader.java +++ b/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/HtmlJsoupDocumentReader.java @@ -49,6 +49,9 @@ protected static org.springframework.ai.document.DocumentReader apply( FileResult result = getFile(inputParameters, context); - return new org.springframework.ai.reader.jsoup.JsoupDocumentReader(result.fileSystemResource()); + return withFilename( + new org.springframework.ai.reader.jsoup.JsoupDocumentReader(result.fileSystemResource()), + result.fileEntry() + .getName()); } } diff --git a/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/JsonDocumentReader.java b/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/JsonDocumentReader.java index f1cd2ce7ce8..5939b5c2c11 100644 --- a/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/JsonDocumentReader.java +++ b/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/JsonDocumentReader.java @@ -51,14 +51,17 @@ public class JsonDocumentReader extends AbstractDocumentReader { .required(false)) .object(() -> JsonDocumentReader::apply); - protected static JsonReader apply( + protected static org.springframework.ai.document.DocumentReader apply( Parameters inputParameters, Parameters connectionParameters, Context context) { FileResult result = getFile(inputParameters, context); List keys = inputParameters.getList(JSON_KEYS_TO_USE, String.class); - return (keys == null) ? new JsonReader(result.fileSystemResource()) + JsonReader jsonReader = (keys == null) ? new JsonReader(result.fileSystemResource()) : new JsonReader(result.fileSystemResource(), keys.toArray(new String[0])); + + return withFilename(jsonReader, result.fileEntry() + .getName()); } } diff --git a/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/PagePdfDocumentReader.java b/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/PagePdfDocumentReader.java index c8d082644b1..2e8dcb990f6 100644 --- a/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/PagePdfDocumentReader.java +++ b/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/PagePdfDocumentReader.java @@ -41,7 +41,7 @@ public class PagePdfDocumentReader extends AbstractDocumentReader { DOCUMENT_PROPERTY) .object(() -> PagePdfDocumentReader::apply); - protected static org.springframework.ai.reader.pdf.PagePdfDocumentReader apply( + protected static org.springframework.ai.document.DocumentReader apply( Parameters inputParameters, Parameters connectionParameters, Context context) { FileResult result = getFile(inputParameters, context); @@ -55,6 +55,9 @@ protected static org.springframework.ai.reader.pdf.PagePdfDocumentReader apply( .withPagesPerDocument(1) .build(); - return new org.springframework.ai.reader.pdf.PagePdfDocumentReader(result.fileSystemResource(), config); + return withFilename( + new org.springframework.ai.reader.pdf.PagePdfDocumentReader(result.fileSystemResource(), config), + result.fileEntry() + .getName()); } } diff --git a/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/TikaDocumentReader.java b/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/TikaDocumentReader.java index 44f161bf2ce..e4d3470db5f 100644 --- a/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/TikaDocumentReader.java +++ b/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/TikaDocumentReader.java @@ -51,6 +51,9 @@ protected static org.springframework.ai.document.DocumentReader apply( FileResult result = getFile(inputParameters, context); - return new org.springframework.ai.reader.tika.TikaDocumentReader(result.fileSystemResource()); + return withFilename( + new org.springframework.ai.reader.tika.TikaDocumentReader(result.fileSystemResource()), + result.fileEntry() + .getName()); } } From 817f0fc36f50002fa3dfe4180f495f74c6788c22 Mon Sep 17 00:00:00 2001 From: mkriskovic Date: Sat, 30 May 2026 18:24:54 +0200 Subject: [PATCH 2/4] 4000 delete edge case --- .../service/FilesystemFileStorageService.java | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/server/libs/core/file-storage/file-storage-filesystem-service/src/main/java/com/bytechef/file/storage/filesystem/service/FilesystemFileStorageService.java b/server/libs/core/file-storage/file-storage-filesystem-service/src/main/java/com/bytechef/file/storage/filesystem/service/FilesystemFileStorageService.java index 73371394313..c66825d8f9d 100644 --- a/server/libs/core/file-storage/file-storage-filesystem-service/src/main/java/com/bytechef/file/storage/filesystem/service/FilesystemFileStorageService.java +++ b/server/libs/core/file-storage/file-storage-filesystem-service/src/main/java/com/bytechef/file/storage/filesystem/service/FilesystemFileStorageService.java @@ -63,15 +63,21 @@ public FilesystemFileStorageService(String baseDir) { @Override public void deleteFile(String directory, FileEntry fileEntry) { - Path directoryPath = resolveDirectoryPath(directory); - Path filePath = resolveFilePath(directoryPath, directory, fileEntry.getUrl()); + if (fileEntry != null) { + Path directoryPath = resolveDirectoryPath(directory); + Path filePath = resolveFilePath(directoryPath, directory, fileEntry.getUrl()); - File file = filePath.toFile(); + File file = filePath.toFile(); + + if (!file.exists()) { + return; + } - boolean deleted = file.delete(); + boolean deleted = file.delete(); - if (!deleted) { - throw new FileStorageException("File %s cannot be deleted".formatted(directoryPath)); + if (!deleted) { + throw new FileStorageException("File %s cannot be deleted".formatted(filePath)); + } } } From c4b0d103b4c937986255f392a0bf22c7b77a4a57 Mon Sep 17 00:00:00 2001 From: mkriskovic Date: Sat, 30 May 2026 19:04:15 +0200 Subject: [PATCH 3/4] 4000 make loading unexisting data not crash --- .../file/storage/KnowledgeBaseFileStorage.java | 8 ++++++++ .../file/storage/KnowledgeBaseFileStorageImpl.java | 5 +++++ .../facade/KnowledgeBaseDocumentChunkFacadeImpl.java | 2 +- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/server/libs/platform/platform-knowledge-base/platform-knowledge-base-file-storage/platform-knowledge-base-file-storage-api/src/main/java/com/bytechef/platform/knowledgebase/file/storage/KnowledgeBaseFileStorage.java b/server/libs/platform/platform-knowledge-base/platform-knowledge-base-file-storage/platform-knowledge-base-file-storage-api/src/main/java/com/bytechef/platform/knowledgebase/file/storage/KnowledgeBaseFileStorage.java index dc7f6e79a45..734aff55330 100644 --- a/server/libs/platform/platform-knowledge-base/platform-knowledge-base-file-storage/platform-knowledge-base-file-storage-api/src/main/java/com/bytechef/platform/knowledgebase/file/storage/KnowledgeBaseFileStorage.java +++ b/server/libs/platform/platform-knowledge-base/platform-knowledge-base-file-storage/platform-knowledge-base-file-storage-api/src/main/java/com/bytechef/platform/knowledgebase/file/storage/KnowledgeBaseFileStorage.java @@ -39,6 +39,14 @@ public interface KnowledgeBaseFileStorage { */ void deleteDocument(FileEntry fileEntry); + /** + * Checks whether the chunk content file associated with the given file entry exists in storage. + * + * @param fileEntry the file entry representing the chunk to check + * @return true if the file exists, false otherwise + */ + boolean chunkContentExists(FileEntry fileEntry); + /** * Reads the chunk content associated with the given file entry. * diff --git a/server/libs/platform/platform-knowledge-base/platform-knowledge-base-file-storage/platform-knowledge-base-file-storage-impl/src/main/java/com/bytechef/platform/knowledgebase/file/storage/KnowledgeBaseFileStorageImpl.java b/server/libs/platform/platform-knowledge-base/platform-knowledge-base-file-storage/platform-knowledge-base-file-storage-impl/src/main/java/com/bytechef/platform/knowledgebase/file/storage/KnowledgeBaseFileStorageImpl.java index 8caa02c9646..6253ba9063e 100644 --- a/server/libs/platform/platform-knowledge-base/platform-knowledge-base-file-storage/platform-knowledge-base-file-storage-impl/src/main/java/com/bytechef/platform/knowledgebase/file/storage/KnowledgeBaseFileStorageImpl.java +++ b/server/libs/platform/platform-knowledge-base/platform-knowledge-base-file-storage/platform-knowledge-base-file-storage-impl/src/main/java/com/bytechef/platform/knowledgebase/file/storage/KnowledgeBaseFileStorageImpl.java @@ -38,6 +38,11 @@ public KnowledgeBaseFileStorageImpl(FileStorageService fileStorageService) { this.fileStorageService = fileStorageService; } + @Override + public boolean chunkContentExists(FileEntry fileEntry) { + return fileStorageService.fileExists(KNOWLEDGE_BASE_CHUNKS_DIR, fileEntry); + } + @Override public void deleteChunkContent(FileEntry fileEntry) { fileStorageService.deleteFile(KNOWLEDGE_BASE_CHUNKS_DIR, fileEntry); diff --git a/server/libs/platform/platform-knowledge-base/platform-knowledge-base-service/src/main/java/com/bytechef/platform/knowledgebase/facade/KnowledgeBaseDocumentChunkFacadeImpl.java b/server/libs/platform/platform-knowledge-base/platform-knowledge-base-service/src/main/java/com/bytechef/platform/knowledgebase/facade/KnowledgeBaseDocumentChunkFacadeImpl.java index 213e7b22e61..26bbff9d9c3 100644 --- a/server/libs/platform/platform-knowledge-base/platform-knowledge-base-service/src/main/java/com/bytechef/platform/knowledgebase/facade/KnowledgeBaseDocumentChunkFacadeImpl.java +++ b/server/libs/platform/platform-knowledge-base/platform-knowledge-base-service/src/main/java/com/bytechef/platform/knowledgebase/facade/KnowledgeBaseDocumentChunkFacadeImpl.java @@ -64,7 +64,7 @@ public List getKnowledgeBaseDocumentChunksByDocument for (KnowledgeBaseDocumentChunk chunk : chunks) { FileEntry contentFileEntry = chunk.getContent(); - if (contentFileEntry != null) { + if (contentFileEntry != null && knowledgeBaseFileStorage.chunkContentExists(contentFileEntry)) { String textContent = knowledgeBaseFileStorage.readChunkContent(contentFileEntry); chunk.setTextContent(textContent); From edd8a74f7436cec8675071d4fff94d6d17f08cde Mon Sep 17 00:00:00 2001 From: Ivica Cardic Date: Sat, 6 Jun 2026 10:19:19 +0200 Subject: [PATCH 4/4] 4000 SF --- .../vectorstore/reader/cluster/HtmlJsoupDocumentReader.java | 6 ++++-- .../ai/vectorstore/reader/cluster/JsonDocumentReader.java | 6 ++++-- .../vectorstore/reader/cluster/PagePdfDocumentReader.java | 6 ++++-- .../ai/vectorstore/reader/cluster/TikaDocumentReader.java | 6 ++++-- 4 files changed, 16 insertions(+), 8 deletions(-) diff --git a/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/HtmlJsoupDocumentReader.java b/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/HtmlJsoupDocumentReader.java index 2bda090e4b5..39d2321c618 100644 --- a/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/HtmlJsoupDocumentReader.java +++ b/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/HtmlJsoupDocumentReader.java @@ -22,6 +22,7 @@ import com.bytechef.component.definition.ClusterElementDefinition; import com.bytechef.component.definition.ComponentDsl; import com.bytechef.component.definition.Context; +import com.bytechef.component.definition.FileEntry; import com.bytechef.component.definition.Parameters; import com.bytechef.platform.component.definition.ai.vectorstore.DocumentReaderFunction; @@ -49,9 +50,10 @@ protected static org.springframework.ai.document.DocumentReader apply( FileResult result = getFile(inputParameters, context); + FileEntry fileEntry = result.fileEntry(); + return withFilename( new org.springframework.ai.reader.jsoup.JsoupDocumentReader(result.fileSystemResource()), - result.fileEntry() - .getName()); + fileEntry.getName()); } } diff --git a/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/JsonDocumentReader.java b/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/JsonDocumentReader.java index 5939b5c2c11..a3a198e1719 100644 --- a/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/JsonDocumentReader.java +++ b/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/JsonDocumentReader.java @@ -25,6 +25,7 @@ import com.bytechef.component.definition.ClusterElementDefinition; import com.bytechef.component.definition.ComponentDsl; import com.bytechef.component.definition.Context; +import com.bytechef.component.definition.FileEntry; import com.bytechef.component.definition.Parameters; import com.bytechef.platform.component.definition.ai.vectorstore.DocumentReaderFunction; import java.util.List; @@ -61,7 +62,8 @@ protected static org.springframework.ai.document.DocumentReader apply( JsonReader jsonReader = (keys == null) ? new JsonReader(result.fileSystemResource()) : new JsonReader(result.fileSystemResource(), keys.toArray(new String[0])); - return withFilename(jsonReader, result.fileEntry() - .getName()); + FileEntry fileEntry = result.fileEntry(); + + return withFilename(jsonReader, fileEntry.getName()); } } diff --git a/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/PagePdfDocumentReader.java b/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/PagePdfDocumentReader.java index 2e8dcb990f6..8ef066f6fb6 100644 --- a/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/PagePdfDocumentReader.java +++ b/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/PagePdfDocumentReader.java @@ -22,6 +22,7 @@ import com.bytechef.component.definition.ClusterElementDefinition; import com.bytechef.component.definition.ComponentDsl; import com.bytechef.component.definition.Context; +import com.bytechef.component.definition.FileEntry; import com.bytechef.component.definition.Parameters; import com.bytechef.platform.component.definition.ai.vectorstore.DocumentReaderFunction; import org.springframework.ai.reader.ExtractedTextFormatter; @@ -55,9 +56,10 @@ protected static org.springframework.ai.document.DocumentReader apply( .withPagesPerDocument(1) .build(); + FileEntry fileEntry = result.fileEntry(); + return withFilename( new org.springframework.ai.reader.pdf.PagePdfDocumentReader(result.fileSystemResource(), config), - result.fileEntry() - .getName()); + fileEntry.getName()); } } diff --git a/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/TikaDocumentReader.java b/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/TikaDocumentReader.java index e4d3470db5f..256efd497c1 100644 --- a/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/TikaDocumentReader.java +++ b/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/TikaDocumentReader.java @@ -22,6 +22,7 @@ import com.bytechef.component.definition.ClusterElementDefinition; import com.bytechef.component.definition.ComponentDsl; import com.bytechef.component.definition.Context; +import com.bytechef.component.definition.FileEntry; import com.bytechef.component.definition.Parameters; import com.bytechef.platform.component.definition.ai.vectorstore.DocumentReaderFunction; @@ -51,9 +52,10 @@ protected static org.springframework.ai.document.DocumentReader apply( FileResult result = getFile(inputParameters, context); + FileEntry fileEntry = result.fileEntry(); + return withFilename( new org.springframework.ai.reader.tika.TikaDocumentReader(result.fileSystemResource()), - result.fileEntry() - .getName()); + fileEntry.getName()); } }