diff --git a/server/libs/core/file-storage/file-storage-filesystem-service/src/main/java/com/bytechef/file/storage/filesystem/service/FilesystemFileStorageService.java b/server/libs/core/file-storage/file-storage-filesystem-service/src/main/java/com/bytechef/file/storage/filesystem/service/FilesystemFileStorageService.java index 73371394313..c66825d8f9d 100644 --- a/server/libs/core/file-storage/file-storage-filesystem-service/src/main/java/com/bytechef/file/storage/filesystem/service/FilesystemFileStorageService.java +++ b/server/libs/core/file-storage/file-storage-filesystem-service/src/main/java/com/bytechef/file/storage/filesystem/service/FilesystemFileStorageService.java @@ -63,15 +63,21 @@ public FilesystemFileStorageService(String baseDir) { @Override public void deleteFile(String directory, FileEntry fileEntry) { - Path directoryPath = resolveDirectoryPath(directory); - Path filePath = resolveFilePath(directoryPath, directory, fileEntry.getUrl()); + if (fileEntry != null) { + Path directoryPath = resolveDirectoryPath(directory); + Path filePath = resolveFilePath(directoryPath, directory, fileEntry.getUrl()); - File file = filePath.toFile(); + File file = filePath.toFile(); + + if (!file.exists()) { + return; + } - boolean deleted = file.delete(); + boolean deleted = file.delete(); - if (!deleted) { - throw new FileStorageException("File %s cannot be deleted".formatted(directoryPath)); + if (!deleted) { + throw new FileStorageException("File %s cannot be deleted".formatted(filePath)); + } } } diff --git a/server/libs/modules/components/ai/vectorstore/knowledgebase/src/main/java/com/bytechef/component/ai/vectorstore/knowledgebase/cluster/KnowledgeBaseVectorStore.java b/server/libs/modules/components/ai/vectorstore/knowledgebase/src/main/java/com/bytechef/component/ai/vectorstore/knowledgebase/cluster/KnowledgeBaseVectorStore.java index 128c614171a..75df7ebe1d7 100644 --- a/server/libs/modules/components/ai/vectorstore/knowledgebase/src/main/java/com/bytechef/component/ai/vectorstore/knowledgebase/cluster/KnowledgeBaseVectorStore.java +++ b/server/libs/modules/components/ai/vectorstore/knowledgebase/src/main/java/com/bytechef/component/ai/vectorstore/knowledgebase/cluster/KnowledgeBaseVectorStore.java @@ -117,10 +117,10 @@ private static String deriveDocumentName(List documents) { Map metadata = document.getMetadata(); - Object source = metadata.get("source"); + Object filename = metadata.get("filename"); - if (source != null) { - return source.toString(); + if (filename != null) { + return filename.toString(); } } diff --git a/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/AbstractDocumentReader.java b/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/AbstractDocumentReader.java index 231ea91f8c9..ec9e6b7bd55 100644 --- a/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/AbstractDocumentReader.java +++ b/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/AbstractDocumentReader.java @@ -21,6 +21,10 @@ import com.bytechef.component.definition.FileEntry; import com.bytechef.component.definition.Parameters; import java.io.File; +import java.util.HashMap; +import java.util.Map; +import org.springframework.ai.document.Document; +import org.springframework.ai.document.DocumentReader; import org.springframework.core.io.FileSystemResource; /** @@ -38,6 +42,19 @@ protected static FileResult getFile(Parameters inputParameters, Context context) return new FileResult(fileEntry, fileSystemResource); } + protected static DocumentReader withFilename(DocumentReader reader, String filename) { + return () -> reader.read() + .stream() + .map(document -> { + Map metadata = new HashMap<>(document.getMetadata()); + + metadata.put("filename", filename); + + return new Document(document.getId(), document.getText(), metadata); + }) + .toList(); + } + protected record FileResult(FileEntry fileEntry, FileSystemResource fileSystemResource) { } } diff --git a/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/HtmlJsoupDocumentReader.java b/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/HtmlJsoupDocumentReader.java index 995e1b6970f..39d2321c618 100644 --- a/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/HtmlJsoupDocumentReader.java +++ b/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/HtmlJsoupDocumentReader.java @@ -22,6 +22,7 @@ import com.bytechef.component.definition.ClusterElementDefinition; import com.bytechef.component.definition.ComponentDsl; import com.bytechef.component.definition.Context; +import com.bytechef.component.definition.FileEntry; import com.bytechef.component.definition.Parameters; import com.bytechef.platform.component.definition.ai.vectorstore.DocumentReaderFunction; @@ -49,6 +50,10 @@ protected static org.springframework.ai.document.DocumentReader apply( FileResult result = getFile(inputParameters, context); - return new org.springframework.ai.reader.jsoup.JsoupDocumentReader(result.fileSystemResource()); + FileEntry fileEntry = result.fileEntry(); + + return withFilename( + new org.springframework.ai.reader.jsoup.JsoupDocumentReader(result.fileSystemResource()), + fileEntry.getName()); } } diff --git a/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/JsonDocumentReader.java b/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/JsonDocumentReader.java index f1cd2ce7ce8..a3a198e1719 100644 --- a/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/JsonDocumentReader.java +++ b/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/JsonDocumentReader.java @@ -25,6 +25,7 @@ import com.bytechef.component.definition.ClusterElementDefinition; import com.bytechef.component.definition.ComponentDsl; import com.bytechef.component.definition.Context; +import com.bytechef.component.definition.FileEntry; import com.bytechef.component.definition.Parameters; import com.bytechef.platform.component.definition.ai.vectorstore.DocumentReaderFunction; import java.util.List; @@ -51,14 +52,18 @@ public class JsonDocumentReader extends AbstractDocumentReader { .required(false)) .object(() -> JsonDocumentReader::apply); - protected static JsonReader apply( + protected static org.springframework.ai.document.DocumentReader apply( Parameters inputParameters, Parameters connectionParameters, Context context) { FileResult result = getFile(inputParameters, context); List keys = inputParameters.getList(JSON_KEYS_TO_USE, String.class); - return (keys == null) ? new JsonReader(result.fileSystemResource()) + JsonReader jsonReader = (keys == null) ? new JsonReader(result.fileSystemResource()) : new JsonReader(result.fileSystemResource(), keys.toArray(new String[0])); + + FileEntry fileEntry = result.fileEntry(); + + return withFilename(jsonReader, fileEntry.getName()); } } diff --git a/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/PagePdfDocumentReader.java b/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/PagePdfDocumentReader.java index c8d082644b1..8ef066f6fb6 100644 --- a/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/PagePdfDocumentReader.java +++ b/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/PagePdfDocumentReader.java @@ -22,6 +22,7 @@ import com.bytechef.component.definition.ClusterElementDefinition; import com.bytechef.component.definition.ComponentDsl; import com.bytechef.component.definition.Context; +import com.bytechef.component.definition.FileEntry; import com.bytechef.component.definition.Parameters; import com.bytechef.platform.component.definition.ai.vectorstore.DocumentReaderFunction; import org.springframework.ai.reader.ExtractedTextFormatter; @@ -41,7 +42,7 @@ public class PagePdfDocumentReader extends AbstractDocumentReader { DOCUMENT_PROPERTY) .object(() -> PagePdfDocumentReader::apply); - protected static org.springframework.ai.reader.pdf.PagePdfDocumentReader apply( + protected static org.springframework.ai.document.DocumentReader apply( Parameters inputParameters, Parameters connectionParameters, Context context) { FileResult result = getFile(inputParameters, context); @@ -55,6 +56,10 @@ protected static org.springframework.ai.reader.pdf.PagePdfDocumentReader apply( .withPagesPerDocument(1) .build(); - return new org.springframework.ai.reader.pdf.PagePdfDocumentReader(result.fileSystemResource(), config); + FileEntry fileEntry = result.fileEntry(); + + return withFilename( + new org.springframework.ai.reader.pdf.PagePdfDocumentReader(result.fileSystemResource(), config), + fileEntry.getName()); } } diff --git a/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/TikaDocumentReader.java b/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/TikaDocumentReader.java index 44f161bf2ce..256efd497c1 100644 --- a/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/TikaDocumentReader.java +++ b/server/libs/modules/components/ai/vectorstore/reader/src/main/java/com/bytechef/component/ai/vectorstore/reader/cluster/TikaDocumentReader.java @@ -22,6 +22,7 @@ import com.bytechef.component.definition.ClusterElementDefinition; import com.bytechef.component.definition.ComponentDsl; import com.bytechef.component.definition.Context; +import com.bytechef.component.definition.FileEntry; import com.bytechef.component.definition.Parameters; import com.bytechef.platform.component.definition.ai.vectorstore.DocumentReaderFunction; @@ -51,6 +52,10 @@ protected static org.springframework.ai.document.DocumentReader apply( FileResult result = getFile(inputParameters, context); - return new org.springframework.ai.reader.tika.TikaDocumentReader(result.fileSystemResource()); + FileEntry fileEntry = result.fileEntry(); + + return withFilename( + new org.springframework.ai.reader.tika.TikaDocumentReader(result.fileSystemResource()), + fileEntry.getName()); } } diff --git a/server/libs/platform/platform-knowledge-base/platform-knowledge-base-file-storage/platform-knowledge-base-file-storage-api/src/main/java/com/bytechef/platform/knowledgebase/file/storage/KnowledgeBaseFileStorage.java b/server/libs/platform/platform-knowledge-base/platform-knowledge-base-file-storage/platform-knowledge-base-file-storage-api/src/main/java/com/bytechef/platform/knowledgebase/file/storage/KnowledgeBaseFileStorage.java index dc7f6e79a45..734aff55330 100644 --- a/server/libs/platform/platform-knowledge-base/platform-knowledge-base-file-storage/platform-knowledge-base-file-storage-api/src/main/java/com/bytechef/platform/knowledgebase/file/storage/KnowledgeBaseFileStorage.java +++ b/server/libs/platform/platform-knowledge-base/platform-knowledge-base-file-storage/platform-knowledge-base-file-storage-api/src/main/java/com/bytechef/platform/knowledgebase/file/storage/KnowledgeBaseFileStorage.java @@ -39,6 +39,14 @@ public interface KnowledgeBaseFileStorage { */ void deleteDocument(FileEntry fileEntry); + /** + * Checks whether the chunk content file associated with the given file entry exists in storage. + * + * @param fileEntry the file entry representing the chunk to check + * @return true if the file exists, false otherwise + */ + boolean chunkContentExists(FileEntry fileEntry); + /** * Reads the chunk content associated with the given file entry. * diff --git a/server/libs/platform/platform-knowledge-base/platform-knowledge-base-file-storage/platform-knowledge-base-file-storage-impl/src/main/java/com/bytechef/platform/knowledgebase/file/storage/KnowledgeBaseFileStorageImpl.java b/server/libs/platform/platform-knowledge-base/platform-knowledge-base-file-storage/platform-knowledge-base-file-storage-impl/src/main/java/com/bytechef/platform/knowledgebase/file/storage/KnowledgeBaseFileStorageImpl.java index 8caa02c9646..6253ba9063e 100644 --- a/server/libs/platform/platform-knowledge-base/platform-knowledge-base-file-storage/platform-knowledge-base-file-storage-impl/src/main/java/com/bytechef/platform/knowledgebase/file/storage/KnowledgeBaseFileStorageImpl.java +++ b/server/libs/platform/platform-knowledge-base/platform-knowledge-base-file-storage/platform-knowledge-base-file-storage-impl/src/main/java/com/bytechef/platform/knowledgebase/file/storage/KnowledgeBaseFileStorageImpl.java @@ -38,6 +38,11 @@ public KnowledgeBaseFileStorageImpl(FileStorageService fileStorageService) { this.fileStorageService = fileStorageService; } + @Override + public boolean chunkContentExists(FileEntry fileEntry) { + return fileStorageService.fileExists(KNOWLEDGE_BASE_CHUNKS_DIR, fileEntry); + } + @Override public void deleteChunkContent(FileEntry fileEntry) { fileStorageService.deleteFile(KNOWLEDGE_BASE_CHUNKS_DIR, fileEntry); diff --git a/server/libs/platform/platform-knowledge-base/platform-knowledge-base-service/src/main/java/com/bytechef/platform/knowledgebase/facade/KnowledgeBaseDocumentChunkFacadeImpl.java b/server/libs/platform/platform-knowledge-base/platform-knowledge-base-service/src/main/java/com/bytechef/platform/knowledgebase/facade/KnowledgeBaseDocumentChunkFacadeImpl.java index 213e7b22e61..26bbff9d9c3 100644 --- a/server/libs/platform/platform-knowledge-base/platform-knowledge-base-service/src/main/java/com/bytechef/platform/knowledgebase/facade/KnowledgeBaseDocumentChunkFacadeImpl.java +++ b/server/libs/platform/platform-knowledge-base/platform-knowledge-base-service/src/main/java/com/bytechef/platform/knowledgebase/facade/KnowledgeBaseDocumentChunkFacadeImpl.java @@ -64,7 +64,7 @@ public List getKnowledgeBaseDocumentChunksByDocument for (KnowledgeBaseDocumentChunk chunk : chunks) { FileEntry contentFileEntry = chunk.getContent(); - if (contentFileEntry != null) { + if (contentFileEntry != null && knowledgeBaseFileStorage.chunkContentExists(contentFileEntry)) { String textContent = knowledgeBaseFileStorage.readChunkContent(contentFileEntry); chunk.setTextContent(textContent);