Skip to content

Commit d82eec3

Browse files
lsh1215ilayaperumalg
authored andcommitted
OpenSearch: omit explicit IDs when manageDocumentIds=false; add unit/ITs; AWS Serverless compat.
- Update OpenSearchVectorStore#doAdd to omit explicit document IDs when manageDocumentIds=false, enabling AWS OpenSearch Serverless compatibility - Add unit tests for document ID management logic in doAdd - Add integration tests covering explicit/non-explicit ID modes and delete-by-ID behavior Closes gh-3818 Signed-off-by: sanghun <vitash1215@gmail.com> Set manageDocumentIds default to true for backward compatibility The manageDocumentIds flag was initially set to false, which would break existing users who rely on explicit document ID management. This change sets the default to true to preserve the current behavior for all existing OpenSearch users. AWS OpenSearch Serverless users can explicitly opt-in by setting manageDocumentIds(false) when they need auto-generated IDs due to the platform's restrictions on custom document IDs. This ensures backward compatibility while still providing the flexibility needed for AWS Serverless environments. Related: gh-3818 Signed-off-by: sanghun <vitash1215@gmail.com> Fix Checkstyle violations in OpenSearchVectorStoreTest Resolved 14 Checkstyle errors that blocked the build process: - Corrected import statement ordering - Added 'this.' qualifier to instance variable references - Added missing newline at end of file This ensures compliance with Spring AI coding standards and enables successful compilation after rebasing onto upstream/main. Signed-off-by: sanghun <vitash1215@gmail.com> OpenSearch: omit explicit IDs when manageDocumentIds=false; add unit/ITs; AWS Serverless compat. - Update OpenSearchVectorStore#doAdd to omit explicit document IDs when manageDocumentIds=false, enabling AWS OpenSearch Serverless compatibility - Add unit tests for document ID management logic in doAdd - Add integration tests covering explicit/non-explicit ID modes and delete-by-ID behavior Closes gh-3818 Set manageDocumentIds default to true for backward compatibility AWS OpenSearch Serverless users can explicitly opt-in by setting manageDocumentIds(false) when they need auto-generated IDs due to the platform's restrictions on custom document IDs. This ensures backward compatibility while still providing the flexibility needed for AWS Serverless environments. Related: gh-3818 Signed-off-by: sanghun <vitash1215@gmail.com>
1 parent e16d75a commit d82eec3

File tree

3 files changed

+414
-2
lines changed

3 files changed

+414
-2
lines changed

vector-stores/spring-ai-opensearch-store/src/main/java/org/springframework/ai/vectorstore/opensearch/OpenSearchVectorStore.java

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,16 @@
102102
* }</pre>
103103
*
104104
* <p>
105+
* AWS OpenSearch Serverless usage example:
106+
* </p>
107+
* <pre>{@code
108+
* OpenSearchVectorStore vectorStore = OpenSearchVectorStore.builder(openSearchClient, embeddingModel)
109+
* .initializeSchema(true)
110+
* .manageDocumentIds(false) // Required for AWS OpenSearch Serverless
111+
* .build();
112+
* }</pre>
113+
*
114+
* <p>
105115
* Advanced configuration example:
106116
* </p>
107117
* <pre>{@code
@@ -137,6 +147,7 @@
137147
* @author Christian Tzolov
138148
* @author Thomas Vitale
139149
* @author inpink
150+
* @author Sanghun Lee
140151
* @since 1.0.0
141152
*/
142153
public class OpenSearchVectorStore extends AbstractObservationVectorStore implements InitializingBean {
@@ -174,6 +185,8 @@ public class OpenSearchVectorStore extends AbstractObservationVectorStore implem
174185

175186
private final int dimensions;
176187

188+
private final boolean manageDocumentIds;
189+
177190
/**
178191
* Creates a new OpenSearchVectorStore using the builder pattern.
179192
* @param builder The configured builder instance
@@ -193,6 +206,7 @@ protected OpenSearchVectorStore(Builder builder) {
193206
this.initializeSchema = builder.initializeSchema;
194207
this.useApproximateKnn = builder.useApproximateKnn;
195208
this.dimensions = builder.dimensions;
209+
this.manageDocumentIds = builder.manageDocumentIds;
196210
}
197211

198212
/**
@@ -216,14 +230,27 @@ public void doAdd(List<Document> documents) {
216230
for (Document document : documents) {
217231
OpenSearchDocument openSearchDocument = new OpenSearchDocument(document.getId(), document.getText(),
218232
document.getMetadata(), embedding.get(documents.indexOf(document)));
219-
bulkRequestBuilder.operations(op -> op
220-
.index(idx -> idx.index(this.index).id(openSearchDocument.id()).document(openSearchDocument)));
233+
234+
// Conditionally set document ID based on manageDocumentIds flag
235+
if (this.manageDocumentIds) {
236+
bulkRequestBuilder.operations(op -> op
237+
.index(idx -> idx.index(this.index).id(openSearchDocument.id()).document(openSearchDocument)));
238+
}
239+
else {
240+
bulkRequestBuilder
241+
.operations(op -> op.index(idx -> idx.index(this.index).document(openSearchDocument)));
242+
}
221243
}
222244
bulkRequest(bulkRequestBuilder.build());
223245
}
224246

225247
@Override
226248
public void doDelete(List<String> idList) {
249+
if (!this.manageDocumentIds) {
250+
logger.warn("Document ID management is disabled. Delete operations may not work as expected "
251+
+ "since document IDs are auto-generated by OpenSearch. Consider using filter-based deletion instead.");
252+
}
253+
227254
BulkRequest.Builder bulkRequestBuilder = new BulkRequest.Builder();
228255
for (String id : idList) {
229256
bulkRequestBuilder.operations(op -> op.delete(idx -> idx.index(this.index).id(id)));
@@ -481,6 +508,8 @@ public static class Builder extends AbstractVectorStoreBuilder<Builder> {
481508

482509
private int dimensions = 1536;
483510

511+
private boolean manageDocumentIds = true;
512+
484513
/**
485514
* Sets the OpenSearch client.
486515
* @param openSearchClient The OpenSearch client to use
@@ -585,6 +614,28 @@ public Builder dimensions(int dimensions) {
585614
return this;
586615
}
587616

617+
/**
618+
* Sets whether to manage document IDs during indexing operations.
619+
* <p>
620+
* When set to {@code true} (default), document IDs will be explicitly set during
621+
* indexing operations. When set to {@code false}, OpenSearch will auto-generate
622+
* document IDs, which is required for AWS OpenSearch Serverless vector search
623+
* collections.
624+
* </p>
625+
* <p>
626+
* Note: When document ID management is disabled, the {@link #doDelete(List)}
627+
* method may not work as expected since document IDs are auto-generated by
628+
* OpenSearch.
629+
* </p>
630+
* @param manageDocumentIds true to manage document IDs (default), false to let
631+
* OpenSearch auto-generate IDs
632+
* @return The builder instance
633+
*/
634+
public Builder manageDocumentIds(boolean manageDocumentIds) {
635+
this.manageDocumentIds = manageDocumentIds;
636+
return this;
637+
}
638+
588639
/**
589640
* Builds a new OpenSearchVectorStore instance with the configured properties.
590641
* @return A new OpenSearchVectorStore instance

vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/opensearch/OpenSearchVectorStoreIT.java

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -808,6 +808,161 @@ public void approximateSearchThresholdTest(String similarityFunction) {
808808
});
809809
}
810810

811+
@ParameterizedTest(name = "manageDocumentIds={0}")
812+
@ValueSource(booleans = { true, false })
813+
void testManageDocumentIdsSetting(boolean manageDocumentIds) {
814+
getContextRunner().run(context -> {
815+
OpenSearchVectorStore vectorStore = context.getBean("vectorStore", OpenSearchVectorStore.class);
816+
817+
// Create a new vector store with specific manageDocumentIds setting
818+
OpenSearchVectorStore testVectorStore = OpenSearchVectorStore
819+
.builder((OpenSearchClient) vectorStore.getNativeClient().orElseThrow(),
820+
context.getBean(EmbeddingModel.class))
821+
.manageDocumentIds(manageDocumentIds)
822+
.index("test_manage_document_ids_" + manageDocumentIds)
823+
.initializeSchema(true)
824+
.build();
825+
826+
// Test documents
827+
List<Document> testDocuments = List.of(new Document("doc1", "Test content 1", Map.of("key1", "value1")),
828+
new Document("doc2", "Test content 2", Map.of("key2", "value2")));
829+
830+
// Add documents
831+
testVectorStore.add(testDocuments);
832+
833+
// Wait for indexing
834+
Awaitility.await()
835+
.until(() -> testVectorStore
836+
.similaritySearch(SearchRequest.builder().query("Test content").topK(2).build()), hasSize(2));
837+
838+
// Search and verify results
839+
List<Document> results = testVectorStore
840+
.similaritySearch(SearchRequest.builder().query("Test content").topK(2).build());
841+
842+
assertThat(results).hasSize(2);
843+
844+
// Verify document content and metadata are preserved
845+
assertThat(results.stream().map(Document::getText).toList()).containsExactlyInAnyOrder("Test content 1",
846+
"Test content 2");
847+
848+
assertThat(results.stream().map(doc -> doc.getMetadata().get("key1")).toList()).contains("value1");
849+
assertThat(results.stream().map(doc -> doc.getMetadata().get("key2")).toList()).contains("value2");
850+
851+
// Clean up
852+
testVectorStore.delete(testDocuments.stream().map(Document::getId).toList());
853+
});
854+
}
855+
856+
@Test
857+
void testManageDocumentIdsFalseForAWSOpenSearchServerless() {
858+
getContextRunner().run(context -> {
859+
OpenSearchVectorStore vectorStore = context.getBean("vectorStore", OpenSearchVectorStore.class);
860+
861+
// Create vector store with manageDocumentIds=false (AWS OpenSearch Serverless
862+
// mode)
863+
OpenSearchVectorStore awsCompatibleVectorStore = OpenSearchVectorStore
864+
.builder((OpenSearchClient) vectorStore.getNativeClient().orElseThrow(),
865+
context.getBean(EmbeddingModel.class))
866+
.manageDocumentIds(false)
867+
.index("test_aws_serverless_compatible")
868+
.initializeSchema(true)
869+
.build();
870+
871+
// Test documents with IDs (these should be ignored when
872+
// manageDocumentIds=false)
873+
List<Document> testDocuments = List.of(
874+
new Document("custom-id-1", "AWS Serverless content 1", Map.of("env", "aws-serverless")),
875+
new Document("custom-id-2", "AWS Serverless content 2", Map.of("env", "aws-serverless")));
876+
877+
// Add documents - should work without explicit document ID errors
878+
awsCompatibleVectorStore.add(testDocuments);
879+
880+
// Wait for indexing
881+
Awaitility.await()
882+
.until(() -> awsCompatibleVectorStore
883+
.similaritySearch(SearchRequest.builder().query("AWS Serverless").topK(2).build()), hasSize(2));
884+
885+
// Search and verify results
886+
List<Document> results = awsCompatibleVectorStore
887+
.similaritySearch(SearchRequest.builder().query("AWS Serverless").topK(2).build());
888+
889+
assertThat(results).hasSize(2);
890+
891+
// Verify content is preserved
892+
assertThat(results.stream().map(Document::getText).toList())
893+
.containsExactlyInAnyOrder("AWS Serverless content 1", "AWS Serverless content 2");
894+
895+
// Verify metadata is preserved
896+
assertThat(results.stream().map(doc -> doc.getMetadata().get("env")).toList())
897+
.containsOnly("aws-serverless");
898+
899+
// Clean up
900+
awsCompatibleVectorStore.delete(List.of("_all"));
901+
});
902+
}
903+
904+
@Test
905+
void testManageDocumentIdsTrueWithExplicitIds() {
906+
getContextRunner().run(context -> {
907+
OpenSearchVectorStore vectorStore = context.getBean("vectorStore", OpenSearchVectorStore.class);
908+
909+
// Create vector store with manageDocumentIds=true (default behavior)
910+
OpenSearchVectorStore explicitIdVectorStore = OpenSearchVectorStore
911+
.builder((OpenSearchClient) vectorStore.getNativeClient().orElseThrow(),
912+
context.getBean(EmbeddingModel.class))
913+
.manageDocumentIds(true)
914+
.index("test_explicit_ids")
915+
.initializeSchema(true)
916+
.build();
917+
918+
// Test documents with specific IDs
919+
List<Document> testDocuments = List.of(
920+
new Document("explicit-id-1", "Explicit ID content 1", Map.of("type", "explicit")),
921+
new Document("explicit-id-2", "Explicit ID content 2", Map.of("type", "explicit")));
922+
923+
// Add documents
924+
explicitIdVectorStore.add(testDocuments);
925+
926+
// Wait for indexing
927+
Awaitility.await()
928+
.until(() -> explicitIdVectorStore
929+
.similaritySearch(SearchRequest.builder().query("Explicit ID").topK(2).build()), hasSize(2));
930+
931+
// Search and verify results
932+
List<Document> results = explicitIdVectorStore
933+
.similaritySearch(SearchRequest.builder().query("Explicit ID").topK(2).build());
934+
935+
assertThat(results).hasSize(2);
936+
937+
// Verify document IDs are preserved
938+
assertThat(results.stream().map(Document::getId).toList()).containsExactlyInAnyOrder("explicit-id-1",
939+
"explicit-id-2");
940+
941+
// Verify content and metadata
942+
assertThat(results.stream().map(Document::getText).toList())
943+
.containsExactlyInAnyOrder("Explicit ID content 1", "Explicit ID content 2");
944+
945+
assertThat(results.stream().map(doc -> doc.getMetadata().get("type")).toList()).containsOnly("explicit");
946+
947+
// Test deletion by specific IDs
948+
explicitIdVectorStore.delete(List.of("explicit-id-1"));
949+
950+
Awaitility.await()
951+
.until(() -> explicitIdVectorStore
952+
.similaritySearch(SearchRequest.builder().query("Explicit ID").topK(2).build()), hasSize(1));
953+
954+
// Verify only one document remains
955+
results = explicitIdVectorStore
956+
.similaritySearch(SearchRequest.builder().query("Explicit ID").topK(2).build());
957+
958+
assertThat(results).hasSize(1);
959+
assertThat(results.get(0).getId()).isEqualTo("explicit-id-2");
960+
961+
// Clean up
962+
explicitIdVectorStore.delete(List.of("explicit-id-2"));
963+
});
964+
}
965+
811966
@SpringBootConfiguration
812967
public static class TestApplication {
813968

0 commit comments

Comments
 (0)