Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
*
* @author Christian Tzolov
* @author Soby Chacko
* @author Alexandros Pappas
*/
@AutoConfiguration
@ConditionalOnClass({ EmbeddingModel.class, SearchIndexClient.class, AzureVectorStore.class })
Expand Down Expand Up @@ -102,6 +103,18 @@ public AzureVectorStore vectorStore(SearchIndexClient searchIndexClient, Embeddi
builder.defaultSimilarityThreshold(properties.getDefaultSimilarityThreshold());
}

if (properties.getContentFieldName() != null) {
builder.contentFieldName(properties.getContentFieldName());
}

if (properties.getEmbeddingFieldName() != null) {
builder.embeddingFieldName(properties.getEmbeddingFieldName());
}

if (properties.getMetadataFieldName() != null) {
builder.metadataFieldName(properties.getMetadataFieldName());
}

return builder.build();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
* Configuration properties for Azure Vector Store.
*
* @author Christian Tzolov
* @author Alexandros Pappas
*/
@ConfigurationProperties(AzureVectorStoreProperties.CONFIG_PREFIX)
public class AzureVectorStoreProperties extends CommonVectorStoreProperties {
Expand All @@ -42,6 +43,12 @@ public class AzureVectorStoreProperties extends CommonVectorStoreProperties {

private boolean useKeylessAuth;

/**
 * Name of the index field used for document content. When left {@code null} the
 * auto-configuration does not call the corresponding builder method, so the
 * store's own default applies.
 */
private String contentFieldName;

/**
 * Name of the index field used for embeddings. When left {@code null} the
 * auto-configuration does not call the corresponding builder method, so the
 * store's own default applies.
 */
private String embeddingFieldName;

/**
 * Name of the index field used for document metadata. When left {@code null} the
 * auto-configuration does not call the corresponding builder method, so the
 * store's own default applies.
 */
private String metadataFieldName;

/**
 * Returns the value of the {@code url} property.
 * @return the configured url
 */
public String getUrl() {
return this.url;
}
Expand Down Expand Up @@ -90,4 +97,28 @@ public void setUseKeylessAuth(boolean useKeylessAuth) {
this.useKeylessAuth = useKeylessAuth;
}

/**
 * Returns the configured name of the index field that holds document content.
 * @return the content field name, or {@code null} if none was configured
 */
public String getContentFieldName() {
return this.contentFieldName;
}

/**
 * Sets the name of the index field that holds document content.
 * @param contentFieldName the content field name; the value is stored as given,
 * without validation
 */
public void setContentFieldName(String contentFieldName) {
this.contentFieldName = contentFieldName;
}

/**
 * Returns the configured name of the index field that holds embeddings.
 * @return the embedding field name, or {@code null} if none was configured
 */
public String getEmbeddingFieldName() {
return this.embeddingFieldName;
}

/**
 * Sets the name of the index field that holds embeddings.
 * @param embeddingFieldName the embedding field name; the value is stored as
 * given, without validation
 */
public void setEmbeddingFieldName(String embeddingFieldName) {
this.embeddingFieldName = embeddingFieldName;
}

/**
 * Returns the configured name of the index field that holds document metadata.
 * @return the metadata field name, or {@code null} if none was configured
 */
public String getMetadataFieldName() {
return this.metadataFieldName;
}

/**
 * Sets the name of the index field that holds document metadata.
 * @param metadataFieldName the metadata field name; the value is stored as given,
 * without validation
 */
public void setMetadataFieldName(String metadataFieldName) {
this.metadataFieldName = metadataFieldName;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
* @author Thomas Vitale
* @author Soby Chacko
* @author Jinwoo Lee
* @author Alexandros Pappas
*/
public class AzureVectorStore extends AbstractObservationVectorStore implements InitializingBean {

Expand Down Expand Up @@ -119,6 +120,12 @@ public class AzureVectorStore extends AbstractObservationVectorStore implements
*/
private final List<MetadataField> filterMetadataFields;

/**
 * Name of the index field that document text is written to and read back from.
 */
private final String contentFieldName;

/**
 * Name of the index field that embeddings are written to and that vector queries
 * are run against.
 */
private final String embeddingFieldName;

/**
 * Name of the index field that the JSON-serialized document metadata is written
 * to and read back from.
 */
private final String metadataFieldName;

@Nullable
private SearchClient searchClient;

Expand All @@ -145,6 +152,9 @@ protected AzureVectorStore(Builder builder) {
this.defaultTopK = builder.defaultTopK;
this.defaultSimilarityThreshold = builder.defaultSimilarityThreshold;
this.indexName = builder.indexName;
this.contentFieldName = builder.contentFieldName;
this.embeddingFieldName = builder.embeddingFieldName;
this.metadataFieldName = builder.metadataFieldName;
this.filterExpressionConverter = new AzureAiSearchFilterExpressionConverter(this.filterMetadataFields);
}

Expand All @@ -166,9 +176,9 @@ public void doAdd(List<Document> documents) {
final var searchDocuments = documents.stream().map(document -> {
SearchDocument searchDocument = new SearchDocument();
searchDocument.put(ID_FIELD_NAME, document.getId());
searchDocument.put(EMBEDDING_FIELD_NAME, embeddings.get(documents.indexOf(document)));
searchDocument.put(CONTENT_FIELD_NAME, document.getText());
searchDocument.put(METADATA_FIELD_NAME, new JSONObject(document.getMetadata()).toJSONString());
searchDocument.put(this.embeddingFieldName, embeddings.get(documents.indexOf(document)));
searchDocument.put(this.contentFieldName, document.getText());
searchDocument.put(this.metadataFieldName, new JSONObject(document.getMetadata()).toJSONString());

// Add the filterable metadata fields as top level fields, allowing filter
// expressions on them.
Expand Down Expand Up @@ -223,7 +233,7 @@ public List<Document> doSimilaritySearch(SearchRequest request) {
.setKNearestNeighborsCount(request.getTopK())
// Set the fields to compare the vector against. This is a comma-delimited
// list of field names.
.setFields(EMBEDDING_FIELD_NAME);
.setFields(this.embeddingFieldName);

var searchOptions = new SearchOptions()
.setVectorSearchOptions(new VectorSearchOptions().setQueries(vectorQuery));
Expand All @@ -239,18 +249,19 @@ public List<Document> doSimilaritySearch(SearchRequest request) {
.filter(result -> result.getScore() >= request.getSimilarityThreshold())
.map(result -> {

final AzureSearchDocument entry = result.getDocument(AzureSearchDocument.class);
SearchDocument document = result.getDocument(SearchDocument.class);

String id = document.get(ID_FIELD_NAME) != null ? document.get(ID_FIELD_NAME).toString() : "";
String content = document.get(this.contentFieldName) != null
? document.get(this.contentFieldName).toString() : "";
String metadataJson = document.get(this.metadataFieldName) != null
? document.get(this.metadataFieldName).toString() : "";

Map<String, Object> metadata = parseMetadataToMutable(entry.metadata());
Map<String, Object> metadata = parseMetadataToMutable(metadataJson);

metadata.put(DocumentMetadata.DISTANCE.value(), 1.0 - result.getScore());

return Document.builder()
.id(entry.id())
.text(entry.content)
.metadata(metadata)
.score(result.getScore())
.build();
return Document.builder().id(id).text(content).metadata(metadata).score(result.getScore()).build();
})
.collect(Collectors.toList());
}
Expand All @@ -270,15 +281,15 @@ public void afterPropertiesSet() throws Exception {
fields.add(new SearchField(ID_FIELD_NAME, SearchFieldDataType.STRING).setKey(true)
.setFilterable(true)
.setSortable(true));
fields.add(new SearchField(EMBEDDING_FIELD_NAME, SearchFieldDataType.collection(SearchFieldDataType.SINGLE))
fields.add(new SearchField(this.embeddingFieldName, SearchFieldDataType.collection(SearchFieldDataType.SINGLE))
.setSearchable(true)
.setHidden(false)
.setVectorSearchDimensions(dimensions)
// This must match a vector search configuration name.
.setVectorSearchProfileName(SPRING_AI_VECTOR_PROFILE));
fields.add(new SearchField(CONTENT_FIELD_NAME, SearchFieldDataType.STRING).setSearchable(true)
fields.add(new SearchField(this.contentFieldName, SearchFieldDataType.STRING).setSearchable(true)
.setFilterable(true));
fields.add(new SearchField(METADATA_FIELD_NAME, SearchFieldDataType.STRING).setSearchable(true)
fields.add(new SearchField(this.metadataFieldName, SearchFieldDataType.STRING).setSearchable(true)
.setFilterable(true));

for (MetadataField filterableMetadataField : this.filterMetadataFields) {
Expand Down Expand Up @@ -367,13 +378,6 @@ public static MetadataField date(String name) {

}

/**
 * Internal data structure for retrieving and storing documents.
 * @param id the document identifier, used as the id of the resulting document
 * @param content the document text
 * @param embedding the stored embedding values (presumably the vector written at
 * add time; not read by the visible search code)
 * @param metadata the document metadata as a string; the search path hands it to
 * the metadata parser, so it is expected to be the serialized JSON form
 */
private record AzureSearchDocument(String id, String content, List<Float> embedding, String metadata) {

}

/**
* Builder class for creating {@link AzureVectorStore} instances.
* <p>
Expand All @@ -395,6 +399,12 @@ public static class Builder extends AbstractVectorStoreBuilder<Builder> {

private String indexName = DEFAULT_INDEX_NAME;

/** Content field name; starts out as {@code CONTENT_FIELD_NAME}. */
private String contentFieldName = CONTENT_FIELD_NAME;

/** Embedding field name; starts out as {@code EMBEDDING_FIELD_NAME}. */
private String embeddingFieldName = EMBEDDING_FIELD_NAME;

/** Metadata field name; starts out as {@code METADATA_FIELD_NAME}. */
private String metadataFieldName = METADATA_FIELD_NAME;

private Builder(SearchIndexClient searchIndexClient, EmbeddingModel embeddingModel) {
super(embeddingModel);
Assert.notNull(searchIndexClient, "SearchIndexClient must not be null");
Expand Down Expand Up @@ -460,6 +470,38 @@ public Builder defaultSimilarityThreshold(Double defaultSimilarityThreshold) {
return this;
}

/**
 * Sets the content field name in the Azure Search index.
 * <p>
 * A {@code null} or blank value selects the default ({@code CONTENT_FIELD_NAME}),
 * so that an empty configuration property can never yield an empty field name.
 * @param contentFieldName the name of the content field (defaults to "content"
 * when {@code null} or blank)
 * @return the builder instance
 */
public Builder contentFieldName(@Nullable String contentFieldName) {
	// An empty or whitespace-only name is not a usable index field name; treat it
	// exactly like "not specified".
	boolean useDefault = contentFieldName == null || contentFieldName.isBlank();
	this.contentFieldName = useDefault ? CONTENT_FIELD_NAME : contentFieldName;
	return this;
}

/**
 * Sets the embedding field name in the Azure Search index.
 * <p>
 * A {@code null} or blank value selects the default
 * ({@code EMBEDDING_FIELD_NAME}), so that an empty configuration property can
 * never yield an empty field name.
 * @param embeddingFieldName the name of the embedding field (defaults to
 * "embedding" when {@code null} or blank)
 * @return the builder instance
 */
public Builder embeddingFieldName(@Nullable String embeddingFieldName) {
	// An empty or whitespace-only name is not a usable index field name; treat it
	// exactly like "not specified".
	boolean useDefault = embeddingFieldName == null || embeddingFieldName.isBlank();
	this.embeddingFieldName = useDefault ? EMBEDDING_FIELD_NAME : embeddingFieldName;
	return this;
}

/**
 * Sets the metadata field name in the Azure Search index.
 * <p>
 * A {@code null} or blank value selects the default ({@code METADATA_FIELD_NAME}),
 * so that an empty configuration property can never yield an empty field name.
 * @param metadataFieldName the name of the metadata field (defaults to
 * "metadata" when {@code null} or blank)
 * @return the builder instance
 */
public Builder metadataFieldName(@Nullable String metadataFieldName) {
	// An empty or whitespace-only name is not a usable index field name; treat it
	// exactly like "not specified".
	boolean useDefault = metadataFieldName == null || metadataFieldName.isBlank();
	this.metadataFieldName = useDefault ? METADATA_FIELD_NAME : metadataFieldName;
	return this;
}

@Override
public AzureVectorStore build() {
return new AzureVectorStore(this);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
/**
* @author Christian Tzolov
* @author Thomas Vitale
* @author Alexandros Pappas
*/
@EnabledIfEnvironmentVariable(named = "AZURE_AI_SEARCH_API_KEY", matches = ".+")
@EnabledIfEnvironmentVariable(named = "AZURE_AI_SEARCH_ENDPOINT", matches = ".+")
Expand Down Expand Up @@ -329,6 +330,49 @@ void getNativeClientTest() {
});
}

/**
 * Verifies that a store configured with custom field names can query an index it
 * did not create. Runs only when {@code AZURE_AI_SEARCH_INDEX_NAME} names an
 * existing index whose content field is called {@code chunk_text}.
 */
@Test
@EnabledIfEnvironmentVariable(named = "AZURE_AI_SEARCH_INDEX_NAME", matches = ".+")
void customFieldNamesTest() throws Exception {
	// Connection details for the pre-existing index come from the environment.
	String existingIndexName = System.getenv("AZURE_AI_SEARCH_INDEX_NAME");
	String endpoint = System.getenv("AZURE_AI_SEARCH_ENDPOINT");
	String apiKey = System.getenv("AZURE_AI_SEARCH_API_KEY");

	SearchIndexClient searchIndexClient = new SearchIndexClientBuilder().endpoint(endpoint)
		.credential(new AzureKeyCredential(apiKey))
		.buildClient();

	TransformersEmbeddingModel embeddingModel = new TransformersEmbeddingModel();
	embeddingModel.afterPropertiesSet();

	// Field names must match the existing index: chunk_text (content), embedding,
	// metadata. The concrete type is kept so that afterPropertiesSet() can be
	// invoked without a cast.
	AzureVectorStore vectorStore = AzureVectorStore.builder(searchIndexClient, embeddingModel)
		.indexName(existingIndexName)
		.initializeSchema(false) // Don't create - use existing index
		.contentFieldName("chunk_text") // Custom field name
		.embeddingFieldName("embedding") // Standard name
		.metadataFieldName("metadata") // Standard name
		.build();

	// Trigger initialization
	vectorStore.afterPropertiesSet();

	// Search the existing index
	SearchRequest request = SearchRequest.builder().query("Azure Databricks").topK(3).build();
	List<Document> results = vectorStore.similaritySearch(request);

	// At least one hit, and never more than the requested topK
	assertThat(results).isNotEmpty().hasSizeLessThanOrEqualTo(3);

	// Verify documents have content (from chunk_text field)
	Document firstDoc = results.get(0);
	assertThat(firstDoc.getId()).isNotNull();
	assertThat(firstDoc.getText()).isNotEmpty();
	assertThat(firstDoc.getScore()).isNotNull();
}

@SpringBootConfiguration
@EnableAutoConfiguration
public static class Config {
Expand Down