Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,17 @@ public sealed class CreateAutoEmbeddingVectorSearchIndexModel<TDocument> : Creat
/// </summary>
public VectorEmbeddingModality Modality { get; init; } = VectorEmbeddingModality.Text;

/// <summary>
/// The <see cref="VectorSimilarity"/> to use to search for top K-nearest neighbors. For auto-embedding indexes,
/// this defaults to <see cref="VectorSimilarity.DotProduct"/> when
/// <see cref="CreateVectorSearchIndexModelBase{TDocument}.Quantization"/> is
/// <see cref="VectorQuantization.None"/> or <see cref="VectorQuantization.Scalar"/>, and
/// <see cref="VectorSimilarity.Euclidean"/> when
/// <see cref="CreateVectorSearchIndexModelBase{TDocument}.Quantization"/> is
/// <see cref="VectorQuantization.Binary"/> or <see cref="VectorQuantization.BinaryNoRescore"/>.
/// </summary>
/// <remarks>
/// When this property is left as <c>null</c>, no <c>similarity</c> element is written into the rendered
/// index definition.
/// </remarks>
public VectorSimilarity? Similarity { get; init; }

/// <summary>
/// Initializes a new instance of the <see cref="CreateAutoEmbeddingVectorSearchIndexModel{TDocument}"/> for a vector index
/// that will automatically create embeddings from a given field in the document. The embedding model to use must
Expand Down Expand Up @@ -79,19 +90,97 @@ public CreateAutoEmbeddingVectorSearchIndexModel(
{
}

/// <summary>
/// Returns a copy of this <see cref="CreateAutoEmbeddingVectorSearchIndexModel{TDocument}"/> in which the given
/// fields are configured to be stored in the index. Any excluded stored fields are cleared on the copy; all other
/// options are carried over unchanged. Note that storing full documents might significantly impact performance
/// during indexing and querying. Explicitly storing vector fields is not recommended.
/// </summary>
/// <param name="includedStoredFields">The fields to store.</param>
/// <returns>A new model with the fields configured.</returns>
public CreateAutoEmbeddingVectorSearchIndexModel<TDocument> WithIncludedStoredFields(
    params FieldDefinition<TDocument>[] includedStoredFields)
{
    var filterFields = FilterFields.ToArray();

    var copy = new CreateAutoEmbeddingVectorSearchIndexModel<TDocument>(
        Field, Name, AutoEmbeddingModelName, filterFields)
    {
        // Options carried over from this instance.
        Modality = Modality,
        Similarity = Similarity,
        Dimensions = Dimensions,
        Quantization = Quantization,
        HnswMaxEdges = HnswMaxEdges,
        HnswNumEdgeCandidates = HnswNumEdgeCandidates,

        // Include and exclude lists are mutually exclusive, so the exclude list is reset.
        IncludedStoredFields = includedStoredFields,
        ExcludedStoredFields = null,
    };

    return copy;
}

/// <summary>
/// Returns a copy of this <see cref="CreateAutoEmbeddingVectorSearchIndexModel{TDocument}"/> in which the fields
/// selected by the given expressions are configured to be stored in the index. Note that storing full documents
/// might significantly impact performance during indexing and querying. Explicitly storing vector fields is not
/// recommended.
/// </summary>
/// <param name="includedStoredFields">Expressions selecting the fields to store.</param>
/// <returns>A new model with the fields configured.</returns>
public CreateAutoEmbeddingVectorSearchIndexModel<TDocument> WithIncludedStoredFields(
    params Expression<Func<TDocument, object>>[] includedStoredFields)
{
    // Wrap every expression in a field definition, then defer to the FieldDefinition-based overload.
    var fieldDefinitions = includedStoredFields
        .Select<Expression<Func<TDocument, object>>, FieldDefinition<TDocument>>(
            expression => new ExpressionFieldDefinition<TDocument>(expression))
        .ToArray();

    return WithIncludedStoredFields(fieldDefinitions);
}

/// <summary>
/// Returns a copy of this <see cref="CreateAutoEmbeddingVectorSearchIndexModel{TDocument}"/> in which the given
/// fields are configured to be excluded from being stored in the index. Any included stored fields are cleared on
/// the copy; all other options are carried over unchanged. This is typically used to exclude vector fields from
/// being stored when other fields should be stored.
/// </summary>
/// <param name="excludedStoredFields">The fields to exclude from being stored.</param>
/// <returns>A new model with the fields configured.</returns>
public CreateAutoEmbeddingVectorSearchIndexModel<TDocument> WithExcludedStoredFields(
    params FieldDefinition<TDocument>[] excludedStoredFields)
{
    var filterFields = FilterFields.ToArray();

    var copy = new CreateAutoEmbeddingVectorSearchIndexModel<TDocument>(
        Field, Name, AutoEmbeddingModelName, filterFields)
    {
        // Options carried over from this instance.
        Modality = Modality,
        Similarity = Similarity,
        Dimensions = Dimensions,
        Quantization = Quantization,
        HnswMaxEdges = HnswMaxEdges,
        HnswNumEdgeCandidates = HnswNumEdgeCandidates,

        // Include and exclude lists are mutually exclusive, so the include list is reset.
        ExcludedStoredFields = excludedStoredFields,
        IncludedStoredFields = null,
    };

    return copy;
}

/// <summary>
/// Returns a copy of this <see cref="CreateAutoEmbeddingVectorSearchIndexModel{TDocument}"/> in which the fields
/// selected by the given expressions are configured to be excluded from being stored in the index. This is
/// typically used to exclude vector fields from being stored when other fields should be stored.
/// </summary>
/// <param name="excludedStoredFields">Expressions selecting the fields to exclude from being stored.</param>
/// <returns>A new model with the fields configured.</returns>
public CreateAutoEmbeddingVectorSearchIndexModel<TDocument> WithExcludedStoredFields(
    params Expression<Func<TDocument, object>>[] excludedStoredFields)
{
    // Wrap every expression in a field definition, then defer to the FieldDefinition-based overload.
    var fieldDefinitions = excludedStoredFields
        .Select<Expression<Func<TDocument, object>>, FieldDefinition<TDocument>>(
            expression => new ExpressionFieldDefinition<TDocument>(expression))
        .ToArray();

    return WithExcludedStoredFields(fieldDefinitions);
}

/// <inheritdoc/>
internal override BsonDocument Render(RenderArgs<TDocument> renderArgs)
{
    // "dotProduct" must be spelled in camel case: neither "DotProduct" nor "dotproduct" is allowed.
    // Every other similarity renders as its lower-cased enum name. A null Similarity produces a null
    // value here, which causes the "similarity" element to be omitted below.
    var similarityValue = Similarity == VectorSimilarity.DotProduct
        ? "dotProduct"
        : Similarity?.ToString().ToLowerInvariant();

    var vectorField = new BsonDocument
    {
        { "type", "autoEmbed" },
        { "path", Field.Render(renderArgs).FieldName },
        { "modality", Modality.ToString().ToLowerInvariant() },
        { "model", AutoEmbeddingModelName },
        { "similarity", similarityValue, similarityValue != null },
        { "numDimensions", Dimensions, Dimensions != 0 }, // Only written when a dimension count was set.
    };

    // Adds the quantization and HNSW options shared by all vector index models.
    RenderCommonFieldElements(renderArgs, vectorField);

    var fieldDocuments = new List<BsonDocument> { vectorField };
    RenderFilterFields(renderArgs, fieldDocuments);

    // The index definition must pass through RenderCommonElements before being returned; otherwise the
    // top-level "nestedRoot" and "storedSource" elements are never emitted.
    var indexDefinition = new BsonDocument { { "fields", new BsonArray(fieldDocuments) } };
    RenderCommonElements(renderArgs, indexDefinition);

    return indexDefinition;
}
}
98 changes: 66 additions & 32 deletions src/MongoDB.Driver/CreateVectorSearchIndexModel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,27 +31,6 @@ public sealed class CreateVectorSearchIndexModel<TDocument> : CreateVectorSearch
/// </summary>
public VectorSimilarity Similarity { get; }

/// <summary>
/// Number of vector dimensions that vector search enforces at index-time and query-time.
/// </summary>
public int Dimensions { get; }

/// <summary>
/// Type of automatic vector quantization for your vectors.
/// </summary>
public VectorQuantization? Quantization { get; init; }

/// <summary>
/// Maximum number of edges (or connections) that a node can have in the Hierarchical Navigable Small Worlds graph.
/// If this is <c>null</c> while <see cref="HnswNumEdgeCandidates"/> is set, the rendered <c>hnswOptions</c>
/// use 16 for <c>maxEdges</c>.
/// </summary>
public int? HnswMaxEdges { get; init; }

/// <summary>
/// Analogous to numCandidates at query-time, this parameter controls the maximum number of nodes to evaluate to
/// find the closest neighbors to connect to a new node. If this is <c>null</c> while <see cref="HnswMaxEdges"/>
/// is set, the rendered <c>hnswOptions</c> use 100 for <c>numEdgeCandidates</c>.
/// </summary>
public int? HnswNumEdgeCandidates { get; init; }

/// <summary>
/// Initializes a new instance of the <see cref="CreateVectorSearchIndexModel{TDocument}"/> class for a vector
/// index where the vector embeddings are created manually. The required options for <see cref="VectorSimilarity"/>
Expand Down Expand Up @@ -101,6 +80,66 @@ public CreateVectorSearchIndexModel(
{
}

/// <summary>
/// Returns a copy of this <see cref="CreateVectorSearchIndexModel{TDocument}"/> in which the given fields are
/// configured to be stored in the index. Any excluded stored fields are cleared on the copy; all other options
/// are carried over unchanged. Note that storing full documents might significantly impact performance during
/// indexing and querying. Explicitly storing vector fields is not recommended.
/// </summary>
/// <param name="includedStoredFields">The fields to store.</param>
/// <returns>A new model with the fields configured.</returns>
public CreateVectorSearchIndexModel<TDocument> WithIncludedStoredFields(
    params FieldDefinition<TDocument>[] includedStoredFields)
{
    var filterFields = FilterFields.ToArray();

    var copy = new CreateVectorSearchIndexModel<TDocument>(Field, Name, Similarity, Dimensions, filterFields)
    {
        // Options carried over from this instance.
        Quantization = Quantization,
        HnswMaxEdges = HnswMaxEdges,
        HnswNumEdgeCandidates = HnswNumEdgeCandidates,

        // Include and exclude lists are mutually exclusive, so the exclude list is reset.
        IncludedStoredFields = includedStoredFields,
        ExcludedStoredFields = null,
    };

    return copy;
}

/// <summary>
/// Returns a copy of this <see cref="CreateVectorSearchIndexModel{TDocument}"/> in which the fields selected by
/// the given expressions are configured to be stored in the index. Note that storing full documents might
/// significantly impact performance during indexing and querying. Explicitly storing vector fields is not
/// recommended.
/// </summary>
/// <param name="includedStoredFields">Expressions selecting the fields to store.</param>
/// <returns>A new model with the fields configured.</returns>
public CreateVectorSearchIndexModel<TDocument> WithIncludedStoredFields(
    params Expression<Func<TDocument, object>>[] includedStoredFields)
{
    // Wrap every expression in a field definition, then defer to the FieldDefinition-based overload.
    var fieldDefinitions = includedStoredFields
        .Select<Expression<Func<TDocument, object>>, FieldDefinition<TDocument>>(
            expression => new ExpressionFieldDefinition<TDocument>(expression))
        .ToArray();

    return WithIncludedStoredFields(fieldDefinitions);
}

/// <summary>
/// Returns a copy of this <see cref="CreateVectorSearchIndexModel{TDocument}"/> in which the given fields are
/// configured to be excluded from being stored in the index. Any included stored fields are cleared on the copy;
/// all other options are carried over unchanged. This is typically used to exclude vector fields from being
/// stored when other fields should be stored.
/// </summary>
/// <param name="excludedStoredFields">The fields to exclude from being stored.</param>
/// <returns>A new model with the fields configured.</returns>
public CreateVectorSearchIndexModel<TDocument> WithExcludedStoredFields(
    params FieldDefinition<TDocument>[] excludedStoredFields)
{
    var filterFields = FilterFields.ToArray();

    var copy = new CreateVectorSearchIndexModel<TDocument>(Field, Name, Similarity, Dimensions, filterFields)
    {
        // Options carried over from this instance.
        Quantization = Quantization,
        HnswMaxEdges = HnswMaxEdges,
        HnswNumEdgeCandidates = HnswNumEdgeCandidates,

        // Include and exclude lists are mutually exclusive, so the include list is reset.
        ExcludedStoredFields = excludedStoredFields,
        IncludedStoredFields = null,
    };

    return copy;
}

/// <summary>
/// Returns a copy of this <see cref="CreateVectorSearchIndexModel{TDocument}"/> in which the fields selected by
/// the given expressions are configured to be excluded from being stored in the index. This is typically used to
/// exclude vector fields from being stored when other fields should be stored.
/// </summary>
/// <param name="excludedStoredFields">Expressions selecting the fields to exclude from being stored.</param>
/// <returns>A new model with the fields configured.</returns>
public CreateVectorSearchIndexModel<TDocument> WithExcludedStoredFields(
    params Expression<Func<TDocument, object>>[] excludedStoredFields)
{
    // Wrap every expression in a field definition, then defer to the FieldDefinition-based overload.
    var fieldDefinitions = excludedStoredFields
        .Select<Expression<Func<TDocument, object>>, FieldDefinition<TDocument>>(
            expression => new ExpressionFieldDefinition<TDocument>(expression))
        .ToArray();

    return WithExcludedStoredFields(fieldDefinitions);
}

/// <inheritdoc/>
internal override BsonDocument Render(RenderArgs<TDocument> renderArgs)
{
Expand All @@ -116,19 +155,14 @@ internal override BsonDocument Render(RenderArgs<TDocument> renderArgs)
{ "similarity", similarityValue },
};

vectorField.Add("quantization", Quantization.ToString()?.ToLowerInvariant(), Quantization.HasValue);

if (HnswMaxEdges != null || HnswNumEdgeCandidates != null)
{
vectorField.Add("hnswOptions",
new BsonDocument
{
{ "maxEdges", HnswMaxEdges ?? 16 }, { "numEdgeCandidates", HnswNumEdgeCandidates ?? 100 }
});
}
RenderCommonFieldElements(renderArgs, vectorField);

var fieldDocuments = new List<BsonDocument> { vectorField };
RenderFilterFields(renderArgs, fieldDocuments);
return new BsonDocument { { "fields", new BsonArray(fieldDocuments) } };

var indexDefinition = new BsonDocument { { "fields", new BsonArray(fieldDocuments) } };
RenderCommonElements(renderArgs, indexDefinition);

return indexDefinition;
}
}
92 changes: 92 additions & 0 deletions src/MongoDB.Driver/CreateVectorSearchIndexModelBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
using System.Collections.Generic;
using System.Linq;
using MongoDB.Bson;
using MongoDB.Driver.Core.Misc;

namespace MongoDB.Driver;

Expand All @@ -34,6 +35,44 @@ public abstract class CreateVectorSearchIndexModelBase<TDocument> : CreateSearch
/// </summary>
public IReadOnlyList<FieldDefinition<TDocument>> FilterFields { get; }

/// <summary>
/// The fields that must be stored in the index. Use
/// <see cref="CreateVectorSearchIndexModel{TDocument}.WithIncludedStoredFields(FieldDefinition{TDocument}[])"/>
/// or <see cref="CreateAutoEmbeddingVectorSearchIndexModel{TDocument}.WithIncludedStoredFields(FieldDefinition{TDocument}[])"/>
/// to configure this. When non-empty, and <see cref="ExcludedStoredFields"/> is null or empty, these fields are
/// rendered as the <c>include</c> list of the <c>storedSource</c> element.
/// </summary>
public IReadOnlyList<FieldDefinition<TDocument>> IncludedStoredFields { get; protected init; }

/// <summary>
/// The fields that must NOT be stored in the index. Use
/// <see cref="CreateVectorSearchIndexModel{TDocument}.WithExcludedStoredFields(FieldDefinition{TDocument}[])"/>
/// or <see cref="CreateAutoEmbeddingVectorSearchIndexModel{TDocument}.WithExcludedStoredFields(FieldDefinition{TDocument}[])"/>
/// to configure this. When non-empty, these fields are rendered as the <c>exclude</c> list of the
/// <c>storedSource</c> element, and <see cref="IncludedStoredFields"/> is not rendered.
/// </summary>
public IReadOnlyList<FieldDefinition<TDocument>> ExcludedStoredFields { get; protected init; }

/// <summary>
/// Number of vector dimensions that vector search enforces at index-time and query-time, or uses to build
/// the embeddings for auto-embedding indexes. For auto-embedding indexes, a value of 0 means that no
/// <c>numDimensions</c> element is rendered.
/// </summary>
public int Dimensions { get; init; }

/// <summary>
/// Type of automatic vector quantization for your vectors. No <c>quantization</c> element is rendered when this
/// is <c>null</c> or <see cref="VectorQuantization.None"/>.
/// </summary>
public VectorQuantization? Quantization { get; init; }

/// <summary>
/// Maximum number of edges (or connections) that a node can have in the Hierarchical Navigable Small Worlds graph.
/// If this is <c>null</c> while <see cref="HnswNumEdgeCandidates"/> is set, the rendered <c>hnswOptions</c>
/// use 16 for <c>maxEdges</c>.
/// </summary>
public int? HnswMaxEdges { get; init; }

/// <summary>
/// Analogous to numCandidates at query-time, this parameter controls the maximum number of nodes to evaluate to
/// find the closest neighbors to connect to a new node. If this is <c>null</c> while <see cref="HnswMaxEdges"/>
/// is set, the rendered <c>hnswOptions</c> use 100 for <c>numEdgeCandidates</c>.
/// </summary>
public int? HnswNumEdgeCandidates { get; init; }

/// <summary>
/// Initializes a new instance of the <see cref="CreateVectorSearchIndexModelBase{TDocument}"/> class for a vector
/// index where the vector embeddings are created manually. The required options for <see cref="VectorSimilarity"/>
Expand All @@ -48,6 +87,8 @@ protected CreateVectorSearchIndexModelBase(
params FieldDefinition<TDocument>[] filterFields)
: base(name, SearchIndexType.VectorSearch)
{
Ensure.IsNotNull(field, nameof(field));

Field = field;
FilterFields = filterFields?.ToList() ?? [];
}
Expand Down Expand Up @@ -79,4 +120,55 @@ private protected void RenderFilterFields(RenderArgs<TDocument> renderArgs, List
}
}
}

/// <summary>
/// Called by subclasses to add the top-level elements shared by all vector index definitions: a
/// <c>nestedRoot</c> element when the rendered vector field name contains a dot after its first character, and a
/// <c>storedSource</c> element when excluded or included stored fields are configured.
/// </summary>
/// <param name="renderArgs">The render args.</param>
/// <param name="indexDocument">The index document into which the elements will go.</param>
private protected void RenderCommonElements(RenderArgs<TDocument> renderArgs, BsonDocument indexDocument)
{
    var vectorPath = Field.Render(renderArgs).FieldName;
    var lastDot = vectorPath.LastIndexOf('.');
    if (lastDot > 0)
    {
        // Everything before the last dot of the vector field path becomes the nested root.
        indexDocument.Add("nestedRoot", vectorPath.Substring(0, lastDot));
    }

    // A non-empty exclude list wins over the include list; with neither configured there is nothing to add.
    string storedSourceMode;
    IReadOnlyList<FieldDefinition<TDocument>> storedFields;
    if (ExcludedStoredFields?.Any() == true)
    {
        storedSourceMode = "exclude";
        storedFields = ExcludedStoredFields;
    }
    else if (IncludedStoredFields?.Any() == true)
    {
        storedSourceMode = "include";
        storedFields = IncludedStoredFields;
    }
    else
    {
        return;
    }

    var storedFieldNames = new BsonArray();
    foreach (var storedField in storedFields)
    {
        storedFieldNames.Add(storedField.Render(renderArgs).FieldName);
    }

    indexDocument.Add("storedSource", new BsonDocument { { storedSourceMode, storedFieldNames } });
}

/// <summary>
/// Called by subclasses to add the elements shared by all vector field definitions: a <c>quantization</c>
/// element when a quantization other than <see cref="VectorQuantization.None"/> is configured, and an
/// <c>hnswOptions</c> element when at least one HNSW option is configured.
/// </summary>
/// <param name="renderArgs">The render args.</param>
/// <param name="fieldDocument">The field document into which the elements will go.</param>
private protected void RenderCommonFieldElements(RenderArgs<TDocument> renderArgs, BsonDocument fieldDocument)
{
    if (Quantization is VectorQuantization quantization && quantization != VectorQuantization.None)
    {
        string quantizationName;
        if (quantization == VectorQuantization.BinaryNoRescore)
        {
            // Camel case is required here, so simple lower-casing of the enum name is not enough.
            quantizationName = "binaryNoRescore";
        }
        else
        {
            quantizationName = quantization.ToString().ToLowerInvariant();
        }

        fieldDocument.Add("quantization", quantizationName);
    }

    if (HnswMaxEdges == null && HnswNumEdgeCandidates == null)
    {
        return;
    }

    // When only one of the two HNSW options is set, the other falls back to 16 / 100 respectively.
    var maxEdges = HnswMaxEdges ?? 16;
    var numEdgeCandidates = HnswNumEdgeCandidates ?? 100;
    var hnswOptions = new BsonDocument
    {
        { "maxEdges", maxEdges },
        { "numEdgeCandidates", numEdgeCandidates },
    };

    fieldDocument.Add("hnswOptions", hnswOptions);
}
}
Loading