diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs new file mode 100644 index 00000000000..31836464501 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs @@ -0,0 +1,65 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using Microsoft.Extensions.VectorData; + +namespace Microsoft.Extensions.DataIngestion; + +/// +/// Represents the base record type used by to store ingested chunks in a vector store. +/// +/// The type of the chunk content. +/// +/// When the vector dimension count is not known at compile time, +/// use the +/// helper to create a and pass it to the constructor. +/// When the vector dimension count is known at compile time, derive from this class and add +/// the to the property. +/// +public class IngestedChunkRecord +{ + /// + /// The storage name for the property. + /// + protected const string EmbeddingStorageName = VectorStoreExtensions.EmbeddingStorageName; + + private const string KeyStorageName = "key"; + private const string DocumentIdStorageName = "documentid"; + private const string ContentStorageName = "content"; + private const string ContextStorageName = "context"; + + /// + /// Gets or sets the unique key for this record. + /// + [VectorStoreKey(StorageName = KeyStorageName)] + public virtual Guid Key { get; set; } + + /// + /// Gets or sets the identifier of the document from which this chunk was extracted. + /// + [VectorStoreData(StorageName = DocumentIdStorageName)] + public virtual string DocumentId { get; set; } = string.Empty; + + /// + /// Gets or sets the content of the chunk. + /// + [VectorStoreData(StorageName = ContentStorageName)] + public virtual TChunk? Content { get; set; } + + /// + /// Gets or sets additional context for the chunk. 
+ /// + [VectorStoreData(StorageName = ContextStorageName)] + public virtual string? Context { get; set; } + + /// + /// Gets the embedding value for this record. + /// + /// + /// By default, returns the value. The vector store's embedding generator + /// will convert this to a vector. Override this property in derived classes to add + /// the with the appropriate dimension count. + /// + public virtual TChunk? Embedding => Content; +} diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreExtensions.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreExtensions.cs new file mode 100644 index 00000000000..7bc53ec7357 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreExtensions.cs @@ -0,0 +1,60 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Diagnostics.CodeAnalysis; +using Microsoft.Extensions.VectorData; + +namespace Microsoft.Extensions.DataIngestion; + +/// +/// Provides extension methods for working with vector stores in the context of data ingestion. +/// +public static class VectorStoreExtensions +{ + internal const string EmbeddingStorageName = "embedding"; + + /// + /// Provides a convenient method to get a vector store collection specifically designed for storing ingested chunk records. + /// + /// The type of the record to be stored in the collection. + /// The type of the chunk content. + /// The vector store instance to create the collection in. + /// The name of the collection to be created. + /// The number of dimensions that the vector has. + /// The storage name for the vector property. + /// + /// The distance function to use. When not provided, the default specific to given database will be used. + /// Check for available values. + /// + /// The index kind to use. + /// A vector store collection configured for ingested chunk records. 
+ [RequiresDynamicCode("This API is not compatible with NativeAOT. You can implement your own IngestionChunkWriter that uses dynamic mapping via VectorStore.GetDynamicCollection().")] + [RequiresUnreferencedCode("This API is not compatible with trimming. You can implement your own IngestionChunkWriter that uses dynamic mapping via VectorStore.GetDynamicCollection().")] + public static VectorStoreCollection GetIngestionRecordCollection(this VectorStore vectorStore, + string collectionName, int dimensionCount, string storageName = EmbeddingStorageName, string? distanceFunction = null, string? indexKind = null) + where TRecord : IngestedChunkRecord, new() + { + _ = Shared.Diagnostics.Throw.IfNull(vectorStore); + _ = Shared.Diagnostics.Throw.IfNullOrEmpty(collectionName); + _ = Shared.Diagnostics.Throw.IfLessThanOrEqual(dimensionCount, 0); + _ = Shared.Diagnostics.Throw.IfNullOrEmpty(storageName); + + VectorStoreCollectionDefinition additiveDefinition = new() + { + Properties = + { + // By using TChunk as the type here we allow the vector store + // to handle the conversion from TChunk to the actual vector type it supports. + new VectorStoreVectorProperty(nameof(IngestedChunkRecord<>.Embedding), typeof(TChunk), dimensionCount) + { + StorageName = storageName, + DistanceFunction = distanceFunction, + IndexKind = indexKind, + }, + }, + }; + + return vectorStore.GetCollection(collectionName, additiveDefinition); + } +} diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs index a10a6595095..8182892bb43 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs @@ -11,66 +11,52 @@ namespace Microsoft.Extensions.DataIngestion; /// -/// Writes chunks to the using the default schema. +/// Writes chunks to a . /// -/// The type of the chunk content. 
-public sealed class VectorStoreWriter : IngestionChunkWriter +/// The type of the chunk content. +/// The type of the record stored in the vector store. +public class VectorStoreWriter : IngestionChunkWriter + where TRecord : IngestedChunkRecord, new() { - // The names are lowercase with no special characters to ensure compatibility with various vector stores. - private const string KeyName = "key"; - private const string EmbeddingName = "embedding"; - private const string ContentName = "content"; - private const string ContextName = "context"; - private const string DocumentIdName = "documentid"; - - private readonly VectorStore _vectorStore; - private readonly int _dimensionCount; private readonly VectorStoreWriterOptions _options; - - private VectorStoreCollection>? _vectorStoreCollection; + private bool _collectionEnsured; /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. /// - /// The to use to store the instances. - /// The number of dimensions that the vector has. This value is required when creating collections. + /// The to use to store the instances. /// The options for the vector store writer. - /// When is null. - /// When is less or equal zero. - public VectorStoreWriter(VectorStore vectorStore, int dimensionCount, VectorStoreWriterOptions? options = default) + /// When is null. + /// + /// You can use the + /// helper to create a with the appropriate schema for storing ingestion chunks. + /// + public VectorStoreWriter(VectorStoreCollection collection, VectorStoreWriterOptions? options = default) { - _vectorStore = Throw.IfNull(vectorStore); - _dimensionCount = Throw.IfLessThanOrEqual(dimensionCount, 0); + VectorStoreCollection = Throw.IfNull(collection); _options = options ?? new VectorStoreWriterOptions(); } /// /// Gets the underlying used to store the chunks. /// - /// - /// The collection is initialized when is called for the first time. - /// - /// The collection has not been initialized yet. 
- /// Call first. - public VectorStoreCollection> VectorStoreCollection - => _vectorStoreCollection ?? throw new InvalidOperationException("The collection has not been initialized yet. Call WriteAsync first."); + public VectorStoreCollection VectorStoreCollection { get; } /// - public override async Task WriteAsync(IAsyncEnumerable> chunks, CancellationToken cancellationToken = default) + public override async Task WriteAsync(IAsyncEnumerable> chunks, CancellationToken cancellationToken = default) { _ = Throw.IfNull(chunks); - IReadOnlyList? preExistingKeys = null; - List>? batch = null; + IReadOnlyList? preExistingKeys = null; + List? batch = null; long currentBatchTokenCount = 0; - await foreach (IngestionChunk chunk in chunks.WithCancellation(cancellationToken)) + await foreach (IngestionChunk chunk in chunks.WithCancellation(cancellationToken)) { - if (_vectorStoreCollection is null) + if (!_collectionEnsured) { - _vectorStoreCollection = _vectorStore.GetDynamicCollection(_options.CollectionName, GetVectorStoreRecordDefinition(chunk)); - - await _vectorStoreCollection.EnsureCollectionExistsAsync(cancellationToken).ConfigureAwait(false); + await VectorStoreCollection.EnsureCollectionExistsAsync(cancellationToken).ConfigureAwait(false); + _collectionEnsured = true; } // We obtain the IDs of the pre-existing chunks for given document, @@ -78,21 +64,19 @@ public override async Task WriteAsync(IAsyncEnumerable> chunks // to avoid a situation where we delete the chunks and then fail to insert the new ones. 
preExistingKeys ??= await GetPreExistingChunksIdsAsync(chunk.Document, cancellationToken).ConfigureAwait(false); - var key = Guid.NewGuid(); - Dictionary record = new() + TRecord record = new() { - [KeyName] = key, - [ContentName] = chunk.Content, - [EmbeddingName] = chunk.Content, - [ContextName] = chunk.Context, - [DocumentIdName] = chunk.Document.Identifier, + Key = Guid.NewGuid(), + Content = chunk.Content, + Context = chunk.Context, + DocumentId = chunk.Document.Identifier, }; if (chunk.HasMetadata) { foreach (var metadata in chunk.Metadata) { - record[metadata.Key] = metadata.Value; + SetMetadata(record, metadata.Key, metadata.Value); } } @@ -102,7 +86,7 @@ public override async Task WriteAsync(IAsyncEnumerable> chunks // If the batch is empty or the chunk alone exceeds the limit, add it anyway. if (batch.Count > 0 && currentBatchTokenCount + chunk.TokenCount > _options.BatchTokenCount) { - await _vectorStoreCollection.UpsertAsync(batch, cancellationToken).ConfigureAwait(false); + await VectorStoreCollection.UpsertAsync(batch, cancellationToken).ConfigureAwait(false); batch.Clear(); currentBatchTokenCount = 0; @@ -115,75 +99,31 @@ public override async Task WriteAsync(IAsyncEnumerable> chunks // Upsert any remaining chunks in the batch if (batch?.Count > 0) { - await _vectorStoreCollection!.UpsertAsync(batch, cancellationToken).ConfigureAwait(false); + await VectorStoreCollection.UpsertAsync(batch, cancellationToken).ConfigureAwait(false); } if (preExistingKeys?.Count > 0) { - await _vectorStoreCollection!.DeleteAsync(preExistingKeys, cancellationToken).ConfigureAwait(false); - } - } - - /// - protected override void Dispose(bool disposing) - { - try - { - _vectorStoreCollection?.Dispose(); - } - finally - { - _vectorStore.Dispose(); - base.Dispose(disposing); + await VectorStoreCollection.DeleteAsync(preExistingKeys, cancellationToken).ConfigureAwait(false); } } - private VectorStoreCollectionDefinition GetVectorStoreRecordDefinition(IngestionChunk 
representativeChunk) + /// + /// Sets a metadata value on the record. + /// + /// The record on which to set the metadata. + /// The metadata key. + /// The metadata value. + /// + /// Override this method in derived classes to store metadata as typed properties with + /// attributes. + /// + protected virtual void SetMetadata(TRecord record, string key, object? value) { - VectorStoreCollectionDefinition definition = new() - { - Properties = - { - new VectorStoreKeyProperty(KeyName, typeof(Guid)), - - // By using T as the type here we allow the vector store - // to handle the conversion from T to the actual vector type it supports. - new VectorStoreVectorProperty(EmbeddingName, typeof(T), _dimensionCount) - { - DistanceFunction = _options.DistanceFunction, - IndexKind = _options.IndexKind - }, - new VectorStoreDataProperty(ContentName, typeof(T)), - new VectorStoreDataProperty(ContextName, typeof(string)), - new VectorStoreDataProperty(DocumentIdName, typeof(string)) - { - IsIndexed = true - } - } - }; - - if (representativeChunk.HasMetadata) - { - foreach (var metadata in representativeChunk.Metadata) - { - Type propertyType = metadata.Value.GetType(); - definition.Properties.Add(new VectorStoreDataProperty(metadata.Key, propertyType) - { - // We use lowercase storage names to ensure compatibility with various vector stores. -#pragma warning disable CA1308 // Normalize strings to uppercase - StorageName = metadata.Key.ToLowerInvariant() -#pragma warning restore CA1308 // Normalize strings to uppercase - - // We could consider indexing for certain keys like classification etc. but for now we leave it as non-indexed. - // The reason is that not every DB supports it, moreover we would need to expose the ability to configure it. - }); - } - } - - return definition; + throw new NotSupportedException($"Metadata key '{key}' is not supported. 
Override {nameof(SetMetadata)} in a derived class to handle metadata."); } - private async Task> GetPreExistingChunksIdsAsync(IngestionDocument document, CancellationToken cancellationToken) + private async Task> GetPreExistingChunksIdsAsync(IngestionDocument document, CancellationToken cancellationToken) { if (!_options.IncrementalIngestion) { @@ -193,19 +133,19 @@ private async Task> GetPreExistingChunksIdsAsync(Ingestion // Each Vector Store has a different max top count limit, so we use low value and loop. const int MaxTopCount = 1_000; - List keys = []; + List keys = []; int insertedCount; do { insertedCount = 0; - await foreach (var record in _vectorStoreCollection!.GetAsync( - filter: record => (string)record[DocumentIdName]! == document.Identifier, + await foreach (var record in VectorStoreCollection.GetAsync( + filter: record => record.DocumentId == document.Identifier, top: MaxTopCount, options: new() { Skip = keys.Count }, cancellationToken: cancellationToken).ConfigureAwait(false)) { - keys.Add(record[KeyName]!); + keys.Add(record.Key); insertedCount++; } } diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriterOptions.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriterOptions.cs index e7a7b5e6c79..d86ddb8e97f 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriterOptions.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriterOptions.cs @@ -7,34 +7,12 @@ namespace Microsoft.Extensions.DataIngestion; /// -/// Represents options for the . +/// Represents options for the . /// public sealed class VectorStoreWriterOptions { private const int DefaultBatchTokenCount = 256 * IngestionChunkerOptions.DefaultTokensPerChunk; - /// - /// Gets or sets the name of the collection. When not provided, "chunks" will be used. - /// - public string CollectionName - { - get => field ?? 
"chunks"; - set => field = Throw.IfNullOrEmpty(value); - } - - /// - /// Gets or sets the distance function to use when creating the collection. - /// - /// - /// When not provided, the default specific to given database will be used. Check for available values. - /// - public string? DistanceFunction { get; set; } - - /// - /// Gets or sets the index kind to use when creating the collection. - /// - public string? IndexKind { get; set; } - /// /// Gets or sets a value indicating whether to perform incremental ingestion. /// diff --git a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Components/Pages/Chat/Chat.razor b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Components/Pages/Chat/Chat.razor index 6fc5881c18f..6e5b4212bd0 100644 --- a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Components/Pages/Chat/Chat.razor +++ b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Components/Pages/Chat/Chat.razor @@ -126,7 +126,7 @@ await InvokeAsync(StateHasChanged); var results = await Search.SearchAsync(searchPhrase, filenameFilter, maxResults: 5); return results.Select(result => - $"{result.Text}"); + $"{result.Content}"); } public void Dispose() diff --git a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Program.Aspire.cs b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Program.Aspire.cs index 31442718f1f..178d77b9465 100644 --- a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Program.Aspire.cs +++ b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Program.Aspire.cs @@ -44,7 +44,7 @@ var vectorStorePath = 
Path.Combine(AppContext.BaseDirectory, "vector-store.db"); var vectorStoreConnectionString = $"Data Source={vectorStorePath}"; builder.Services.AddSqliteVectorStore(_ => vectorStoreConnectionString); -builder.Services.AddSqliteCollection(IngestedChunk.CollectionName, vectorStoreConnectionString); +builder.Services.AddSqliteCollection(IngestedChunk.CollectionName, vectorStoreConnectionString); #endif builder.Services.AddSingleton(); builder.Services.AddSingleton(); diff --git a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Program.cs b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Program.cs index 4b041f4a15f..0736628c417 100644 --- a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Program.cs +++ b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Program.cs @@ -105,7 +105,7 @@ var vectorStorePath = Path.Combine(AppContext.BaseDirectory, "vector-store.db"); var vectorStoreConnectionString = $"Data Source={vectorStorePath}"; builder.Services.AddSqliteVectorStore(_ => vectorStoreConnectionString); -builder.Services.AddSqliteCollection(IngestedChunk.CollectionName, vectorStoreConnectionString); +builder.Services.AddSqliteCollection(IngestedChunk.CollectionName, vectorStoreConnectionString); #endif builder.Services.AddSingleton(); diff --git a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/IngestedChunk.cs b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/IngestedChunk.cs index 60e6b5684e4..199b91e638b 100644 --- a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/IngestedChunk.cs +++ 
b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/IngestedChunk.cs @@ -1,9 +1,9 @@ -using System.Text.Json.Serialization; +using Microsoft.Extensions.DataIngestion; using Microsoft.Extensions.VectorData; namespace AIChatWeb_CSharp.Web.Services; -public class IngestedChunk +public class IngestedChunk : IngestedChunkRecord { #if (IsOllama) public const int VectorDimensions = 384; // 384 is the default vector size for the all-minilm embedding model @@ -17,23 +17,6 @@ public class IngestedChunk #endif public const string CollectionName = "data-AIChatWeb-CSharp.Web-chunks"; - [VectorStoreKey(StorageName = "key")] - [JsonPropertyName("key")] - public required Guid Key { get; set; } - - [VectorStoreData(StorageName = "documentid")] - [JsonPropertyName("documentid")] - public required string DocumentId { get; set; } - - [VectorStoreData(StorageName = "content")] - [JsonPropertyName("content")] - public required string Text { get; set; } - - [VectorStoreData(StorageName = "context")] - [JsonPropertyName("context")] - public string? Context { get; set; } - - [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")] - [JsonPropertyName("embedding")] - public string? Vector => Text; + [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingStorageName)] + public override string? 
Embedding => Content; } diff --git a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/Ingestion/DataIngestor.cs b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/Ingestion/DataIngestor.cs index c573403e618..76168b6e632 100644 --- a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/Ingestion/DataIngestor.cs +++ b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/Ingestion/DataIngestor.cs @@ -9,15 +9,13 @@ namespace AIChatWeb_CSharp.Web.Services.Ingestion; public class DataIngestor( ILogger logger, ILoggerFactory loggerFactory, - VectorStore vectorStore, + VectorStoreCollection vectorCollection, IEmbeddingGenerator> embeddingGenerator) { public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern) { - using var writer = new VectorStoreWriter(vectorStore, dimensionCount: IngestedChunk.VectorDimensions, new() + using var writer = new VectorStoreWriter(vectorCollection, new() { - CollectionName = IngestedChunk.CollectionName, - DistanceFunction = IngestedChunk.VectorDistanceFunction, IncrementalIngestion = false, }); diff --git a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/SemanticSearch.cs b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/SemanticSearch.cs index 49bef8de3d1..8cdc6dbeae0 100644 --- a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/SemanticSearch.cs +++ b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/SemanticSearch.cs @@ -4,11 +4,7 @@ namespace AIChatWeb_CSharp.Web.Services; public class SemanticSearch( -#if (IsQdrant) 
VectorStoreCollection vectorCollection, -#else - VectorStoreCollection vectorCollection, -#endif [FromKeyedServices("ingestion_directory")] DirectoryInfo ingestionDirectory, DataIngestor dataIngestor) { diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs index 272ccd510a4..3fd5439f848 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs @@ -9,6 +9,7 @@ using System.Threading; using System.Threading.Tasks; using Microsoft.Extensions.AI; +using Microsoft.Extensions.VectorData; using Microsoft.ML.Tokenizers; using Microsoft.SemanticKernel.Connectors.InMemory; using OpenTelemetry; @@ -84,7 +85,10 @@ public async Task CanProcessDocuments() TestEmbeddingGenerator embeddingGenerator = new(); using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator }); - using VectorStoreWriter vectorStoreWriter = new(testVectorStore, dimensionCount: TestEmbeddingGenerator.DimensionCount); + + var collection = testVectorStore.GetIngestionRecordCollection, string>( + "chunks", TestEmbeddingGenerator.DimensionCount); + using VectorStoreWriter> vectorStoreWriter = new(collection); using IngestionPipeline pipeline = new(CreateReader(), CreateChunker(), vectorStoreWriter); List ingestionResults = await pipeline.ProcessAsync(_sampleFiles).ToListAsync(); @@ -95,15 +99,15 @@ public async Task CanProcessDocuments() Assert.True(embeddingGenerator.WasCalled, "Embedding generator should have been called."); var retrieved = await vectorStoreWriter.VectorStoreCollection - .GetAsync(record => _sampleFiles.Any(info => info.FullName == (string)record["documentid"]!), top: 1000) + .GetAsync(record => _sampleFiles.Any(info => info.FullName == record.DocumentId), top: 1000) .ToListAsync(); Assert.NotEmpty(retrieved); 
for (int i = 0; i < retrieved.Count; i++) { - Assert.NotEqual(Guid.Empty, (Guid)retrieved[i]["key"]!); - Assert.NotEmpty((string)retrieved[i]["content"]!); - Assert.Contains((string)retrieved[i]["documentid"]!, _sampleFiles.Select(info => info.FullName)); + Assert.NotEqual(Guid.Empty, retrieved[i].Key); + Assert.NotEmpty(retrieved[i].Content!); + Assert.Contains(retrieved[i].DocumentId, _sampleFiles.Select(info => info.FullName)); } AssertActivities(activities, "ProcessFiles"); @@ -117,7 +121,10 @@ public async Task CanProcessDocumentsInDirectory() TestEmbeddingGenerator embeddingGenerator = new(); using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator }); - using VectorStoreWriter vectorStoreWriter = new(testVectorStore, dimensionCount: TestEmbeddingGenerator.DimensionCount); + + var collection = testVectorStore.GetIngestionRecordCollection, string>( + "chunks-dir", TestEmbeddingGenerator.DimensionCount); + using VectorStoreWriter> vectorStoreWriter = new(collection); using IngestionPipeline pipeline = new(CreateReader(), CreateChunker(), vectorStoreWriter); @@ -129,15 +136,15 @@ public async Task CanProcessDocumentsInDirectory() Assert.True(embeddingGenerator.WasCalled, "Embedding generator should have been called."); var retrieved = await vectorStoreWriter.VectorStoreCollection - .GetAsync(record => ((string)record["documentid"]!).StartsWith(directory.FullName), top: 1000) + .GetAsync(record => record.DocumentId.StartsWith(directory.FullName), top: 1000) .ToListAsync(); Assert.NotEmpty(retrieved); for (int i = 0; i < retrieved.Count; i++) { - Assert.NotEqual(Guid.Empty, (Guid)retrieved[i]["key"]!); - Assert.NotEmpty((string)retrieved[i]["content"]!); - Assert.StartsWith(directory.FullName, (string)retrieved[i]["documentid"]!); + Assert.NotEqual(Guid.Empty, retrieved[i].Key); + Assert.NotEmpty(retrieved[i].Content!); + Assert.StartsWith(directory.FullName, retrieved[i].DocumentId); } AssertActivities(activities, 
"ProcessDirectory"); @@ -151,7 +158,10 @@ public async Task ChunksCanBeMoreThanJustText() TestEmbeddingGenerator embeddingGenerator = new(); using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator }); - using VectorStoreWriter vectorStoreWriter = new(testVectorStore, dimensionCount: TestEmbeddingGenerator.DimensionCount); + + var collection = testVectorStore.GetIngestionRecordCollection, DataContent>( + "chunks-img", TestEmbeddingGenerator.DimensionCount); + using VectorStoreWriter> vectorStoreWriter = new(collection); using IngestionPipeline pipeline = new(CreateReader(), new ImageChunker(), vectorStoreWriter); Assert.False(embeddingGenerator.WasCalled); @@ -159,15 +169,15 @@ public async Task ChunksCanBeMoreThanJustText() AssertAllIngestionsSucceeded(ingestionResults); var retrieved = await vectorStoreWriter.VectorStoreCollection - .GetAsync(record => ((string)record["documentid"]!).EndsWith(_withImage.Name), top: 100) + .GetAsync(record => record.DocumentId.EndsWith(_withImage.Name), top: 100) .ToListAsync(); Assert.True(embeddingGenerator.WasCalled); Assert.NotEmpty(retrieved); for (int i = 0; i < retrieved.Count; i++) { - Assert.NotEqual(Guid.Empty, (Guid)retrieved[i]["key"]!); - Assert.EndsWith(_withImage.Name, (string)retrieved[i]["documentid"]!); + Assert.NotEqual(Guid.Empty, retrieved[i].Key); + Assert.EndsWith(_withImage.Name, retrieved[i].DocumentId); } AssertActivities(activities, "ProcessFiles"); @@ -200,7 +210,10 @@ public async Task SingleFailureDoesNotTearDownEntirePipeline() TestEmbeddingGenerator embeddingGenerator = new(); using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator }); - using VectorStoreWriter vectorStoreWriter = new(testVectorStore, dimensionCount: TestEmbeddingGenerator.DimensionCount); + + var collection = testVectorStore.GetIngestionRecordCollection, string>( + "chunks-fail", TestEmbeddingGenerator.DimensionCount); + using VectorStoreWriter> 
vectorStoreWriter = new(collection); using IngestionPipeline pipeline = new(failingForFirstReader, CreateChunker(), vectorStoreWriter); diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs new file mode 100644 index 00000000000..7a62230d815 --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs @@ -0,0 +1,17 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using Microsoft.Extensions.VectorData; + +namespace Microsoft.Extensions.DataIngestion.Writers.Tests; + +public class TestChunkRecordWithMetadata : IngestedChunkRecord +{ + public const int TestDimensionCount = 4; + + [VectorStoreVector(TestDimensionCount, StorageName = EmbeddingStorageName)] + public override string? Embedding => Content; + + [VectorStoreData(StorageName = "classification")] + public string? Classification { get; set; } +} diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestVectorStoreWriterWithMetadata.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestVectorStoreWriterWithMetadata.cs new file mode 100644 index 00000000000..ec9a7e9174a --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestVectorStoreWriterWithMetadata.cs @@ -0,0 +1,25 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using Microsoft.Extensions.VectorData; + +namespace Microsoft.Extensions.DataIngestion.Writers.Tests; + +public class TestVectorStoreWriterWithMetadata : VectorStoreWriter +{ + public TestVectorStoreWriterWithMetadata(VectorStoreCollection collection, VectorStoreWriterOptions? 
options = default) + : base(collection, options) + { + } + + protected override void SetMetadata(TestChunkRecordWithMetadata record, string key, object? value) + { + switch (key) + { + case nameof(TestChunkRecordWithMetadata.Classification): + record.Classification = value as string; + break; + } + } +} diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterOptionsTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterOptionsTests.cs index 013b24352f4..8612ca933b5 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterOptionsTests.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterOptionsTests.cs @@ -13,7 +13,6 @@ public void DefaultValues_ShouldBeSetCorrectly() { VectorStoreWriterOptions options = new(); - Assert.Equal("chunks", options.CollectionName); Assert.True(options.IncrementalIngestion); Assert.Equal(512000, options.BatchTokenCount); // 256 * 2000 } diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs index 1ac09dd6577..d725c548f74 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs @@ -14,42 +14,114 @@ namespace Microsoft.Extensions.DataIngestion.Writers.Tests; public abstract class VectorStoreWriterTests { [Fact] - public async Task CanGenerateDynamicSchema() + public async Task CanWriteChunksWithCustomDefinition() { string documentId = Guid.NewGuid().ToString(); using TestEmbeddingGenerator testEmbeddingGenerator = new(); using VectorStore vectorStore = CreateVectorStore(testEmbeddingGenerator); - using VectorStoreWriter writer = new( - vectorStore, - dimensionCount: TestEmbeddingGenerator.DimensionCount); + + // User 
creates their own definition without using CreateDefaultCollectionDefinition, + // using custom storage names to prove they can map to a pre-existing collection schema. + VectorStoreCollectionDefinition definition = new() + { + Properties = + { + new VectorStoreKeyProperty(nameof(IngestedChunkRecord<>.Key), typeof(Guid)) { StorageName = "custom_key" }, + new VectorStoreVectorProperty(nameof(IngestedChunkRecord<>.Embedding), typeof(string), TestEmbeddingGenerator.DimensionCount) + { + StorageName = "custom_embedding", + }, + new VectorStoreDataProperty(nameof(IngestedChunkRecord<>.Content), typeof(string)) { StorageName = "custom_content" }, + new VectorStoreDataProperty(nameof(IngestedChunkRecord<>.Context), typeof(string)) { StorageName = "custom_context" }, + new VectorStoreDataProperty(nameof(IngestedChunkRecord<>.DocumentId), typeof(string)) + { + StorageName = "custom_documentid", + IsIndexed = true, + }, + }, + }; + + var collection = vectorStore.GetCollection>("chunks-custom", definition); + + using VectorStoreWriter> writer = new(collection); + + IngestionDocument document = new(documentId); + IngestionChunk chunk = TestChunkFactory.CreateChunk("custom schema content", document); + + List> chunks = [chunk]; + + await writer.WriteAsync(chunks.ToAsyncEnumerable()); + + IngestedChunkRecord record = await writer.VectorStoreCollection + .GetAsync(filter: record => record.DocumentId == documentId, top: 1) + .SingleAsync(); + + Assert.NotNull(record); + Assert.NotEqual(Guid.Empty, record.Key); + Assert.Equal(documentId, record.DocumentId); + Assert.Equal(chunks[0].Content, record.Content); + } + + [Fact] + public async Task CanWriteChunks() + { + string documentId = Guid.NewGuid().ToString(); + + using TestEmbeddingGenerator testEmbeddingGenerator = new(); + using VectorStore vectorStore = CreateVectorStore(testEmbeddingGenerator); + + var collection = vectorStore.GetIngestionRecordCollection, string>( + "chunks", TestEmbeddingGenerator.DimensionCount); + + using 
VectorStoreWriter> writer = new(collection); IngestionDocument document = new(documentId); IngestionChunk chunk = TestChunkFactory.CreateChunk("some content", document); - chunk.Metadata["key1"] = "value1"; - chunk.Metadata["key2"] = 123; - chunk.Metadata["key3"] = true; - chunk.Metadata["key4"] = 123.45; List> chunks = [chunk]; Assert.False(testEmbeddingGenerator.WasCalled); await writer.WriteAsync(chunks.ToAsyncEnumerable()); - Dictionary record = await writer.VectorStoreCollection - .GetAsync(filter: record => (string)record["documentid"]! == documentId, top: 1) + IngestedChunkRecord record = await writer.VectorStoreCollection + .GetAsync(filter: record => record.DocumentId == documentId, top: 1) .SingleAsync(); Assert.NotNull(record); - Assert.NotNull(record["key"]); - Assert.Equal(documentId, record["documentid"]); - Assert.Equal(chunks[0].Content, record["content"]); + Assert.NotEqual(Guid.Empty, record.Key); + Assert.Equal(documentId, record.DocumentId); + Assert.Equal(chunks[0].Content, record.Content); Assert.True(testEmbeddingGenerator.WasCalled); - foreach (var kvp in chunks[0].Metadata) - { - Assert.True(record.ContainsKey(kvp.Key), $"Record does not contain key '{kvp.Key}'"); - Assert.Equal(kvp.Value, record[kvp.Key]); - } + } + + [Fact] + public async Task CanWriteChunksWithMetadata() + { + string documentId = Guid.NewGuid().ToString(); + + using TestEmbeddingGenerator testEmbeddingGenerator = new(); + using VectorStore vectorStore = CreateVectorStore(testEmbeddingGenerator); + + var collection = vectorStore.GetCollection("chunks-meta"); + using TestVectorStoreWriterWithMetadata writer = new(collection); + + IngestionDocument document = new(documentId); + IngestionChunk chunk = TestChunkFactory.CreateChunk("some content", document); + chunk.Metadata["Classification"] = "important"; + + List> chunks = [chunk]; + + await writer.WriteAsync(chunks.ToAsyncEnumerable()); + + TestChunkRecordWithMetadata record = await writer.VectorStoreCollection + 
.GetAsync(filter: record => record.DocumentId == documentId, top: 1) + .SingleAsync(); + + Assert.NotNull(record); + Assert.Equal(documentId, record.DocumentId); + Assert.Equal(chunks[0].Content, record.Content); + Assert.Equal("important", record.Classification); } [Fact] @@ -59,9 +131,12 @@ public async Task DoesSupportIncrementalIngestion() using TestEmbeddingGenerator testEmbeddingGenerator = new(); using VectorStore vectorStore = CreateVectorStore(testEmbeddingGenerator); - using VectorStoreWriter writer = new( - vectorStore, - dimensionCount: TestEmbeddingGenerator.DimensionCount, + + var collection = vectorStore.GetIngestionRecordCollection, string>( + "chunks-incr", TestEmbeddingGenerator.DimensionCount); + + using VectorStoreWriter> writer = new( + collection, options: new() { IncrementalIngestion = true, @@ -69,8 +144,6 @@ public async Task DoesSupportIncrementalIngestion() IngestionDocument document = new(documentId); IngestionChunk chunk1 = TestChunkFactory.CreateChunk("first chunk", document); - chunk1.Metadata["key1"] = "value1"; - IngestionChunk chunk2 = TestChunkFactory.CreateChunk("second chunk", document); List> chunks = [chunk1, chunk2]; @@ -78,27 +151,25 @@ public async Task DoesSupportIncrementalIngestion() await writer.WriteAsync(chunks.ToAsyncEnumerable()); int recordCount = await writer.VectorStoreCollection - .GetAsync(filter: record => (string)record["documentid"]! == documentId, top: 100) + .GetAsync(filter: record => record.DocumentId == documentId, top: 100) .CountAsync(); Assert.Equal(chunks.Count, recordCount); // Now we will do an incremental ingestion that updates the chunk(s). IngestionChunk updatedChunk = TestChunkFactory.CreateChunk("different content", document); - updatedChunk.Metadata["key1"] = "value2"; List> updatedChunks = [updatedChunk]; await writer.WriteAsync(updatedChunks.ToAsyncEnumerable()); // We ask for 100 records, but we expect only 1 as the previous 2 should have been deleted. 
- Dictionary record = await writer.VectorStoreCollection - .GetAsync(filter: record => (string)record["documentid"]! == documentId, top: 100) + IngestedChunkRecord record = await writer.VectorStoreCollection + .GetAsync(filter: record => record.DocumentId == documentId, top: 100) .SingleAsync(); Assert.NotNull(record); - Assert.NotNull(record["key"]); - Assert.Equal("different content", record["content"]); - Assert.Equal("value2", record["key1"]); + Assert.NotEqual(Guid.Empty, record.Key); + Assert.Equal("different content", record.Content); } public static TheoryData BatchingTestCases => new() @@ -131,9 +202,11 @@ public async Task BatchesChunks(int? batchTokenCount, int[] chunkTokenCounts) options.BatchTokenCount = batchTokenCount.Value; } - using VectorStoreWriter writer = new( - vectorStore, - dimensionCount: TestEmbeddingGenerator.DimensionCount, + var collection = vectorStore.GetIngestionRecordCollection, string>( + "chunks-batch", TestEmbeddingGenerator.DimensionCount); + + using VectorStoreWriter> writer = new( + collection, options: options); IngestionDocument document = new(documentId); @@ -146,7 +219,7 @@ public async Task BatchesChunks(int? batchTokenCount, int[] chunkTokenCounts) await writer.WriteAsync(chunks.ToAsyncEnumerable()); int recordCount = await writer.VectorStoreCollection - .GetAsync(filter: record => (string)record["documentid"]! 
== documentId, top: 100) + .GetAsync(filter: record => record.DocumentId == documentId, top: 100) .CountAsync(); Assert.Equal(chunks.Count, recordCount); @@ -159,9 +232,12 @@ public async Task IncrementalIngestion_WithManyRecords_DeletesAllPreExistingChun using TestEmbeddingGenerator testEmbeddingGenerator = new(); using VectorStore vectorStore = CreateVectorStore(testEmbeddingGenerator); - using VectorStoreWriter writer = new( - vectorStore, - dimensionCount: TestEmbeddingGenerator.DimensionCount, + + var collection = vectorStore.GetIngestionRecordCollection, string>( + "chunks-many", TestEmbeddingGenerator.DimensionCount); + + using VectorStoreWriter> writer = new( + collection, options: new() { IncrementalIngestion = true, @@ -180,7 +256,7 @@ public async Task IncrementalIngestion_WithManyRecords_DeletesAllPreExistingChun await writer.WriteAsync(chunks.ToAsyncEnumerable()); int recordCount = await writer.VectorStoreCollection - .GetAsync(filter: record => (string)record["documentid"]! == documentId, top: 10000) + .GetAsync(filter: record => record.DocumentId == documentId, top: 10000) .CountAsync(); Assert.Equal(chunks.Count, recordCount); @@ -194,13 +270,13 @@ public async Task IncrementalIngestion_WithManyRecords_DeletesAllPreExistingChun await writer.WriteAsync(updatedChunks.ToAsyncEnumerable()); // Verify that all old records were deleted and only the new ones remain - List> records = await writer.VectorStoreCollection - .GetAsync(filter: record => (string)record["documentid"]! == documentId, top: 10000) + List> records = await writer.VectorStoreCollection + .GetAsync(filter: record => record.DocumentId == documentId, top: 10000) .ToListAsync(); Assert.Equal(updatedChunks.Count, records.Count); - Assert.Contains(records, r => (string)r["content"]! == "updated chunk 1"); - Assert.Contains(records, r => (string)r["content"]! 
== "updated chunk 2"); + Assert.Contains(records, r => r.Content == "updated chunk 1"); + Assert.Contains(records, r => r.Content == "updated chunk 2"); } protected abstract VectorStore CreateVectorStore(TestEmbeddingGenerator testEmbeddingGenerator); diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor index 6fc5881c18f..6e5b4212bd0 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor @@ -126,7 +126,7 @@ await InvokeAsync(StateHasChanged); var results = await Search.SearchAsync(searchPhrase, filenameFilter, maxResults: 5); return results.Select(result => - $"{result.Text}"); + $"{result.Content}"); } public void Dispose() diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Program.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Program.cs index e47bad71cc8..0dd33b88a71 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Program.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Program.cs @@ -18,7 +18,7 @@ var vectorStorePath = Path.Combine(AppContext.BaseDirectory, 
"vector-store.db"); var vectorStoreConnectionString = $"Data Source={vectorStorePath}"; builder.Services.AddSqliteVectorStore(_ => vectorStoreConnectionString); -builder.Services.AddSqliteCollection(IngestedChunk.CollectionName, vectorStoreConnectionString); +builder.Services.AddSqliteCollection(IngestedChunk.CollectionName, vectorStoreConnectionString); builder.Services.AddSingleton(); builder.Services.AddSingleton(); builder.Services.AddKeyedSingleton("ingestion_directory", new DirectoryInfo(Path.Combine(builder.Environment.WebRootPath, "Data"))); diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs index af609ea239e..1dec3c99a20 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs @@ -1,31 +1,14 @@ -using System.Text.Json.Serialization; +using Microsoft.Extensions.DataIngestion; using Microsoft.Extensions.VectorData; namespace aichatweb.Web.Services; -public class IngestedChunk +public class IngestedChunk : IngestedChunkRecord { public const int VectorDimensions = 1536; // 1536 is the default vector size for the OpenAI text-embedding-3-small model public const string VectorDistanceFunction = DistanceFunction.CosineDistance; public const string CollectionName = "data-aichatweb-chunks"; - [VectorStoreKey(StorageName = "key")] - [JsonPropertyName("key")] - public required Guid Key { get; set; } - - [VectorStoreData(StorageName = "documentid")] - [JsonPropertyName("documentid")] - public 
required string DocumentId { get; set; } - - [VectorStoreData(StorageName = "content")] - [JsonPropertyName("content")] - public required string Text { get; set; } - - [VectorStoreData(StorageName = "context")] - [JsonPropertyName("context")] - public string? Context { get; set; } - - [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")] - [JsonPropertyName("embedding")] - public string? Vector => Text; + [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingStorageName)] + public override string? Embedding => Content; } diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs index 9dd366a03a5..61088b1225d 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs @@ -9,15 +9,13 @@ namespace aichatweb.Web.Services.Ingestion; public class DataIngestor( ILogger logger, ILoggerFactory loggerFactory, - VectorStore vectorStore, + VectorStoreCollection vectorCollection, IEmbeddingGenerator> embeddingGenerator) { public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern) { - using var writer = new VectorStoreWriter(vectorStore, dimensionCount: IngestedChunk.VectorDimensions, new() + using var writer = new VectorStoreWriter(vectorCollection, new() { - CollectionName = IngestedChunk.CollectionName, - DistanceFunction = 
IngestedChunk.VectorDistanceFunction, IncrementalIngestion = false, }); diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/SemanticSearch.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/SemanticSearch.cs index d043c8efb84..7d8718028bd 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/SemanticSearch.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/SemanticSearch.cs @@ -4,7 +4,7 @@ namespace aichatweb.Web.Services; public class SemanticSearch( - VectorStoreCollection vectorCollection, + VectorStoreCollection vectorCollection, [FromKeyedServices("ingestion_directory")] DirectoryInfo ingestionDirectory, DataIngestor dataIngestor) { diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor index 6fc5881c18f..6e5b4212bd0 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor @@ -126,7 +126,7 @@ await InvokeAsync(StateHasChanged); var results = await Search.SearchAsync(searchPhrase, filenameFilter, maxResults: 5); 
return results.Select(result => - $"{result.Text}"); + $"{result.Content}"); } public void Dispose() diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs index 2d93db8fd94..b1a757a01e7 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs @@ -1,31 +1,14 @@ -using System.Text.Json.Serialization; +using Microsoft.Extensions.DataIngestion; using Microsoft.Extensions.VectorData; namespace aichatweb.Web.Services; -public class IngestedChunk +public class IngestedChunk : IngestedChunkRecord { public const int VectorDimensions = 1536; // 1536 is the default vector size for the OpenAI text-embedding-3-small model public const string VectorDistanceFunction = DistanceFunction.CosineSimilarity; public const string CollectionName = "data-aichatweb-chunks"; - [VectorStoreKey(StorageName = "key")] - [JsonPropertyName("key")] - public required Guid Key { get; set; } - - [VectorStoreData(StorageName = "documentid")] - [JsonPropertyName("documentid")] - public required string DocumentId { get; set; } - - [VectorStoreData(StorageName = "content")] - [JsonPropertyName("content")] - public required string Text { get; set; } - - [VectorStoreData(StorageName = "context")] - [JsonPropertyName("context")] - public string? 
Context { get; set; } - - [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")] - [JsonPropertyName("embedding")] - public string? Vector => Text; + [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingStorageName)] + public override string? Embedding => Content; } diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs index 9dd366a03a5..61088b1225d 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs @@ -9,15 +9,13 @@ namespace aichatweb.Web.Services.Ingestion; public class DataIngestor( ILogger logger, ILoggerFactory loggerFactory, - VectorStore vectorStore, + VectorStoreCollection vectorCollection, IEmbeddingGenerator> embeddingGenerator) { public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern) { - using var writer = new VectorStoreWriter(vectorStore, dimensionCount: IngestedChunk.VectorDimensions, new() + using var writer = new VectorStoreWriter(vectorCollection, new() { - CollectionName = IngestedChunk.CollectionName, - DistanceFunction = IngestedChunk.VectorDistanceFunction, IncrementalIngestion = false, }); diff --git 
a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/SemanticSearch.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/SemanticSearch.cs index d043c8efb84..7d8718028bd 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/SemanticSearch.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/SemanticSearch.cs @@ -4,7 +4,7 @@ namespace aichatweb.Web.Services; public class SemanticSearch( - VectorStoreCollection vectorCollection, + VectorStoreCollection vectorCollection, [FromKeyedServices("ingestion_directory")] DirectoryInfo ingestionDirectory, DataIngestor dataIngestor) { diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Components/Pages/Chat/Chat.razor b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Components/Pages/Chat/Chat.razor index 6fc5881c18f..6e5b4212bd0 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Components/Pages/Chat/Chat.razor +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Components/Pages/Chat/Chat.razor @@ -126,7 +126,7 @@ await InvokeAsync(StateHasChanged); var results = await Search.SearchAsync(searchPhrase, filenameFilter, maxResults: 5); return results.Select(result => - $"{result.Text}"); + $"{result.Content}"); } public void Dispose() diff 
--git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Program.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Program.cs index 27e50372647..2df0c881faa 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Program.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Program.cs @@ -25,7 +25,7 @@ var vectorStorePath = Path.Combine(AppContext.BaseDirectory, "vector-store.db"); var vectorStoreConnectionString = $"Data Source={vectorStorePath}"; builder.Services.AddSqliteVectorStore(_ => vectorStoreConnectionString); -builder.Services.AddSqliteCollection(IngestedChunk.CollectionName, vectorStoreConnectionString); +builder.Services.AddSqliteCollection(IngestedChunk.CollectionName, vectorStoreConnectionString); builder.Services.AddSingleton(); builder.Services.AddSingleton(); diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/IngestedChunk.cs index 68af3ef20fb..998a2d0db4d 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/IngestedChunk.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/IngestedChunk.cs @@ -1,31 +1,14 @@ -using System.Text.Json.Serialization; +using Microsoft.Extensions.DataIngestion; using Microsoft.Extensions.VectorData; namespace aichatweb.Services; 
-public class IngestedChunk +public class IngestedChunk : IngestedChunkRecord { public const int VectorDimensions = 1536; // 1536 is the default vector size for the OpenAI text-embedding-3-small model public const string VectorDistanceFunction = DistanceFunction.CosineDistance; public const string CollectionName = "data-aichatweb-chunks"; - [VectorStoreKey(StorageName = "key")] - [JsonPropertyName("key")] - public required Guid Key { get; set; } - - [VectorStoreData(StorageName = "documentid")] - [JsonPropertyName("documentid")] - public required string DocumentId { get; set; } - - [VectorStoreData(StorageName = "content")] - [JsonPropertyName("content")] - public required string Text { get; set; } - - [VectorStoreData(StorageName = "context")] - [JsonPropertyName("context")] - public string? Context { get; set; } - - [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")] - [JsonPropertyName("embedding")] - public string? Vector => Text; + [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingStorageName)] + public override string? 
Embedding => Content; } diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/Ingestion/DataIngestor.cs index d97b986b694..b4675927d47 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/Ingestion/DataIngestor.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/Ingestion/DataIngestor.cs @@ -9,15 +9,13 @@ namespace aichatweb.Services.Ingestion; public class DataIngestor( ILogger logger, ILoggerFactory loggerFactory, - VectorStore vectorStore, + VectorStoreCollection vectorCollection, IEmbeddingGenerator> embeddingGenerator) { public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern) { - using var writer = new VectorStoreWriter(vectorStore, dimensionCount: IngestedChunk.VectorDimensions, new() + using var writer = new VectorStoreWriter(vectorCollection, new() { - CollectionName = IngestedChunk.CollectionName, - DistanceFunction = IngestedChunk.VectorDistanceFunction, IncrementalIngestion = false, }); diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/SemanticSearch.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/SemanticSearch.cs index 8072f8bcddb..f825ebf5a5c 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/SemanticSearch.cs +++ 
b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/SemanticSearch.cs @@ -4,7 +4,7 @@ namespace aichatweb.Services; public class SemanticSearch( - VectorStoreCollection vectorCollection, + VectorStoreCollection vectorCollection, [FromKeyedServices("ingestion_directory")] DirectoryInfo ingestionDirectory, DataIngestor dataIngestor) { diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor index 6fc5881c18f..6e5b4212bd0 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor @@ -126,7 +126,7 @@ await InvokeAsync(StateHasChanged); var results = await Search.SearchAsync(searchPhrase, filenameFilter, maxResults: 5); return results.Select(result => - $"{result.Text}"); + $"{result.Content}"); } public void Dispose() diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs index b55a8b3c817..174fe48a4ae 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs +++ 
b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs @@ -1,31 +1,14 @@ -using System.Text.Json.Serialization; +using Microsoft.Extensions.DataIngestion; using Microsoft.Extensions.VectorData; namespace aichatweb.Web.Services; -public class IngestedChunk +public class IngestedChunk : IngestedChunkRecord { public const int VectorDimensions = 384; // 384 is the default vector size for the all-minilm embedding model public const string VectorDistanceFunction = DistanceFunction.CosineSimilarity; public const string CollectionName = "data-aichatweb-chunks"; - [VectorStoreKey(StorageName = "key")] - [JsonPropertyName("key")] - public required Guid Key { get; set; } - - [VectorStoreData(StorageName = "documentid")] - [JsonPropertyName("documentid")] - public required string DocumentId { get; set; } - - [VectorStoreData(StorageName = "content")] - [JsonPropertyName("content")] - public required string Text { get; set; } - - [VectorStoreData(StorageName = "context")] - [JsonPropertyName("context")] - public string? Context { get; set; } - - [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")] - [JsonPropertyName("embedding")] - public string? Vector => Text; + [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingStorageName)] + public override string? 
Embedding => Content; } diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs index 9dd366a03a5..61088b1225d 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs @@ -9,15 +9,13 @@ namespace aichatweb.Web.Services.Ingestion; public class DataIngestor( ILogger logger, ILoggerFactory loggerFactory, - VectorStore vectorStore, + VectorStoreCollection vectorCollection, IEmbeddingGenerator> embeddingGenerator) { public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern) { - using var writer = new VectorStoreWriter(vectorStore, dimensionCount: IngestedChunk.VectorDimensions, new() + using var writer = new VectorStoreWriter(vectorCollection, new() { - CollectionName = IngestedChunk.CollectionName, - DistanceFunction = IngestedChunk.VectorDistanceFunction, IncrementalIngestion = false, }); diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Components/Pages/Chat/Chat.razor b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Components/Pages/Chat/Chat.razor index 6fc5881c18f..6e5b4212bd0 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Components/Pages/Chat/Chat.razor +++ 
b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Components/Pages/Chat/Chat.razor @@ -126,7 +126,7 @@ await InvokeAsync(StateHasChanged); var results = await Search.SearchAsync(searchPhrase, filenameFilter, maxResults: 5); return results.Select(result => - $"{result.Text}"); + $"{result.Content}"); } public void Dispose() diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/IngestedChunk.cs index 348bb5d942f..c364b7dd235 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/IngestedChunk.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/IngestedChunk.cs @@ -1,31 +1,14 @@ -using System.Text.Json.Serialization; +using Microsoft.Extensions.DataIngestion; using Microsoft.Extensions.VectorData; namespace aichatweb.Services; -public class IngestedChunk +public class IngestedChunk : IngestedChunkRecord { public const int VectorDimensions = 1536; // 1536 is the default vector size for the OpenAI text-embedding-3-small model public const string VectorDistanceFunction = DistanceFunction.CosineSimilarity; public const string CollectionName = "data-aichatweb-chunks"; - [VectorStoreKey(StorageName = "key")] - [JsonPropertyName("key")] - public required Guid Key { get; set; } - - [VectorStoreData(StorageName = "documentid")] - [JsonPropertyName("documentid")] - public required string DocumentId { get; set; } - - [VectorStoreData(StorageName = "content")] - [JsonPropertyName("content")] - public required string Text { get; set; } - - 
[VectorStoreData(StorageName = "context")] - [JsonPropertyName("context")] - public string? Context { get; set; } - - [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")] - [JsonPropertyName("embedding")] - public string? Vector => Text; + [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingStorageName)] + public override string? Embedding => Content; } diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/Ingestion/DataIngestor.cs index d97b986b694..b4675927d47 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/Ingestion/DataIngestor.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/Ingestion/DataIngestor.cs @@ -9,15 +9,13 @@ namespace aichatweb.Services.Ingestion; public class DataIngestor( ILogger logger, ILoggerFactory loggerFactory, - VectorStore vectorStore, + VectorStoreCollection vectorCollection, IEmbeddingGenerator> embeddingGenerator) { public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern) { - using var writer = new VectorStoreWriter(vectorStore, dimensionCount: IngestedChunk.VectorDimensions, new() + using var writer = new VectorStoreWriter(vectorCollection, new() { - CollectionName = IngestedChunk.CollectionName, - DistanceFunction = IngestedChunk.VectorDistanceFunction, IncrementalIngestion = false, }); diff --git 
a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/SemanticSearch.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/SemanticSearch.cs index 8072f8bcddb..f825ebf5a5c 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/SemanticSearch.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/SemanticSearch.cs @@ -4,7 +4,7 @@ namespace aichatweb.Services; public class SemanticSearch( - VectorStoreCollection vectorCollection, + VectorStoreCollection vectorCollection, [FromKeyedServices("ingestion_directory")] DirectoryInfo ingestionDirectory, DataIngestor dataIngestor) {