From ea56d337fb0238e48fa8ab4b1c574ad016685a47 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 14 Mar 2026 20:50:44 +0000 Subject: [PATCH 01/14] Initial plan From 581bce019cc997295b6016d5f3919ffb721c9c27 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 14 Mar 2026 21:27:58 +0000 Subject: [PATCH 02/14] Refactor VectorStoreWriter to accept VectorStoreCollection, add IngestedChunkRecord base type Co-authored-by: adamsitnik <6011991+adamsitnik@users.noreply.github.com> --- .../Writers/IngestedChunkRecord.cs | 139 +++++++++++++++++ .../Writers/VectorStoreWriter.cs | 144 +++++------------- .../Writers/VectorStoreWriterOptions.cs | 24 +-- .../Components/Pages/Chat/Chat.razor | 2 +- .../Services/IngestedChunk.cs | 29 ++-- .../Services/Ingestion/DataIngestor.cs | 14 +- 6 files changed, 201 insertions(+), 151 deletions(-) create mode 100644 src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs new file mode 100644 index 00000000000..dd24d8edd18 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs @@ -0,0 +1,139 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Text.Json.Serialization; +using Microsoft.Extensions.VectorData; + +namespace Microsoft.Extensions.DataIngestion; + +/// +/// Represents the base record type used by to store ingested chunks in a vector store. +/// +/// The type of the key for the record. +/// The type of the chunk content. 
+/// +/// When the vector dimension count is not known at compile time, use the +/// helper to create a and pass it to the vector store collection constructor. +/// When the vector dimension count is known at compile time, derive from this class and add +/// the to the property. +/// +public class IngestedChunkRecord +{ + /// + /// The storage name for the property. + /// + public const string KeyPropertyName = "key"; + + /// + /// The storage name for the property. + /// + public const string DocumentIdPropertyName = "documentid"; + + /// + /// The storage name for the property. + /// + public const string ContentPropertyName = "content"; + + /// + /// The storage name for the property. + /// + public const string ContextPropertyName = "context"; + + /// + /// The storage name for the property. + /// + public const string EmbeddingPropertyName = "embedding"; + + /// + /// Gets or sets the unique key for this record. + /// + [VectorStoreKey(StorageName = KeyPropertyName)] + [JsonPropertyName(KeyPropertyName)] + public TKey Key { get; set; } = default!; + + /// + /// Gets or sets the identifier of the document from which this chunk was extracted. + /// + [VectorStoreData(StorageName = DocumentIdPropertyName)] + [JsonPropertyName(DocumentIdPropertyName)] + public string DocumentId { get; set; } = string.Empty; + + /// + /// Gets or sets the content of the chunk. + /// + [VectorStoreData(StorageName = ContentPropertyName)] + [JsonPropertyName(ContentPropertyName)] + public TChunk? Content { get; set; } + + /// + /// Gets or sets additional context for the chunk. + /// + [VectorStoreData(StorageName = ContextPropertyName)] + [JsonPropertyName(ContextPropertyName)] + public string? Context { get; set; } + + /// + /// Gets the embedding value for this record. + /// + /// + /// By default, returns the value. The vector store's embedding generator + /// will convert this to a vector. 
Override this property in derived classes to add + /// the with the appropriate dimension count. + /// + [JsonPropertyName(EmbeddingPropertyName)] + public virtual TChunk? Embedding => Content; + + /// + /// Sets a metadata value on the record. + /// + /// The metadata key. + /// The metadata value. + /// + /// Override this method in derived classes to store metadata as typed properties with + /// attributes. The default implementation is a no-op. + /// + public virtual void SetMetadata(string key, object? value) + { + // Default implementation: no-op. + // Derived classes can override to map metadata keys to typed properties. + } + + /// + /// Creates a for . + /// + /// The number of dimensions that the vector has. + /// + /// The distance function to use. When not provided, the default specific to given database will be used. + /// Check for available values. + /// + /// The index kind to use. + /// A suitable for creating a vector store collection. + /// is less than or equal to zero. + public static VectorStoreCollectionDefinition CreateCollectionDefinition(int dimensionCount, string? distanceFunction = null, string? indexKind = null) + { + Shared.Diagnostics.Throw.IfLessThanOrEqual(dimensionCount, 0); + + return new VectorStoreCollectionDefinition + { + Properties = + { + new VectorStoreKeyProperty(KeyPropertyName, typeof(TKey)), + + // By using TChunk as the type here we allow the vector store + // to handle the conversion from TChunk to the actual vector type it supports. 
+ new VectorStoreVectorProperty(EmbeddingPropertyName, typeof(TChunk), dimensionCount) + { + DistanceFunction = distanceFunction, + IndexKind = indexKind, + }, + new VectorStoreDataProperty(ContentPropertyName, typeof(TChunk)), + new VectorStoreDataProperty(ContextPropertyName, typeof(string)), + new VectorStoreDataProperty(DocumentIdPropertyName, typeof(string)) + { + IsIndexed = true, + }, + }, + }; + } +} diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs index a10a6595095..d373843af86 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs @@ -11,66 +11,50 @@ namespace Microsoft.Extensions.DataIngestion; /// -/// Writes chunks to the using the default schema. +/// Writes chunks to a . /// -/// The type of the chunk content. -public sealed class VectorStoreWriter : IngestionChunkWriter +/// The type of the key for the record. +/// The type of the chunk content. +/// The type of the record stored in the vector store. +public sealed class VectorStoreWriter : IngestionChunkWriter + where TKey : notnull + where TRecord : IngestedChunkRecord, new() { - // The names are lowercase with no special characters to ensure compatibility with various vector stores. - private const string KeyName = "key"; - private const string EmbeddingName = "embedding"; - private const string ContentName = "content"; - private const string ContextName = "context"; - private const string DocumentIdName = "documentid"; - - private readonly VectorStore _vectorStore; - private readonly int _dimensionCount; private readonly VectorStoreWriterOptions _options; - - private VectorStoreCollection>? _vectorStoreCollection; + private bool _collectionEnsured; /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. 
/// - /// The to use to store the instances. - /// The number of dimensions that the vector has. This value is required when creating collections. + /// The to use to store the instances. /// The options for the vector store writer. - /// When is null. - /// When is less or equal zero. - public VectorStoreWriter(VectorStore vectorStore, int dimensionCount, VectorStoreWriterOptions? options = default) + /// When is null. + public VectorStoreWriter(VectorStoreCollection collection, VectorStoreWriterOptions? options = default) { - _vectorStore = Throw.IfNull(vectorStore); - _dimensionCount = Throw.IfLessThanOrEqual(dimensionCount, 0); + VectorStoreCollection = Throw.IfNull(collection); _options = options ?? new VectorStoreWriterOptions(); } /// /// Gets the underlying used to store the chunks. /// - /// - /// The collection is initialized when is called for the first time. - /// - /// The collection has not been initialized yet. - /// Call first. - public VectorStoreCollection> VectorStoreCollection - => _vectorStoreCollection ?? throw new InvalidOperationException("The collection has not been initialized yet. Call WriteAsync first."); + public VectorStoreCollection VectorStoreCollection { get; } /// - public override async Task WriteAsync(IAsyncEnumerable> chunks, CancellationToken cancellationToken = default) + public override async Task WriteAsync(IAsyncEnumerable> chunks, CancellationToken cancellationToken = default) { _ = Throw.IfNull(chunks); - IReadOnlyList? preExistingKeys = null; - List>? batch = null; + IReadOnlyList? preExistingKeys = null; + List? 
batch = null; long currentBatchTokenCount = 0; - await foreach (IngestionChunk chunk in chunks.WithCancellation(cancellationToken)) + await foreach (IngestionChunk chunk in chunks.WithCancellation(cancellationToken)) { - if (_vectorStoreCollection is null) + if (!_collectionEnsured) { - _vectorStoreCollection = _vectorStore.GetDynamicCollection(_options.CollectionName, GetVectorStoreRecordDefinition(chunk)); - - await _vectorStoreCollection.EnsureCollectionExistsAsync(cancellationToken).ConfigureAwait(false); + await VectorStoreCollection.EnsureCollectionExistsAsync(cancellationToken).ConfigureAwait(false); + _collectionEnsured = true; } // We obtain the IDs of the pre-existing chunks for given document, @@ -78,21 +62,19 @@ public override async Task WriteAsync(IAsyncEnumerable> chunks // to avoid a situation where we delete the chunks and then fail to insert the new ones. preExistingKeys ??= await GetPreExistingChunksIdsAsync(chunk.Document, cancellationToken).ConfigureAwait(false); - var key = Guid.NewGuid(); - Dictionary record = new() + TRecord record = new() { - [KeyName] = key, - [ContentName] = chunk.Content, - [EmbeddingName] = chunk.Content, - [ContextName] = chunk.Context, - [DocumentIdName] = chunk.Document.Identifier, + Key = GenerateKey(), + Content = chunk.Content, + Context = chunk.Context, + DocumentId = chunk.Document.Identifier, }; if (chunk.HasMetadata) { foreach (var metadata in chunk.Metadata) { - record[metadata.Key] = metadata.Value; + record.SetMetadata(metadata.Key, metadata.Value); } } @@ -102,7 +84,7 @@ public override async Task WriteAsync(IAsyncEnumerable> chunks // If the batch is empty or the chunk alone exceeds the limit, add it anyway. 
if (batch.Count > 0 && currentBatchTokenCount + chunk.TokenCount > _options.BatchTokenCount) { - await _vectorStoreCollection.UpsertAsync(batch, cancellationToken).ConfigureAwait(false); + await VectorStoreCollection.UpsertAsync(batch, cancellationToken).ConfigureAwait(false); batch.Clear(); currentBatchTokenCount = 0; @@ -115,75 +97,31 @@ public override async Task WriteAsync(IAsyncEnumerable> chunks // Upsert any remaining chunks in the batch if (batch?.Count > 0) { - await _vectorStoreCollection!.UpsertAsync(batch, cancellationToken).ConfigureAwait(false); + await VectorStoreCollection.UpsertAsync(batch, cancellationToken).ConfigureAwait(false); } if (preExistingKeys?.Count > 0) { - await _vectorStoreCollection!.DeleteAsync(preExistingKeys, cancellationToken).ConfigureAwait(false); + await VectorStoreCollection.DeleteAsync(preExistingKeys, cancellationToken).ConfigureAwait(false); } } - /// - protected override void Dispose(bool disposing) + private static TKey GenerateKey() { - try - { - _vectorStoreCollection?.Dispose(); - } - finally + if (typeof(TKey) == typeof(Guid)) { - _vectorStore.Dispose(); - base.Dispose(disposing); + return (TKey)(object)Guid.NewGuid(); } - } - - private VectorStoreCollectionDefinition GetVectorStoreRecordDefinition(IngestionChunk representativeChunk) - { - VectorStoreCollectionDefinition definition = new() - { - Properties = - { - new VectorStoreKeyProperty(KeyName, typeof(Guid)), - // By using T as the type here we allow the vector store - // to handle the conversion from T to the actual vector type it supports. 
- new VectorStoreVectorProperty(EmbeddingName, typeof(T), _dimensionCount) - { - DistanceFunction = _options.DistanceFunction, - IndexKind = _options.IndexKind - }, - new VectorStoreDataProperty(ContentName, typeof(T)), - new VectorStoreDataProperty(ContextName, typeof(string)), - new VectorStoreDataProperty(DocumentIdName, typeof(string)) - { - IsIndexed = true - } - } - }; - - if (representativeChunk.HasMetadata) + if (typeof(TKey) == typeof(string)) { - foreach (var metadata in representativeChunk.Metadata) - { - Type propertyType = metadata.Value.GetType(); - definition.Properties.Add(new VectorStoreDataProperty(metadata.Key, propertyType) - { - // We use lowercase storage names to ensure compatibility with various vector stores. -#pragma warning disable CA1308 // Normalize strings to uppercase - StorageName = metadata.Key.ToLowerInvariant() -#pragma warning restore CA1308 // Normalize strings to uppercase - - // We could consider indexing for certain keys like classification etc. but for now we leave it as non-indexed. - // The reason is that not every DB supports it, moreover we would need to expose the ability to configure it. - }); - } + return (TKey)(object)Guid.NewGuid().ToString(); } - return definition; + throw new NotSupportedException($"Automatic key generation is not supported for key type '{typeof(TKey)}'. Supported key types are Guid and string."); } - private async Task> GetPreExistingChunksIdsAsync(IngestionDocument document, CancellationToken cancellationToken) + private async Task> GetPreExistingChunksIdsAsync(IngestionDocument document, CancellationToken cancellationToken) { if (!_options.IncrementalIngestion) { @@ -193,19 +131,19 @@ private async Task> GetPreExistingChunksIdsAsync(Ingestion // Each Vector Store has a different max top count limit, so we use low value and loop. 
const int MaxTopCount = 1_000; - List keys = []; + List keys = []; int insertedCount; do { insertedCount = 0; - await foreach (var record in _vectorStoreCollection!.GetAsync( - filter: record => (string)record[DocumentIdName]! == document.Identifier, + await foreach (var record in VectorStoreCollection.GetAsync( + filter: record => record.DocumentId == document.Identifier, top: MaxTopCount, options: new() { Skip = keys.Count }, cancellationToken: cancellationToken).ConfigureAwait(false)) { - keys.Add(record[KeyName]!); + keys.Add(record.Key); insertedCount++; } } diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriterOptions.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriterOptions.cs index e7a7b5e6c79..3799c3a56cc 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriterOptions.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriterOptions.cs @@ -7,34 +7,12 @@ namespace Microsoft.Extensions.DataIngestion; /// -/// Represents options for the . +/// Represents options for the . /// public sealed class VectorStoreWriterOptions { private const int DefaultBatchTokenCount = 256 * IngestionChunkerOptions.DefaultTokensPerChunk; - /// - /// Gets or sets the name of the collection. When not provided, "chunks" will be used. - /// - public string CollectionName - { - get => field ?? "chunks"; - set => field = Throw.IfNullOrEmpty(value); - } - - /// - /// Gets or sets the distance function to use when creating the collection. - /// - /// - /// When not provided, the default specific to given database will be used. Check for available values. - /// - public string? DistanceFunction { get; set; } - - /// - /// Gets or sets the index kind to use when creating the collection. - /// - public string? IndexKind { get; set; } - /// /// Gets or sets a value indicating whether to perform incremental ingestion. 
/// diff --git a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Components/Pages/Chat/Chat.razor b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Components/Pages/Chat/Chat.razor index 6fc5881c18f..6e5b4212bd0 100644 --- a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Components/Pages/Chat/Chat.razor +++ b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Components/Pages/Chat/Chat.razor @@ -126,7 +126,7 @@ await InvokeAsync(StateHasChanged); var results = await Search.SearchAsync(searchPhrase, filenameFilter, maxResults: 5); return results.Select(result => - $"{result.Text}"); + $"{result.Content}"); } public void Dispose() diff --git a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/IngestedChunk.cs b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/IngestedChunk.cs index 60e6b5684e4..ae1ac018622 100644 --- a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/IngestedChunk.cs +++ b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/IngestedChunk.cs @@ -1,9 +1,14 @@ using System.Text.Json.Serialization; +using Microsoft.Extensions.DataIngestion; using Microsoft.Extensions.VectorData; namespace AIChatWeb_CSharp.Web.Services; -public class IngestedChunk +#if (IsQdrant) +public class IngestedChunk : IngestedChunkRecord +#else +public class IngestedChunk : IngestedChunkRecord +#endif { #if (IsOllama) public const int VectorDimensions = 384; // 384 is the default vector size for the all-minilm embedding model @@ -17,23 +22,7 @@ public class IngestedChunk #endif public const string CollectionName = 
"data-AIChatWeb-CSharp.Web-chunks"; - [VectorStoreKey(StorageName = "key")] - [JsonPropertyName("key")] - public required Guid Key { get; set; } - - [VectorStoreData(StorageName = "documentid")] - [JsonPropertyName("documentid")] - public required string DocumentId { get; set; } - - [VectorStoreData(StorageName = "content")] - [JsonPropertyName("content")] - public required string Text { get; set; } - - [VectorStoreData(StorageName = "context")] - [JsonPropertyName("context")] - public string? Context { get; set; } - - [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")] - [JsonPropertyName("embedding")] - public string? Vector => Text; + [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingPropertyName)] + [JsonPropertyName(EmbeddingPropertyName)] + public override string? Embedding => Content; } diff --git a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/Ingestion/DataIngestor.cs b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/Ingestion/DataIngestor.cs index c573403e618..5d9cffe29a4 100644 --- a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/Ingestion/DataIngestor.cs +++ b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/Ingestion/DataIngestor.cs @@ -9,15 +9,21 @@ namespace AIChatWeb_CSharp.Web.Services.Ingestion; public class DataIngestor( ILogger logger, ILoggerFactory loggerFactory, - VectorStore vectorStore, +#if (IsQdrant) + VectorStoreCollection vectorCollection, +#else + VectorStoreCollection vectorCollection, +#endif IEmbeddingGenerator> embeddingGenerator) { public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern) { - using var writer = new VectorStoreWriter(vectorStore, 
dimensionCount: IngestedChunk.VectorDimensions, new() +#if (IsQdrant) + using var writer = new VectorStoreWriter(vectorCollection, new() +#else + using var writer = new VectorStoreWriter(vectorCollection, new() +#endif { - CollectionName = IngestedChunk.CollectionName, - DistanceFunction = IngestedChunk.VectorDistanceFunction, IncrementalIngestion = false, }); From 727bcd5b468d063aa773a7f1ae07cd6df746d971 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 14 Mar 2026 21:31:18 +0000 Subject: [PATCH 03/14] Update all test files, snapshot files, and template files for VectorStoreWriter refactoring Co-authored-by: adamsitnik <6011991+adamsitnik@users.noreply.github.com> --- .../IngestionPipelineTests.cs | 43 ++++--- .../Writers/TestChunkRecordWithMetadata.cs | 31 +++++ .../Writers/VectorStoreWriterOptionsTests.cs | 1 - .../Writers/VectorStoreWriterTests.cs | 110 +++++++++++------- .../Components/Pages/Chat/Chat.razor | 2 +- .../aichatweb.Web/Services/IngestedChunk.cs | 25 +--- .../Services/Ingestion/DataIngestor.cs | 6 +- .../Components/Pages/Chat/Chat.razor | 2 +- .../aichatweb.Web/Services/IngestedChunk.cs | 25 +--- .../Services/Ingestion/DataIngestor.cs | 6 +- .../Components/Pages/Chat/Chat.razor | 2 +- .../aichatweb/Services/IngestedChunk.cs | 25 +--- .../Services/Ingestion/DataIngestor.cs | 6 +- .../Components/Pages/Chat/Chat.razor | 2 +- .../aichatweb.Web/Services/IngestedChunk.cs | 25 +--- .../Services/Ingestion/DataIngestor.cs | 6 +- .../Components/Pages/Chat/Chat.razor | 2 +- .../aichatweb/Services/IngestedChunk.cs | 25 +--- .../Services/Ingestion/DataIngestor.cs | 6 +- 19 files changed, 167 insertions(+), 183 deletions(-) create mode 100644 test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs 
b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs index 272ccd510a4..a2365b1a02e 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs @@ -9,6 +9,7 @@ using System.Threading; using System.Threading.Tasks; using Microsoft.Extensions.AI; +using Microsoft.Extensions.VectorData; using Microsoft.ML.Tokenizers; using Microsoft.SemanticKernel.Connectors.InMemory; using OpenTelemetry; @@ -84,7 +85,10 @@ public async Task CanProcessDocuments() TestEmbeddingGenerator embeddingGenerator = new(); using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator }); - using VectorStoreWriter vectorStoreWriter = new(testVectorStore, dimensionCount: TestEmbeddingGenerator.DimensionCount); + + var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); + var collection = testVectorStore.GetCollection>("chunks", definition); + using VectorStoreWriter> vectorStoreWriter = new(collection); using IngestionPipeline pipeline = new(CreateReader(), CreateChunker(), vectorStoreWriter); List ingestionResults = await pipeline.ProcessAsync(_sampleFiles).ToListAsync(); @@ -95,15 +99,15 @@ public async Task CanProcessDocuments() Assert.True(embeddingGenerator.WasCalled, "Embedding generator should have been called."); var retrieved = await vectorStoreWriter.VectorStoreCollection - .GetAsync(record => _sampleFiles.Any(info => info.FullName == (string)record["documentid"]!), top: 1000) + .GetAsync(record => _sampleFiles.Any(info => info.FullName == record.DocumentId), top: 1000) .ToListAsync(); Assert.NotEmpty(retrieved); for (int i = 0; i < retrieved.Count; i++) { - Assert.NotEqual(Guid.Empty, (Guid)retrieved[i]["key"]!); - Assert.NotEmpty((string)retrieved[i]["content"]!); - Assert.Contains((string)retrieved[i]["documentid"]!, _sampleFiles.Select(info => 
info.FullName)); + Assert.NotEqual(Guid.Empty, retrieved[i].Key); + Assert.NotEmpty((string)retrieved[i].Content!); + Assert.Contains(retrieved[i].DocumentId, _sampleFiles.Select(info => info.FullName)); } AssertActivities(activities, "ProcessFiles"); @@ -117,7 +121,10 @@ public async Task CanProcessDocumentsInDirectory() TestEmbeddingGenerator embeddingGenerator = new(); using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator }); - using VectorStoreWriter vectorStoreWriter = new(testVectorStore, dimensionCount: TestEmbeddingGenerator.DimensionCount); + + var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); + var collection = testVectorStore.GetCollection>("chunks-dir", definition); + using VectorStoreWriter> vectorStoreWriter = new(collection); using IngestionPipeline pipeline = new(CreateReader(), CreateChunker(), vectorStoreWriter); @@ -129,15 +136,15 @@ public async Task CanProcessDocumentsInDirectory() Assert.True(embeddingGenerator.WasCalled, "Embedding generator should have been called."); var retrieved = await vectorStoreWriter.VectorStoreCollection - .GetAsync(record => ((string)record["documentid"]!).StartsWith(directory.FullName), top: 1000) + .GetAsync(record => record.DocumentId.StartsWith(directory.FullName), top: 1000) .ToListAsync(); Assert.NotEmpty(retrieved); for (int i = 0; i < retrieved.Count; i++) { - Assert.NotEqual(Guid.Empty, (Guid)retrieved[i]["key"]!); - Assert.NotEmpty((string)retrieved[i]["content"]!); - Assert.StartsWith(directory.FullName, (string)retrieved[i]["documentid"]!); + Assert.NotEqual(Guid.Empty, retrieved[i].Key); + Assert.NotEmpty((string)retrieved[i].Content!); + Assert.StartsWith(directory.FullName, retrieved[i].DocumentId); } AssertActivities(activities, "ProcessDirectory"); @@ -151,7 +158,10 @@ public async Task ChunksCanBeMoreThanJustText() TestEmbeddingGenerator embeddingGenerator = new(); using InMemoryVectorStore 
testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator }); - using VectorStoreWriter vectorStoreWriter = new(testVectorStore, dimensionCount: TestEmbeddingGenerator.DimensionCount); + + var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); + var collection = testVectorStore.GetCollection>("chunks-img", definition); + using VectorStoreWriter> vectorStoreWriter = new(collection); using IngestionPipeline pipeline = new(CreateReader(), new ImageChunker(), vectorStoreWriter); Assert.False(embeddingGenerator.WasCalled); @@ -159,15 +169,15 @@ public async Task ChunksCanBeMoreThanJustText() AssertAllIngestionsSucceeded(ingestionResults); var retrieved = await vectorStoreWriter.VectorStoreCollection - .GetAsync(record => ((string)record["documentid"]!).EndsWith(_withImage.Name), top: 100) + .GetAsync(record => record.DocumentId.EndsWith(_withImage.Name), top: 100) .ToListAsync(); Assert.True(embeddingGenerator.WasCalled); Assert.NotEmpty(retrieved); for (int i = 0; i < retrieved.Count; i++) { - Assert.NotEqual(Guid.Empty, (Guid)retrieved[i]["key"]!); - Assert.EndsWith(_withImage.Name, (string)retrieved[i]["documentid"]!); + Assert.NotEqual(Guid.Empty, retrieved[i].Key); + Assert.EndsWith(_withImage.Name, retrieved[i].DocumentId); } AssertActivities(activities, "ProcessFiles"); @@ -200,7 +210,10 @@ public async Task SingleFailureDoesNotTearDownEntirePipeline() TestEmbeddingGenerator embeddingGenerator = new(); using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator }); - using VectorStoreWriter vectorStoreWriter = new(testVectorStore, dimensionCount: TestEmbeddingGenerator.DimensionCount); + + var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); + var collection = testVectorStore.GetCollection>("chunks-fail", definition); + using VectorStoreWriter> vectorStoreWriter = new(collection); using IngestionPipeline pipeline = 
new(failingForFirstReader, CreateChunker(), vectorStoreWriter); diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs new file mode 100644 index 00000000000..bc283241509 --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs @@ -0,0 +1,31 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Text.Json.Serialization; +using Microsoft.Extensions.VectorData; + +namespace Microsoft.Extensions.DataIngestion.Writers.Tests; + +public class TestChunkRecordWithMetadata : IngestedChunkRecord +{ + public const int TestDimensionCount = 4; + + [VectorStoreVector(TestDimensionCount, StorageName = EmbeddingPropertyName)] + [JsonPropertyName(EmbeddingPropertyName)] + public override string? Embedding => Content; + + [VectorStoreData(StorageName = "classification")] + [JsonPropertyName("classification")] + public string? Classification { get; set; } + + public override void SetMetadata(string key, object? 
value) + { + switch (key) + { + case nameof(Classification): + Classification = value as string; + break; + } + } +} diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterOptionsTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterOptionsTests.cs index 013b24352f4..8612ca933b5 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterOptionsTests.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterOptionsTests.cs @@ -13,7 +13,6 @@ public void DefaultValues_ShouldBeSetCorrectly() { VectorStoreWriterOptions options = new(); - Assert.Equal("chunks", options.CollectionName); Assert.True(options.IncrementalIngestion); Assert.Equal(512000, options.BatchTokenCount); // 256 * 2000 } diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs index 1ac09dd6577..6f2177f84cf 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs @@ -14,42 +14,64 @@ namespace Microsoft.Extensions.DataIngestion.Writers.Tests; public abstract class VectorStoreWriterTests { [Fact] - public async Task CanGenerateDynamicSchema() + public async Task CanWriteChunks() { string documentId = Guid.NewGuid().ToString(); using TestEmbeddingGenerator testEmbeddingGenerator = new(); using VectorStore vectorStore = CreateVectorStore(testEmbeddingGenerator); - using VectorStoreWriter writer = new( - vectorStore, - dimensionCount: TestEmbeddingGenerator.DimensionCount); + + var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); + var collection = vectorStore.GetCollection>("chunks", definition); + + using VectorStoreWriter> writer = 
new(collection); IngestionDocument document = new(documentId); IngestionChunk chunk = TestChunkFactory.CreateChunk("some content", document); - chunk.Metadata["key1"] = "value1"; - chunk.Metadata["key2"] = 123; - chunk.Metadata["key3"] = true; - chunk.Metadata["key4"] = 123.45; List> chunks = [chunk]; Assert.False(testEmbeddingGenerator.WasCalled); await writer.WriteAsync(chunks.ToAsyncEnumerable()); - Dictionary record = await writer.VectorStoreCollection - .GetAsync(filter: record => (string)record["documentid"]! == documentId, top: 1) + IngestedChunkRecord record = await writer.VectorStoreCollection + .GetAsync(filter: record => record.DocumentId == documentId, top: 1) .SingleAsync(); Assert.NotNull(record); - Assert.NotNull(record["key"]); - Assert.Equal(documentId, record["documentid"]); - Assert.Equal(chunks[0].Content, record["content"]); + Assert.NotEqual(Guid.Empty, record.Key); + Assert.Equal(documentId, record.DocumentId); + Assert.Equal(chunks[0].Content, record.Content); Assert.True(testEmbeddingGenerator.WasCalled); - foreach (var kvp in chunks[0].Metadata) - { - Assert.True(record.ContainsKey(kvp.Key), $"Record does not contain key '{kvp.Key}'"); - Assert.Equal(kvp.Value, record[kvp.Key]); - } + } + + [Fact] + public async Task CanWriteChunksWithMetadata() + { + string documentId = Guid.NewGuid().ToString(); + + using TestEmbeddingGenerator testEmbeddingGenerator = new(); + using VectorStore vectorStore = CreateVectorStore(testEmbeddingGenerator); + + var collection = vectorStore.GetCollection("chunks-meta"); + using VectorStoreWriter writer = new(collection); + + IngestionDocument document = new(documentId); + IngestionChunk chunk = TestChunkFactory.CreateChunk("some content", document); + chunk.Metadata["Classification"] = "important"; + + List> chunks = [chunk]; + + await writer.WriteAsync(chunks.ToAsyncEnumerable()); + + TestChunkRecordWithMetadata record = await writer.VectorStoreCollection + .GetAsync(filter: record => record.DocumentId == 
documentId, top: 1) + .SingleAsync(); + + Assert.NotNull(record); + Assert.Equal(documentId, record.DocumentId); + Assert.Equal(chunks[0].Content, record.Content); + Assert.Equal("important", record.Classification); } [Fact] @@ -59,9 +81,12 @@ public async Task DoesSupportIncrementalIngestion() using TestEmbeddingGenerator testEmbeddingGenerator = new(); using VectorStore vectorStore = CreateVectorStore(testEmbeddingGenerator); - using VectorStoreWriter writer = new( - vectorStore, - dimensionCount: TestEmbeddingGenerator.DimensionCount, + + var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); + var collection = vectorStore.GetCollection>("chunks-incr", definition); + + using VectorStoreWriter> writer = new( + collection, options: new() { IncrementalIngestion = true, @@ -69,8 +94,6 @@ public async Task DoesSupportIncrementalIngestion() IngestionDocument document = new(documentId); IngestionChunk chunk1 = TestChunkFactory.CreateChunk("first chunk", document); - chunk1.Metadata["key1"] = "value1"; - IngestionChunk chunk2 = TestChunkFactory.CreateChunk("second chunk", document); List> chunks = [chunk1, chunk2]; @@ -78,27 +101,25 @@ public async Task DoesSupportIncrementalIngestion() await writer.WriteAsync(chunks.ToAsyncEnumerable()); int recordCount = await writer.VectorStoreCollection - .GetAsync(filter: record => (string)record["documentid"]! == documentId, top: 100) + .GetAsync(filter: record => record.DocumentId == documentId, top: 100) .CountAsync(); Assert.Equal(chunks.Count, recordCount); // Now we will do an incremental ingestion that updates the chunk(s). IngestionChunk updatedChunk = TestChunkFactory.CreateChunk("different content", document); - updatedChunk.Metadata["key1"] = "value2"; List> updatedChunks = [updatedChunk]; await writer.WriteAsync(updatedChunks.ToAsyncEnumerable()); // We ask for 100 records, but we expect only 1 as the previous 2 should have been deleted. 
- Dictionary record = await writer.VectorStoreCollection - .GetAsync(filter: record => (string)record["documentid"]! == documentId, top: 100) + IngestedChunkRecord record = await writer.VectorStoreCollection + .GetAsync(filter: record => record.DocumentId == documentId, top: 100) .SingleAsync(); Assert.NotNull(record); - Assert.NotNull(record["key"]); - Assert.Equal("different content", record["content"]); - Assert.Equal("value2", record["key1"]); + Assert.NotEqual(Guid.Empty, record.Key); + Assert.Equal("different content", record.Content); } public static TheoryData BatchingTestCases => new() @@ -131,9 +152,11 @@ public async Task BatchesChunks(int? batchTokenCount, int[] chunkTokenCounts) options.BatchTokenCount = batchTokenCount.Value; } - using VectorStoreWriter writer = new( - vectorStore, - dimensionCount: TestEmbeddingGenerator.DimensionCount, + var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); + var collection = vectorStore.GetCollection>("chunks-batch", definition); + + using VectorStoreWriter> writer = new( + collection, options: options); IngestionDocument document = new(documentId); @@ -146,7 +169,7 @@ public async Task BatchesChunks(int? batchTokenCount, int[] chunkTokenCounts) await writer.WriteAsync(chunks.ToAsyncEnumerable()); int recordCount = await writer.VectorStoreCollection - .GetAsync(filter: record => (string)record["documentid"]! 
== documentId, top: 100) + .GetAsync(filter: record => record.DocumentId == documentId, top: 100) .CountAsync(); Assert.Equal(chunks.Count, recordCount); @@ -159,9 +182,12 @@ public async Task IncrementalIngestion_WithManyRecords_DeletesAllPreExistingChun using TestEmbeddingGenerator testEmbeddingGenerator = new(); using VectorStore vectorStore = CreateVectorStore(testEmbeddingGenerator); - using VectorStoreWriter writer = new( - vectorStore, - dimensionCount: TestEmbeddingGenerator.DimensionCount, + + var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); + var collection = vectorStore.GetCollection>("chunks-many", definition); + + using VectorStoreWriter> writer = new( + collection, options: new() { IncrementalIngestion = true, @@ -180,7 +206,7 @@ public async Task IncrementalIngestion_WithManyRecords_DeletesAllPreExistingChun await writer.WriteAsync(chunks.ToAsyncEnumerable()); int recordCount = await writer.VectorStoreCollection - .GetAsync(filter: record => (string)record["documentid"]! == documentId, top: 10000) + .GetAsync(filter: record => record.DocumentId == documentId, top: 10000) .CountAsync(); Assert.Equal(chunks.Count, recordCount); @@ -194,13 +220,13 @@ public async Task IncrementalIngestion_WithManyRecords_DeletesAllPreExistingChun await writer.WriteAsync(updatedChunks.ToAsyncEnumerable()); // Verify that all old records were deleted and only the new ones remain - List> records = await writer.VectorStoreCollection - .GetAsync(filter: record => (string)record["documentid"]! == documentId, top: 10000) + List> records = await writer.VectorStoreCollection + .GetAsync(filter: record => record.DocumentId == documentId, top: 10000) .ToListAsync(); Assert.Equal(updatedChunks.Count, records.Count); - Assert.Contains(records, r => (string)r["content"]! == "updated chunk 1"); - Assert.Contains(records, r => (string)r["content"]! == "updated chunk 2"); + Assert.Contains(records, r => (string)r.Content! 
== "updated chunk 1"); + Assert.Contains(records, r => (string)r.Content! == "updated chunk 2"); } protected abstract VectorStore CreateVectorStore(TestEmbeddingGenerator testEmbeddingGenerator); diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor index 6fc5881c18f..6e5b4212bd0 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor @@ -126,7 +126,7 @@ await InvokeAsync(StateHasChanged); var results = await Search.SearchAsync(searchPhrase, filenameFilter, maxResults: 5); return results.Select(result => - $"{result.Text}"); + $"{result.Content}"); } public void Dispose() diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs index af609ea239e..a32ef7dd6e8 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs @@ -1,31 +1,16 @@ using System.Text.Json.Serialization; +using 
Microsoft.Extensions.DataIngestion; using Microsoft.Extensions.VectorData; namespace aichatweb.Web.Services; -public class IngestedChunk +public class IngestedChunk : IngestedChunkRecord { public const int VectorDimensions = 1536; // 1536 is the default vector size for the OpenAI text-embedding-3-small model public const string VectorDistanceFunction = DistanceFunction.CosineDistance; public const string CollectionName = "data-aichatweb-chunks"; - [VectorStoreKey(StorageName = "key")] - [JsonPropertyName("key")] - public required Guid Key { get; set; } - - [VectorStoreData(StorageName = "documentid")] - [JsonPropertyName("documentid")] - public required string DocumentId { get; set; } - - [VectorStoreData(StorageName = "content")] - [JsonPropertyName("content")] - public required string Text { get; set; } - - [VectorStoreData(StorageName = "context")] - [JsonPropertyName("context")] - public string? Context { get; set; } - - [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")] - [JsonPropertyName("embedding")] - public string? Vector => Text; + [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingPropertyName)] + [JsonPropertyName(EmbeddingPropertyName)] + public override string? 
Embedding => Content; } diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs index 9dd366a03a5..1a482ee411b 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs @@ -9,15 +9,13 @@ namespace aichatweb.Web.Services.Ingestion; public class DataIngestor( ILogger logger, ILoggerFactory loggerFactory, - VectorStore vectorStore, + VectorStoreCollection vectorCollection, IEmbeddingGenerator> embeddingGenerator) { public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern) { - using var writer = new VectorStoreWriter(vectorStore, dimensionCount: IngestedChunk.VectorDimensions, new() + using var writer = new VectorStoreWriter(vectorCollection, new() { - CollectionName = IngestedChunk.CollectionName, - DistanceFunction = IngestedChunk.VectorDistanceFunction, IncrementalIngestion = false, }); diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor index 6fc5881c18f..6e5b4212bd0 100644 --- 
a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor @@ -126,7 +126,7 @@ await InvokeAsync(StateHasChanged); var results = await Search.SearchAsync(searchPhrase, filenameFilter, maxResults: 5); return results.Select(result => - $"{result.Text}"); + $"{result.Content}"); } public void Dispose() diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs index 2d93db8fd94..d4782d5555e 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs @@ -1,31 +1,16 @@ using System.Text.Json.Serialization; +using Microsoft.Extensions.DataIngestion; using Microsoft.Extensions.VectorData; namespace aichatweb.Web.Services; -public class IngestedChunk +public class IngestedChunk : IngestedChunkRecord { public const int VectorDimensions = 1536; // 1536 is the default vector size for the OpenAI text-embedding-3-small model public const string VectorDistanceFunction = DistanceFunction.CosineSimilarity; public const string CollectionName = "data-aichatweb-chunks"; - [VectorStoreKey(StorageName = "key")] - [JsonPropertyName("key")] - public required Guid Key { get; set; } - 
- [VectorStoreData(StorageName = "documentid")] - [JsonPropertyName("documentid")] - public required string DocumentId { get; set; } - - [VectorStoreData(StorageName = "content")] - [JsonPropertyName("content")] - public required string Text { get; set; } - - [VectorStoreData(StorageName = "context")] - [JsonPropertyName("context")] - public string? Context { get; set; } - - [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")] - [JsonPropertyName("embedding")] - public string? Vector => Text; + [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingPropertyName)] + [JsonPropertyName(EmbeddingPropertyName)] + public override string? Embedding => Content; } diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs index 9dd366a03a5..1a482ee411b 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs @@ -9,15 +9,13 @@ namespace aichatweb.Web.Services.Ingestion; public class DataIngestor( ILogger logger, ILoggerFactory loggerFactory, - VectorStore vectorStore, + VectorStoreCollection vectorCollection, IEmbeddingGenerator> embeddingGenerator) { public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern) { - using var writer = new VectorStoreWriter(vectorStore, dimensionCount: IngestedChunk.VectorDimensions, 
new() + using var writer = new VectorStoreWriter(vectorCollection, new() { - CollectionName = IngestedChunk.CollectionName, - DistanceFunction = IngestedChunk.VectorDistanceFunction, IncrementalIngestion = false, }); diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Components/Pages/Chat/Chat.razor b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Components/Pages/Chat/Chat.razor index 6fc5881c18f..6e5b4212bd0 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Components/Pages/Chat/Chat.razor +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Components/Pages/Chat/Chat.razor @@ -126,7 +126,7 @@ await InvokeAsync(StateHasChanged); var results = await Search.SearchAsync(searchPhrase, filenameFilter, maxResults: 5); return results.Select(result => - $"{result.Text}"); + $"{result.Content}"); } public void Dispose() diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/IngestedChunk.cs index 68af3ef20fb..4dc73a5b4ca 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/IngestedChunk.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/IngestedChunk.cs @@ -1,31 +1,16 @@ using System.Text.Json.Serialization; +using Microsoft.Extensions.DataIngestion; using 
Microsoft.Extensions.VectorData; namespace aichatweb.Services; -public class IngestedChunk +public class IngestedChunk : IngestedChunkRecord { public const int VectorDimensions = 1536; // 1536 is the default vector size for the OpenAI text-embedding-3-small model public const string VectorDistanceFunction = DistanceFunction.CosineDistance; public const string CollectionName = "data-aichatweb-chunks"; - [VectorStoreKey(StorageName = "key")] - [JsonPropertyName("key")] - public required Guid Key { get; set; } - - [VectorStoreData(StorageName = "documentid")] - [JsonPropertyName("documentid")] - public required string DocumentId { get; set; } - - [VectorStoreData(StorageName = "content")] - [JsonPropertyName("content")] - public required string Text { get; set; } - - [VectorStoreData(StorageName = "context")] - [JsonPropertyName("context")] - public string? Context { get; set; } - - [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")] - [JsonPropertyName("embedding")] - public string? Vector => Text; + [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingPropertyName)] + [JsonPropertyName(EmbeddingPropertyName)] + public override string? 
Embedding => Content; } diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/Ingestion/DataIngestor.cs index d97b986b694..2c4f96cc2ac 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/Ingestion/DataIngestor.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/Ingestion/DataIngestor.cs @@ -9,15 +9,13 @@ namespace aichatweb.Services.Ingestion; public class DataIngestor( ILogger logger, ILoggerFactory loggerFactory, - VectorStore vectorStore, + VectorStoreCollection vectorCollection, IEmbeddingGenerator> embeddingGenerator) { public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern) { - using var writer = new VectorStoreWriter(vectorStore, dimensionCount: IngestedChunk.VectorDimensions, new() + using var writer = new VectorStoreWriter(vectorCollection, new() { - CollectionName = IngestedChunk.CollectionName, - DistanceFunction = IngestedChunk.VectorDistanceFunction, IncrementalIngestion = false, }); diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor index 6fc5881c18f..6e5b4212bd0 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor +++ 
b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor @@ -126,7 +126,7 @@ await InvokeAsync(StateHasChanged); var results = await Search.SearchAsync(searchPhrase, filenameFilter, maxResults: 5); return results.Select(result => - $"{result.Text}"); + $"{result.Content}"); } public void Dispose() diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs index b55a8b3c817..20a86655c44 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs @@ -1,31 +1,16 @@ using System.Text.Json.Serialization; +using Microsoft.Extensions.DataIngestion; using Microsoft.Extensions.VectorData; namespace aichatweb.Web.Services; -public class IngestedChunk +public class IngestedChunk : IngestedChunkRecord { public const int VectorDimensions = 384; // 384 is the default vector size for the all-minilm embedding model public const string VectorDistanceFunction = DistanceFunction.CosineSimilarity; public const string CollectionName = "data-aichatweb-chunks"; - [VectorStoreKey(StorageName = "key")] - [JsonPropertyName("key")] - public required Guid Key { get; set; } - - [VectorStoreData(StorageName = "documentid")] - [JsonPropertyName("documentid")] - public required string DocumentId { get; set; } - - [VectorStoreData(StorageName = "content")] - [JsonPropertyName("content")] - public required string 
Text { get; set; } - - [VectorStoreData(StorageName = "context")] - [JsonPropertyName("context")] - public string? Context { get; set; } - - [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")] - [JsonPropertyName("embedding")] - public string? Vector => Text; + [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingPropertyName)] + [JsonPropertyName(EmbeddingPropertyName)] + public override string? Embedding => Content; } diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs index 9dd366a03a5..0f91c879f57 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs @@ -9,15 +9,13 @@ namespace aichatweb.Web.Services.Ingestion; public class DataIngestor( ILogger logger, ILoggerFactory loggerFactory, - VectorStore vectorStore, + VectorStoreCollection vectorCollection, IEmbeddingGenerator> embeddingGenerator) { public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern) { - using var writer = new VectorStoreWriter(vectorStore, dimensionCount: IngestedChunk.VectorDimensions, new() + using var writer = new VectorStoreWriter(vectorCollection, new() { - CollectionName = IngestedChunk.CollectionName, - DistanceFunction = IngestedChunk.VectorDistanceFunction, IncrementalIngestion = false, }); diff --git 
a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Components/Pages/Chat/Chat.razor b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Components/Pages/Chat/Chat.razor index 6fc5881c18f..6e5b4212bd0 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Components/Pages/Chat/Chat.razor +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Components/Pages/Chat/Chat.razor @@ -126,7 +126,7 @@ await InvokeAsync(StateHasChanged); var results = await Search.SearchAsync(searchPhrase, filenameFilter, maxResults: 5); return results.Select(result => - $"{result.Text}"); + $"{result.Content}"); } public void Dispose() diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/IngestedChunk.cs index 348bb5d942f..9aa8dab5fe7 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/IngestedChunk.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/IngestedChunk.cs @@ -1,31 +1,16 @@ using System.Text.Json.Serialization; +using Microsoft.Extensions.DataIngestion; using Microsoft.Extensions.VectorData; namespace aichatweb.Services; -public class IngestedChunk +public class IngestedChunk : IngestedChunkRecord { public const int VectorDimensions = 1536; // 1536 is the default vector size for the OpenAI 
text-embedding-3-small model public const string VectorDistanceFunction = DistanceFunction.CosineSimilarity; public const string CollectionName = "data-aichatweb-chunks"; - [VectorStoreKey(StorageName = "key")] - [JsonPropertyName("key")] - public required Guid Key { get; set; } - - [VectorStoreData(StorageName = "documentid")] - [JsonPropertyName("documentid")] - public required string DocumentId { get; set; } - - [VectorStoreData(StorageName = "content")] - [JsonPropertyName("content")] - public required string Text { get; set; } - - [VectorStoreData(StorageName = "context")] - [JsonPropertyName("context")] - public string? Context { get; set; } - - [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")] - [JsonPropertyName("embedding")] - public string? Vector => Text; + [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingPropertyName)] + [JsonPropertyName(EmbeddingPropertyName)] + public override string? 
Embedding => Content; } diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/Ingestion/DataIngestor.cs index d97b986b694..2c4f96cc2ac 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/Ingestion/DataIngestor.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/Ingestion/DataIngestor.cs @@ -9,15 +9,13 @@ namespace aichatweb.Services.Ingestion; public class DataIngestor( ILogger logger, ILoggerFactory loggerFactory, - VectorStore vectorStore, + VectorStoreCollection vectorCollection, IEmbeddingGenerator> embeddingGenerator) { public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern) { - using var writer = new VectorStoreWriter(vectorStore, dimensionCount: IngestedChunk.VectorDimensions, new() + using var writer = new VectorStoreWriter(vectorCollection, new() { - CollectionName = IngestedChunk.CollectionName, - DistanceFunction = IngestedChunk.VectorDistanceFunction, IncrementalIngestion = false, }); From e48fa9d49053da2ffa13adb9d20e1d49903111ff Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 14 Mar 2026 21:38:52 +0000 Subject: [PATCH 04/14] Fix warnings and test failures - correct VectorStoreCollectionDefinition property names Co-authored-by: adamsitnik <6011991+adamsitnik@users.noreply.github.com> --- .../Writers/IngestedChunkRecord.cs | 82 ++++++++++--------- .../Writers/VectorStoreWriter.cs | 2 + .../IngestionPipelineTests.cs | 4 +- .../Writers/VectorStoreWriterTests.cs | 4 +- 4 files changed, 50 insertions(+), 42 
deletions(-) diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs index dd24d8edd18..35dc5a2ba8c 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs @@ -18,7 +18,9 @@ namespace Microsoft.Extensions.DataIngestion; /// When the vector dimension count is known at compile time, derive from this class and add /// the to the property. /// +#pragma warning disable CA1005 // Avoid excessive parameters on generic types - TKey, TChunk, and TRecord are all necessary public class IngestedChunkRecord +#pragma warning restore CA1005 { /// /// The storage name for the property. @@ -45,6 +47,48 @@ public class IngestedChunkRecord /// public const string EmbeddingPropertyName = "embedding"; + /// + /// Creates a for . + /// + /// The number of dimensions that the vector has. + /// + /// The distance function to use. When not provided, the default specific to given database will be used. + /// Check for available values. + /// + /// The index kind to use. + /// A suitable for creating a vector store collection. + /// is less than or equal to zero. +#pragma warning disable CA1000 // Do not declare static members on generic types - needs access to TKey and TChunk type parameters + public static VectorStoreCollectionDefinition CreateCollectionDefinition(int dimensionCount, string? distanceFunction = null, string? indexKind = null) +#pragma warning restore CA1000 + { + _ = Shared.Diagnostics.Throw.IfLessThanOrEqual(dimensionCount, 0); + + return new VectorStoreCollectionDefinition + { + Properties = + { + new VectorStoreKeyProperty(nameof(Key), typeof(TKey)) { StorageName = KeyPropertyName }, + + // By using TChunk as the type here we allow the vector store + // to handle the conversion from TChunk to the actual vector type it supports. 
+ new VectorStoreVectorProperty(nameof(Embedding), typeof(TChunk), dimensionCount) + { + StorageName = EmbeddingPropertyName, + DistanceFunction = distanceFunction, + IndexKind = indexKind, + }, + new VectorStoreDataProperty(nameof(Content), typeof(TChunk)) { StorageName = ContentPropertyName }, + new VectorStoreDataProperty(nameof(Context), typeof(string)) { StorageName = ContextPropertyName }, + new VectorStoreDataProperty(nameof(DocumentId), typeof(string)) + { + StorageName = DocumentIdPropertyName, + IsIndexed = true, + }, + }, + }; + } + /// /// Gets or sets the unique key for this record. /// @@ -98,42 +142,4 @@ public virtual void SetMetadata(string key, object? value) // Default implementation: no-op. // Derived classes can override to map metadata keys to typed properties. } - - /// - /// Creates a for . - /// - /// The number of dimensions that the vector has. - /// - /// The distance function to use. When not provided, the default specific to given database will be used. - /// Check for available values. - /// - /// The index kind to use. - /// A suitable for creating a vector store collection. - /// is less than or equal to zero. - public static VectorStoreCollectionDefinition CreateCollectionDefinition(int dimensionCount, string? distanceFunction = null, string? indexKind = null) - { - Shared.Diagnostics.Throw.IfLessThanOrEqual(dimensionCount, 0); - - return new VectorStoreCollectionDefinition - { - Properties = - { - new VectorStoreKeyProperty(KeyPropertyName, typeof(TKey)), - - // By using TChunk as the type here we allow the vector store - // to handle the conversion from TChunk to the actual vector type it supports. 
- new VectorStoreVectorProperty(EmbeddingPropertyName, typeof(TChunk), dimensionCount) - { - DistanceFunction = distanceFunction, - IndexKind = indexKind, - }, - new VectorStoreDataProperty(ContentPropertyName, typeof(TChunk)), - new VectorStoreDataProperty(ContextPropertyName, typeof(string)), - new VectorStoreDataProperty(DocumentIdPropertyName, typeof(string)) - { - IsIndexed = true, - }, - }, - }; - } } diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs index d373843af86..a64e0f92733 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs @@ -16,7 +16,9 @@ namespace Microsoft.Extensions.DataIngestion; /// The type of the key for the record. /// The type of the chunk content. /// The type of the record stored in the vector store. +#pragma warning disable CA1005 // Avoid excessive parameters on generic types - TKey, TChunk, and TRecord are all necessary public sealed class VectorStoreWriter : IngestionChunkWriter +#pragma warning restore CA1005 where TKey : notnull where TRecord : IngestedChunkRecord, new() { diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs index a2365b1a02e..6f1143072ab 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs @@ -106,7 +106,7 @@ public async Task CanProcessDocuments() for (int i = 0; i < retrieved.Count; i++) { Assert.NotEqual(Guid.Empty, retrieved[i].Key); - Assert.NotEmpty((string)retrieved[i].Content!); + Assert.NotEmpty(retrieved[i].Content!); Assert.Contains(retrieved[i].DocumentId, _sampleFiles.Select(info => info.FullName)); } @@ -143,7 
+143,7 @@ public async Task CanProcessDocumentsInDirectory() for (int i = 0; i < retrieved.Count; i++) { Assert.NotEqual(Guid.Empty, retrieved[i].Key); - Assert.NotEmpty((string)retrieved[i].Content!); + Assert.NotEmpty(retrieved[i].Content!); Assert.StartsWith(directory.FullName, retrieved[i].DocumentId); } diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs index 6f2177f84cf..353e569030a 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs @@ -225,8 +225,8 @@ public async Task IncrementalIngestion_WithManyRecords_DeletesAllPreExistingChun .ToListAsync(); Assert.Equal(updatedChunks.Count, records.Count); - Assert.Contains(records, r => (string)r.Content! == "updated chunk 1"); - Assert.Contains(records, r => (string)r.Content! 
== "updated chunk 2"); + Assert.Contains(records, r => r.Content == "updated chunk 1"); + Assert.Contains(records, r => r.Content == "updated chunk 2"); } protected abstract VectorStore CreateVectorStore(TestEmbeddingGenerator testEmbeddingGenerator); From e9aa7fe4215a9f10d489965f6149ef9933ef6fcd Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 14 Mar 2026 21:39:54 +0000 Subject: [PATCH 05/14] Address code review feedback - fix error message wording Co-authored-by: adamsitnik <6011991+adamsitnik@users.noreply.github.com> --- .../Writers/VectorStoreWriter.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs index a64e0f92733..a7f3399bf3c 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs @@ -120,7 +120,7 @@ private static TKey GenerateKey() return (TKey)(object)Guid.NewGuid().ToString(); } - throw new NotSupportedException($"Automatic key generation is not supported for key type '{typeof(TKey)}'. Supported key types are Guid and string."); + throw new NotSupportedException($"Automatic key generation is not supported for key type '{typeof(TKey)}'. 
Supported key types are Guid or string."); } private async Task> GetPreExistingChunksIdsAsync(IngestionDocument document, CancellationToken cancellationToken) From 56fa7ad77847ce8ce8aa122b1a83d66aaf1e8b3f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 16 Mar 2026 12:07:57 +0000 Subject: [PATCH 06/14] Address reviewer feedback: remove TKey generic, make properties virtual, throw in SetMetadata, make consts protected, remove JSON attributes Co-authored-by: adamsitnik <6011991+adamsitnik@users.noreply.github.com> --- .../Writers/IngestedChunkRecord.cs | 42 +++++++------------ .../Writers/VectorStoreWriter.cs | 37 ++++------------ .../AIChatWeb-CSharp.Web/Program.Aspire.cs | 2 +- .../AIChatWeb-CSharp.Web/Program.cs | 2 +- .../Services/IngestedChunk.cs | 10 +---- .../Services/Ingestion/DataIngestor.cs | 10 +---- .../Services/SemanticSearch.cs | 4 -- .../IngestionPipelineTests.cs | 24 +++++------ .../Writers/TestChunkRecordWithMetadata.cs | 3 +- .../Writers/VectorStoreWriterTests.cs | 32 +++++++------- .../aichatweb/aichatweb.Web/Program.cs | 2 +- .../aichatweb.Web/Services/IngestedChunk.cs | 3 +- .../Services/Ingestion/DataIngestor.cs | 4 +- .../aichatweb.Web/Services/SemanticSearch.cs | 2 +- .../aichatweb.Web/Services/IngestedChunk.cs | 3 +- .../Services/Ingestion/DataIngestor.cs | 4 +- .../aichatweb.Web/Services/SemanticSearch.cs | 2 +- .../aichatweb/Program.cs | 2 +- .../aichatweb/Services/IngestedChunk.cs | 3 +- .../Services/Ingestion/DataIngestor.cs | 4 +- .../aichatweb/Services/SemanticSearch.cs | 2 +- .../aichatweb.Web/Services/IngestedChunk.cs | 3 +- .../Services/Ingestion/DataIngestor.cs | 2 +- .../aichatweb/Services/IngestedChunk.cs | 3 +- .../Services/Ingestion/DataIngestor.cs | 4 +- .../aichatweb/Services/SemanticSearch.cs | 2 +- 26 files changed, 79 insertions(+), 132 deletions(-) diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs 
b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs index 35dc5a2ba8c..1a634a1c4ae 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs @@ -2,15 +2,13 @@ // The .NET Foundation licenses this file to you under the MIT license. using System; -using System.Text.Json.Serialization; using Microsoft.Extensions.VectorData; namespace Microsoft.Extensions.DataIngestion; /// -/// Represents the base record type used by to store ingested chunks in a vector store. +/// Represents the base record type used by to store ingested chunks in a vector store. /// -/// The type of the key for the record. /// The type of the chunk content. /// /// When the vector dimension count is not known at compile time, use the @@ -18,37 +16,35 @@ namespace Microsoft.Extensions.DataIngestion; /// When the vector dimension count is known at compile time, derive from this class and add /// the to the property. /// -#pragma warning disable CA1005 // Avoid excessive parameters on generic types - TKey, TChunk, and TRecord are all necessary -public class IngestedChunkRecord -#pragma warning restore CA1005 +public class IngestedChunkRecord { /// /// The storage name for the property. /// - public const string KeyPropertyName = "key"; + protected const string KeyPropertyName = "key"; /// /// The storage name for the property. /// - public const string DocumentIdPropertyName = "documentid"; + protected const string DocumentIdPropertyName = "documentid"; /// /// The storage name for the property. /// - public const string ContentPropertyName = "content"; + protected const string ContentPropertyName = "content"; /// /// The storage name for the property. /// - public const string ContextPropertyName = "context"; + protected const string ContextPropertyName = "context"; /// /// The storage name for the property. 
/// - public const string EmbeddingPropertyName = "embedding"; + protected const string EmbeddingPropertyName = "embedding"; /// - /// Creates a for . + /// Creates a for . /// /// The number of dimensions that the vector has. /// @@ -58,7 +54,7 @@ public class IngestedChunkRecord /// The index kind to use. /// A suitable for creating a vector store collection. /// is less than or equal to zero. -#pragma warning disable CA1000 // Do not declare static members on generic types - needs access to TKey and TChunk type parameters +#pragma warning disable CA1000 // Do not declare static members on generic types - needs access to TChunk type parameter public static VectorStoreCollectionDefinition CreateCollectionDefinition(int dimensionCount, string? distanceFunction = null, string? indexKind = null) #pragma warning restore CA1000 { @@ -68,7 +64,7 @@ public static VectorStoreCollectionDefinition CreateCollectionDefinition(int dim { Properties = { - new VectorStoreKeyProperty(nameof(Key), typeof(TKey)) { StorageName = KeyPropertyName }, + new VectorStoreKeyProperty(nameof(Key), typeof(Guid)) { StorageName = KeyPropertyName }, // By using TChunk as the type here we allow the vector store // to handle the conversion from TChunk to the actual vector type it supports. @@ -93,29 +89,25 @@ public static VectorStoreCollectionDefinition CreateCollectionDefinition(int dim /// Gets or sets the unique key for this record. /// [VectorStoreKey(StorageName = KeyPropertyName)] - [JsonPropertyName(KeyPropertyName)] - public TKey Key { get; set; } = default!; + public virtual Guid Key { get; set; } /// /// Gets or sets the identifier of the document from which this chunk was extracted. /// [VectorStoreData(StorageName = DocumentIdPropertyName)] - [JsonPropertyName(DocumentIdPropertyName)] - public string DocumentId { get; set; } = string.Empty; + public virtual string DocumentId { get; set; } = string.Empty; /// /// Gets or sets the content of the chunk. 
/// [VectorStoreData(StorageName = ContentPropertyName)] - [JsonPropertyName(ContentPropertyName)] - public TChunk? Content { get; set; } + public virtual TChunk? Content { get; set; } /// /// Gets or sets additional context for the chunk. /// [VectorStoreData(StorageName = ContextPropertyName)] - [JsonPropertyName(ContextPropertyName)] - public string? Context { get; set; } + public virtual string? Context { get; set; } /// /// Gets the embedding value for this record. @@ -125,7 +117,6 @@ public static VectorStoreCollectionDefinition CreateCollectionDefinition(int dim /// will convert this to a vector. Override this property in derived classes to add /// the with the appropriate dimension count. /// - [JsonPropertyName(EmbeddingPropertyName)] public virtual TChunk? Embedding => Content; /// @@ -135,11 +126,10 @@ public static VectorStoreCollectionDefinition CreateCollectionDefinition(int dim /// The metadata value. /// /// Override this method in derived classes to store metadata as typed properties with - /// attributes. The default implementation is a no-op. + /// attributes. /// public virtual void SetMetadata(string key, object? value) { - // Default implementation: no-op. - // Derived classes can override to map metadata keys to typed properties. + throw new NotSupportedException($"Metadata key '{key}' is not supported. Override {nameof(SetMetadata)} in a derived class to handle metadata."); } } diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs index a7f3399bf3c..dee73b6a86e 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs @@ -13,25 +13,21 @@ namespace Microsoft.Extensions.DataIngestion; /// /// Writes chunks to a . /// -/// The type of the key for the record. /// The type of the chunk content. 
/// The type of the record stored in the vector store. -#pragma warning disable CA1005 // Avoid excessive parameters on generic types - TKey, TChunk, and TRecord are all necessary -public sealed class VectorStoreWriter : IngestionChunkWriter -#pragma warning restore CA1005 - where TKey : notnull - where TRecord : IngestedChunkRecord, new() +public sealed class VectorStoreWriter : IngestionChunkWriter + where TRecord : IngestedChunkRecord, new() { private readonly VectorStoreWriterOptions _options; private bool _collectionEnsured; /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. /// /// The to use to store the instances. /// The options for the vector store writer. /// When is null. - public VectorStoreWriter(VectorStoreCollection collection, VectorStoreWriterOptions? options = default) + public VectorStoreWriter(VectorStoreCollection collection, VectorStoreWriterOptions? options = default) { VectorStoreCollection = Throw.IfNull(collection); _options = options ?? new VectorStoreWriterOptions(); @@ -40,14 +36,14 @@ public VectorStoreWriter(VectorStoreCollection collection, Vector /// /// Gets the underlying used to store the chunks. /// - public VectorStoreCollection VectorStoreCollection { get; } + public VectorStoreCollection VectorStoreCollection { get; } /// public override async Task WriteAsync(IAsyncEnumerable> chunks, CancellationToken cancellationToken = default) { _ = Throw.IfNull(chunks); - IReadOnlyList? preExistingKeys = null; + IReadOnlyList? preExistingKeys = null; List? 
batch = null; long currentBatchTokenCount = 0; @@ -66,7 +62,7 @@ public override async Task WriteAsync(IAsyncEnumerable> c TRecord record = new() { - Key = GenerateKey(), + Key = Guid.NewGuid(), Content = chunk.Content, Context = chunk.Context, DocumentId = chunk.Document.Identifier, @@ -108,22 +104,7 @@ public override async Task WriteAsync(IAsyncEnumerable> c } } - private static TKey GenerateKey() - { - if (typeof(TKey) == typeof(Guid)) - { - return (TKey)(object)Guid.NewGuid(); - } - - if (typeof(TKey) == typeof(string)) - { - return (TKey)(object)Guid.NewGuid().ToString(); - } - - throw new NotSupportedException($"Automatic key generation is not supported for key type '{typeof(TKey)}'. Supported key types are Guid or string."); - } - - private async Task> GetPreExistingChunksIdsAsync(IngestionDocument document, CancellationToken cancellationToken) + private async Task> GetPreExistingChunksIdsAsync(IngestionDocument document, CancellationToken cancellationToken) { if (!_options.IncrementalIngestion) { @@ -133,7 +114,7 @@ private async Task> GetPreExistingChunksIdsAsync(IngestionDo // Each Vector Store has a different max top count limit, so we use low value and loop. 
const int MaxTopCount = 1_000; - List keys = []; + List keys = []; int insertedCount; do { diff --git a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Program.Aspire.cs b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Program.Aspire.cs index 31442718f1f..178d77b9465 100644 --- a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Program.Aspire.cs +++ b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Program.Aspire.cs @@ -44,7 +44,7 @@ var vectorStorePath = Path.Combine(AppContext.BaseDirectory, "vector-store.db"); var vectorStoreConnectionString = $"Data Source={vectorStorePath}"; builder.Services.AddSqliteVectorStore(_ => vectorStoreConnectionString); -builder.Services.AddSqliteCollection(IngestedChunk.CollectionName, vectorStoreConnectionString); +builder.Services.AddSqliteCollection(IngestedChunk.CollectionName, vectorStoreConnectionString); #endif builder.Services.AddSingleton(); builder.Services.AddSingleton(); diff --git a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Program.cs b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Program.cs index 4b041f4a15f..0736628c417 100644 --- a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Program.cs +++ b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Program.cs @@ -105,7 +105,7 @@ var vectorStorePath = Path.Combine(AppContext.BaseDirectory, "vector-store.db"); var vectorStoreConnectionString = $"Data Source={vectorStorePath}"; builder.Services.AddSqliteVectorStore(_ => vectorStoreConnectionString); -builder.Services.AddSqliteCollection(IngestedChunk.CollectionName, 
vectorStoreConnectionString); +builder.Services.AddSqliteCollection(IngestedChunk.CollectionName, vectorStoreConnectionString); #endif builder.Services.AddSingleton(); diff --git a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/IngestedChunk.cs b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/IngestedChunk.cs index ae1ac018622..915f72c8339 100644 --- a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/IngestedChunk.cs +++ b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/IngestedChunk.cs @@ -1,14 +1,9 @@ -using System.Text.Json.Serialization; -using Microsoft.Extensions.DataIngestion; +using Microsoft.Extensions.DataIngestion; using Microsoft.Extensions.VectorData; namespace AIChatWeb_CSharp.Web.Services; -#if (IsQdrant) -public class IngestedChunk : IngestedChunkRecord -#else -public class IngestedChunk : IngestedChunkRecord -#endif +public class IngestedChunk : IngestedChunkRecord { #if (IsOllama) public const int VectorDimensions = 384; // 384 is the default vector size for the all-minilm embedding model @@ -23,6 +18,5 @@ public class IngestedChunk : IngestedChunkRecord public const string CollectionName = "data-AIChatWeb-CSharp.Web-chunks"; [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingPropertyName)] - [JsonPropertyName(EmbeddingPropertyName)] public override string? 
Embedding => Content; } diff --git a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/Ingestion/DataIngestor.cs b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/Ingestion/DataIngestor.cs index 5d9cffe29a4..76168b6e632 100644 --- a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/Ingestion/DataIngestor.cs +++ b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/Ingestion/DataIngestor.cs @@ -9,20 +9,12 @@ namespace AIChatWeb_CSharp.Web.Services.Ingestion; public class DataIngestor( ILogger logger, ILoggerFactory loggerFactory, -#if (IsQdrant) VectorStoreCollection vectorCollection, -#else - VectorStoreCollection vectorCollection, -#endif IEmbeddingGenerator> embeddingGenerator) { public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern) { -#if (IsQdrant) - using var writer = new VectorStoreWriter(vectorCollection, new() -#else - using var writer = new VectorStoreWriter(vectorCollection, new() -#endif + using var writer = new VectorStoreWriter(vectorCollection, new() { IncrementalIngestion = false, }); diff --git a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/SemanticSearch.cs b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/SemanticSearch.cs index 49bef8de3d1..8cdc6dbeae0 100644 --- a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/SemanticSearch.cs +++ b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/SemanticSearch.cs @@ -4,11 +4,7 @@ namespace AIChatWeb_CSharp.Web.Services; public class SemanticSearch( -#if (IsQdrant) VectorStoreCollection 
vectorCollection, -#else - VectorStoreCollection vectorCollection, -#endif [FromKeyedServices("ingestion_directory")] DirectoryInfo ingestionDirectory, DataIngestor dataIngestor) { diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs index 6f1143072ab..cba7f497857 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs @@ -86,9 +86,9 @@ public async Task CanProcessDocuments() TestEmbeddingGenerator embeddingGenerator = new(); using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator }); - var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); - var collection = testVectorStore.GetCollection>("chunks", definition); - using VectorStoreWriter> vectorStoreWriter = new(collection); + var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); + var collection = testVectorStore.GetCollection>("chunks", definition); + using VectorStoreWriter> vectorStoreWriter = new(collection); using IngestionPipeline pipeline = new(CreateReader(), CreateChunker(), vectorStoreWriter); List ingestionResults = await pipeline.ProcessAsync(_sampleFiles).ToListAsync(); @@ -122,9 +122,9 @@ public async Task CanProcessDocumentsInDirectory() TestEmbeddingGenerator embeddingGenerator = new(); using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator }); - var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); - var collection = testVectorStore.GetCollection>("chunks-dir", definition); - using VectorStoreWriter> vectorStoreWriter = new(collection); + var definition = 
IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); + var collection = testVectorStore.GetCollection>("chunks-dir", definition); + using VectorStoreWriter> vectorStoreWriter = new(collection); using IngestionPipeline pipeline = new(CreateReader(), CreateChunker(), vectorStoreWriter); @@ -159,9 +159,9 @@ public async Task ChunksCanBeMoreThanJustText() TestEmbeddingGenerator embeddingGenerator = new(); using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator }); - var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); - var collection = testVectorStore.GetCollection>("chunks-img", definition); - using VectorStoreWriter> vectorStoreWriter = new(collection); + var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); + var collection = testVectorStore.GetCollection>("chunks-img", definition); + using VectorStoreWriter> vectorStoreWriter = new(collection); using IngestionPipeline pipeline = new(CreateReader(), new ImageChunker(), vectorStoreWriter); Assert.False(embeddingGenerator.WasCalled); @@ -211,9 +211,9 @@ public async Task SingleFailureDoesNotTearDownEntirePipeline() TestEmbeddingGenerator embeddingGenerator = new(); using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator }); - var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); - var collection = testVectorStore.GetCollection>("chunks-fail", definition); - using VectorStoreWriter> vectorStoreWriter = new(collection); + var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); + var collection = testVectorStore.GetCollection>("chunks-fail", definition); + using VectorStoreWriter> vectorStoreWriter = new(collection); using IngestionPipeline pipeline = new(failingForFirstReader, CreateChunker(), 
vectorStoreWriter); diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs index bc283241509..505e79cfd69 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs @@ -7,12 +7,11 @@ namespace Microsoft.Extensions.DataIngestion.Writers.Tests; -public class TestChunkRecordWithMetadata : IngestedChunkRecord +public class TestChunkRecordWithMetadata : IngestedChunkRecord { public const int TestDimensionCount = 4; [VectorStoreVector(TestDimensionCount, StorageName = EmbeddingPropertyName)] - [JsonPropertyName(EmbeddingPropertyName)] public override string? Embedding => Content; [VectorStoreData(StorageName = "classification")] diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs index 353e569030a..99cf48188a3 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs @@ -21,10 +21,10 @@ public async Task CanWriteChunks() using TestEmbeddingGenerator testEmbeddingGenerator = new(); using VectorStore vectorStore = CreateVectorStore(testEmbeddingGenerator); - var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); - var collection = vectorStore.GetCollection>("chunks", definition); + var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); + var collection = vectorStore.GetCollection>("chunks", definition); - using VectorStoreWriter> writer = new(collection); + using VectorStoreWriter> writer = 
new(collection); IngestionDocument document = new(documentId); IngestionChunk chunk = TestChunkFactory.CreateChunk("some content", document); @@ -34,7 +34,7 @@ public async Task CanWriteChunks() Assert.False(testEmbeddingGenerator.WasCalled); await writer.WriteAsync(chunks.ToAsyncEnumerable()); - IngestedChunkRecord record = await writer.VectorStoreCollection + IngestedChunkRecord record = await writer.VectorStoreCollection .GetAsync(filter: record => record.DocumentId == documentId, top: 1) .SingleAsync(); @@ -54,7 +54,7 @@ public async Task CanWriteChunksWithMetadata() using VectorStore vectorStore = CreateVectorStore(testEmbeddingGenerator); var collection = vectorStore.GetCollection("chunks-meta"); - using VectorStoreWriter writer = new(collection); + using VectorStoreWriter writer = new(collection); IngestionDocument document = new(documentId); IngestionChunk chunk = TestChunkFactory.CreateChunk("some content", document); @@ -82,10 +82,10 @@ public async Task DoesSupportIncrementalIngestion() using TestEmbeddingGenerator testEmbeddingGenerator = new(); using VectorStore vectorStore = CreateVectorStore(testEmbeddingGenerator); - var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); - var collection = vectorStore.GetCollection>("chunks-incr", definition); + var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); + var collection = vectorStore.GetCollection>("chunks-incr", definition); - using VectorStoreWriter> writer = new( + using VectorStoreWriter> writer = new( collection, options: new() { @@ -113,7 +113,7 @@ public async Task DoesSupportIncrementalIngestion() await writer.WriteAsync(updatedChunks.ToAsyncEnumerable()); // We ask for 100 records, but we expect only 1 as the previous 2 should have been deleted. 
- IngestedChunkRecord record = await writer.VectorStoreCollection + IngestedChunkRecord record = await writer.VectorStoreCollection .GetAsync(filter: record => record.DocumentId == documentId, top: 100) .SingleAsync(); @@ -152,10 +152,10 @@ public async Task BatchesChunks(int? batchTokenCount, int[] chunkTokenCounts) options.BatchTokenCount = batchTokenCount.Value; } - var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); - var collection = vectorStore.GetCollection>("chunks-batch", definition); + var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); + var collection = vectorStore.GetCollection>("chunks-batch", definition); - using VectorStoreWriter> writer = new( + using VectorStoreWriter> writer = new( collection, options: options); @@ -183,10 +183,10 @@ public async Task IncrementalIngestion_WithManyRecords_DeletesAllPreExistingChun using TestEmbeddingGenerator testEmbeddingGenerator = new(); using VectorStore vectorStore = CreateVectorStore(testEmbeddingGenerator); - var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); - var collection = vectorStore.GetCollection>("chunks-many", definition); + var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); + var collection = vectorStore.GetCollection>("chunks-many", definition); - using VectorStoreWriter> writer = new( + using VectorStoreWriter> writer = new( collection, options: new() { @@ -220,7 +220,7 @@ public async Task IncrementalIngestion_WithManyRecords_DeletesAllPreExistingChun await writer.WriteAsync(updatedChunks.ToAsyncEnumerable()); // Verify that all old records were deleted and only the new ones remain - List> records = await writer.VectorStoreCollection + List> records = await writer.VectorStoreCollection .GetAsync(filter: record => record.DocumentId == documentId, top: 10000) .ToListAsync(); diff --git 
a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Program.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Program.cs index e47bad71cc8..0dd33b88a71 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Program.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Program.cs @@ -18,7 +18,7 @@ var vectorStorePath = Path.Combine(AppContext.BaseDirectory, "vector-store.db"); var vectorStoreConnectionString = $"Data Source={vectorStorePath}"; builder.Services.AddSqliteVectorStore(_ => vectorStoreConnectionString); -builder.Services.AddSqliteCollection(IngestedChunk.CollectionName, vectorStoreConnectionString); +builder.Services.AddSqliteCollection(IngestedChunk.CollectionName, vectorStoreConnectionString); builder.Services.AddSingleton(); builder.Services.AddSingleton(); builder.Services.AddKeyedSingleton("ingestion_directory", new DirectoryInfo(Path.Combine(builder.Environment.WebRootPath, "Data"))); diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs index a32ef7dd6e8..6b3a88534ac 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs +++ 
b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs @@ -4,13 +4,12 @@ namespace aichatweb.Web.Services; -public class IngestedChunk : IngestedChunkRecord +public class IngestedChunk : IngestedChunkRecord { public const int VectorDimensions = 1536; // 1536 is the default vector size for the OpenAI text-embedding-3-small model public const string VectorDistanceFunction = DistanceFunction.CosineDistance; public const string CollectionName = "data-aichatweb-chunks"; [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingPropertyName)] - [JsonPropertyName(EmbeddingPropertyName)] public override string? Embedding => Content; } diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs index 1a482ee411b..61088b1225d 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs @@ -9,12 +9,12 @@ namespace aichatweb.Web.Services.Ingestion; public class DataIngestor( ILogger logger, ILoggerFactory loggerFactory, - VectorStoreCollection vectorCollection, + VectorStoreCollection vectorCollection, IEmbeddingGenerator> embeddingGenerator) { public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern) { - using var writer = new VectorStoreWriter(vectorCollection, new() + using var writer = new 
VectorStoreWriter(vectorCollection, new() { IncrementalIngestion = false, }); diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/SemanticSearch.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/SemanticSearch.cs index d043c8efb84..7d8718028bd 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/SemanticSearch.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/SemanticSearch.cs @@ -4,7 +4,7 @@ namespace aichatweb.Web.Services; public class SemanticSearch( - VectorStoreCollection vectorCollection, + VectorStoreCollection vectorCollection, [FromKeyedServices("ingestion_directory")] DirectoryInfo ingestionDirectory, DataIngestor dataIngestor) { diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs index d4782d5555e..bc50ca4c0ef 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs @@ -4,13 +4,12 @@ namespace aichatweb.Web.Services; -public class IngestedChunk : IngestedChunkRecord +public class IngestedChunk : IngestedChunkRecord { public const 
int VectorDimensions = 1536; // 1536 is the default vector size for the OpenAI text-embedding-3-small model public const string VectorDistanceFunction = DistanceFunction.CosineSimilarity; public const string CollectionName = "data-aichatweb-chunks"; [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingPropertyName)] - [JsonPropertyName(EmbeddingPropertyName)] public override string? Embedding => Content; } diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs index 1a482ee411b..61088b1225d 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs @@ -9,12 +9,12 @@ namespace aichatweb.Web.Services.Ingestion; public class DataIngestor( ILogger logger, ILoggerFactory loggerFactory, - VectorStoreCollection vectorCollection, + VectorStoreCollection vectorCollection, IEmbeddingGenerator> embeddingGenerator) { public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern) { - using var writer = new VectorStoreWriter(vectorCollection, new() + using var writer = new VectorStoreWriter(vectorCollection, new() { IncrementalIngestion = false, }); diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/SemanticSearch.cs 
b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/SemanticSearch.cs index d043c8efb84..7d8718028bd 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/SemanticSearch.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/SemanticSearch.cs @@ -4,7 +4,7 @@ namespace aichatweb.Web.Services; public class SemanticSearch( - VectorStoreCollection vectorCollection, + VectorStoreCollection vectorCollection, [FromKeyedServices("ingestion_directory")] DirectoryInfo ingestionDirectory, DataIngestor dataIngestor) { diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Program.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Program.cs index 27e50372647..2df0c881faa 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Program.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Program.cs @@ -25,7 +25,7 @@ var vectorStorePath = Path.Combine(AppContext.BaseDirectory, "vector-store.db"); var vectorStoreConnectionString = $"Data Source={vectorStorePath}"; builder.Services.AddSqliteVectorStore(_ => vectorStoreConnectionString); -builder.Services.AddSqliteCollection(IngestedChunk.CollectionName, vectorStoreConnectionString); +builder.Services.AddSqliteCollection(IngestedChunk.CollectionName, vectorStoreConnectionString); builder.Services.AddSingleton(); builder.Services.AddSingleton(); diff --git 
a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/IngestedChunk.cs index 4dc73a5b4ca..259e6578098 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/IngestedChunk.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/IngestedChunk.cs @@ -4,13 +4,12 @@ namespace aichatweb.Services; -public class IngestedChunk : IngestedChunkRecord +public class IngestedChunk : IngestedChunkRecord { public const int VectorDimensions = 1536; // 1536 is the default vector size for the OpenAI text-embedding-3-small model public const string VectorDistanceFunction = DistanceFunction.CosineDistance; public const string CollectionName = "data-aichatweb-chunks"; [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingPropertyName)] - [JsonPropertyName(EmbeddingPropertyName)] public override string? 
Embedding => Content; } diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/Ingestion/DataIngestor.cs index 2c4f96cc2ac..b4675927d47 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/Ingestion/DataIngestor.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/Ingestion/DataIngestor.cs @@ -9,12 +9,12 @@ namespace aichatweb.Services.Ingestion; public class DataIngestor( ILogger logger, ILoggerFactory loggerFactory, - VectorStoreCollection vectorCollection, + VectorStoreCollection vectorCollection, IEmbeddingGenerator> embeddingGenerator) { public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern) { - using var writer = new VectorStoreWriter(vectorCollection, new() + using var writer = new VectorStoreWriter(vectorCollection, new() { IncrementalIngestion = false, }); diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/SemanticSearch.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/SemanticSearch.cs index 8072f8bcddb..f825ebf5a5c 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/SemanticSearch.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/SemanticSearch.cs @@ -4,7 +4,7 @@ namespace 
aichatweb.Services; public class SemanticSearch( - VectorStoreCollection vectorCollection, + VectorStoreCollection vectorCollection, [FromKeyedServices("ingestion_directory")] DirectoryInfo ingestionDirectory, DataIngestor dataIngestor) { diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs index 20a86655c44..dc2874c6b58 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs @@ -4,13 +4,12 @@ namespace aichatweb.Web.Services; -public class IngestedChunk : IngestedChunkRecord +public class IngestedChunk : IngestedChunkRecord { public const int VectorDimensions = 384; // 384 is the default vector size for the all-minilm embedding model public const string VectorDistanceFunction = DistanceFunction.CosineSimilarity; public const string CollectionName = "data-aichatweb-chunks"; [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingPropertyName)] - [JsonPropertyName(EmbeddingPropertyName)] public override string? 
Embedding => Content; } diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs index 0f91c879f57..61088b1225d 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs @@ -14,7 +14,7 @@ public class DataIngestor( { public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern) { - using var writer = new VectorStoreWriter(vectorCollection, new() + using var writer = new VectorStoreWriter(vectorCollection, new() { IncrementalIngestion = false, }); diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/IngestedChunk.cs index 9aa8dab5fe7..97799bdb212 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/IngestedChunk.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/IngestedChunk.cs @@ -4,13 +4,12 @@ namespace aichatweb.Services; -public class IngestedChunk : IngestedChunkRecord +public class IngestedChunk : IngestedChunkRecord { public const int VectorDimensions = 1536; // 1536 is the default 
vector size for the OpenAI text-embedding-3-small model public const string VectorDistanceFunction = DistanceFunction.CosineSimilarity; public const string CollectionName = "data-aichatweb-chunks"; [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingPropertyName)] - [JsonPropertyName(EmbeddingPropertyName)] public override string? Embedding => Content; } diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/Ingestion/DataIngestor.cs index 2c4f96cc2ac..b4675927d47 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/Ingestion/DataIngestor.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/Ingestion/DataIngestor.cs @@ -9,12 +9,12 @@ namespace aichatweb.Services.Ingestion; public class DataIngestor( ILogger logger, ILoggerFactory loggerFactory, - VectorStoreCollection vectorCollection, + VectorStoreCollection vectorCollection, IEmbeddingGenerator> embeddingGenerator) { public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern) { - using var writer = new VectorStoreWriter(vectorCollection, new() + using var writer = new VectorStoreWriter(vectorCollection, new() { IncrementalIngestion = false, }); diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/SemanticSearch.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/SemanticSearch.cs index 
8072f8bcddb..f825ebf5a5c 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/SemanticSearch.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/SemanticSearch.cs @@ -4,7 +4,7 @@ namespace aichatweb.Services; public class SemanticSearch( - VectorStoreCollection vectorCollection, + VectorStoreCollection vectorCollection, [FromKeyedServices("ingestion_directory")] DirectoryInfo ingestionDirectory, DataIngestor dataIngestor) { From c3c57e0ea26107448fc4af82099df71a093bee7e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 16 Mar 2026 12:33:25 +0000 Subject: [PATCH 07/14] Address second round of reviewer feedback: make consts private, fix doc reference, remove JsonPropertyName, add custom schema test Co-authored-by: adamsitnik <6011991+adamsitnik@users.noreply.github.com> --- .../Writers/IngestedChunkRecord.cs | 29 ++--------- .../Writers/VectorStoreWriterOptions.cs | 2 +- .../Services/IngestedChunk.cs | 2 +- .../Writers/TestChunkRecordWithMetadata.cs | 4 +- .../Writers/VectorStoreWriterTests.cs | 49 +++++++++++++++++++ .../aichatweb.Web/Services/IngestedChunk.cs | 2 +- .../aichatweb.Web/Services/IngestedChunk.cs | 2 +- .../aichatweb/Services/IngestedChunk.cs | 2 +- .../aichatweb.Web/Services/IngestedChunk.cs | 2 +- .../aichatweb/Services/IngestedChunk.cs | 2 +- 10 files changed, 62 insertions(+), 34 deletions(-) diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs index 1a634a1c4ae..cbbff78de31 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs @@ -18,30 +18,11 @@ 
namespace Microsoft.Extensions.DataIngestion; /// public class IngestedChunkRecord { - /// - /// The storage name for the property. - /// - protected const string KeyPropertyName = "key"; - - /// - /// The storage name for the property. - /// - protected const string DocumentIdPropertyName = "documentid"; - - /// - /// The storage name for the property. - /// - protected const string ContentPropertyName = "content"; - - /// - /// The storage name for the property. - /// - protected const string ContextPropertyName = "context"; - - /// - /// The storage name for the property. - /// - protected const string EmbeddingPropertyName = "embedding"; + private const string KeyPropertyName = "key"; + private const string DocumentIdPropertyName = "documentid"; + private const string ContentPropertyName = "content"; + private const string ContextPropertyName = "context"; + private const string EmbeddingPropertyName = "embedding"; /// /// Creates a for . diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriterOptions.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriterOptions.cs index 3799c3a56cc..d86ddb8e97f 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriterOptions.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriterOptions.cs @@ -7,7 +7,7 @@ namespace Microsoft.Extensions.DataIngestion; /// -/// Represents options for the . +/// Represents options for the . 
/// public sealed class VectorStoreWriterOptions { diff --git a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/IngestedChunk.cs b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/IngestedChunk.cs index 915f72c8339..d607b7c4ae3 100644 --- a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/IngestedChunk.cs +++ b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/IngestedChunk.cs @@ -17,6 +17,6 @@ public class IngestedChunk : IngestedChunkRecord #endif public const string CollectionName = "data-AIChatWeb-CSharp.Web-chunks"; - [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingPropertyName)] + [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")] public override string? Embedding => Content; } diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs index 505e79cfd69..e91e0648e18 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs @@ -2,7 +2,6 @@ // The .NET Foundation licenses this file to you under the MIT license. 
using System; -using System.Text.Json.Serialization; using Microsoft.Extensions.VectorData; namespace Microsoft.Extensions.DataIngestion.Writers.Tests; @@ -11,11 +10,10 @@ public class TestChunkRecordWithMetadata : IngestedChunkRecord { public const int TestDimensionCount = 4; - [VectorStoreVector(TestDimensionCount, StorageName = EmbeddingPropertyName)] + [VectorStoreVector(TestDimensionCount, StorageName = "embedding")] public override string? Embedding => Content; [VectorStoreData(StorageName = "classification")] - [JsonPropertyName("classification")] public string? Classification { get; set; } public override void SetMetadata(string key, object? value) diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs index 99cf48188a3..a7574e6b83c 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs @@ -13,6 +13,55 @@ namespace Microsoft.Extensions.DataIngestion.Writers.Tests; public abstract class VectorStoreWriterTests { + [Fact] + public async Task CanWriteChunksWithCustomDefinition() + { + string documentId = Guid.NewGuid().ToString(); + + using TestEmbeddingGenerator testEmbeddingGenerator = new(); + using VectorStore vectorStore = CreateVectorStore(testEmbeddingGenerator); + + // User creates their own definition without using CreateCollectionDefinition + var definition = new VectorStoreCollectionDefinition + { + Properties = + { + new VectorStoreKeyProperty(nameof(IngestedChunkRecord.Key), typeof(Guid)) { StorageName = "key" }, + new VectorStoreVectorProperty(nameof(IngestedChunkRecord.Embedding), typeof(string), TestEmbeddingGenerator.DimensionCount) + { + StorageName = "embedding", + }, + new VectorStoreDataProperty(nameof(IngestedChunkRecord.Content), typeof(string)) { StorageName 
= "content" }, + new VectorStoreDataProperty(nameof(IngestedChunkRecord.Context), typeof(string)) { StorageName = "context" }, + new VectorStoreDataProperty(nameof(IngestedChunkRecord.DocumentId), typeof(string)) + { + StorageName = "documentid", + IsIndexed = true, + }, + }, + }; + + var collection = vectorStore.GetCollection>("chunks-custom", definition); + + using VectorStoreWriter> writer = new(collection); + + IngestionDocument document = new(documentId); + IngestionChunk chunk = TestChunkFactory.CreateChunk("custom schema content", document); + + List> chunks = [chunk]; + + await writer.WriteAsync(chunks.ToAsyncEnumerable()); + + IngestedChunkRecord record = await writer.VectorStoreCollection + .GetAsync(filter: record => record.DocumentId == documentId, top: 1) + .SingleAsync(); + + Assert.NotNull(record); + Assert.NotEqual(Guid.Empty, record.Key); + Assert.Equal(documentId, record.DocumentId); + Assert.Equal(chunks[0].Content, record.Content); + } + [Fact] public async Task CanWriteChunks() { diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs index 6b3a88534ac..94d56dd8804 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs @@ -10,6 +10,6 @@ public class IngestedChunk : IngestedChunkRecord public const string VectorDistanceFunction = DistanceFunction.CosineDistance; public const string CollectionName = "data-aichatweb-chunks"; - [VectorStoreVector(VectorDimensions, DistanceFunction 
= VectorDistanceFunction, StorageName = EmbeddingPropertyName)] + [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")] public override string? Embedding => Content; } diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs index bc50ca4c0ef..940552872cc 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs @@ -10,6 +10,6 @@ public class IngestedChunk : IngestedChunkRecord public const string VectorDistanceFunction = DistanceFunction.CosineSimilarity; public const string CollectionName = "data-aichatweb-chunks"; - [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingPropertyName)] + [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")] public override string? 
Embedding => Content; } diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/IngestedChunk.cs index 259e6578098..d2a463cbca2 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/IngestedChunk.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/IngestedChunk.cs @@ -10,6 +10,6 @@ public class IngestedChunk : IngestedChunkRecord public const string VectorDistanceFunction = DistanceFunction.CosineDistance; public const string CollectionName = "data-aichatweb-chunks"; - [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingPropertyName)] + [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")] public override string? 
Embedding => Content; } diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs index dc2874c6b58..b58b5212ed2 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs @@ -10,6 +10,6 @@ public class IngestedChunk : IngestedChunkRecord public const string VectorDistanceFunction = DistanceFunction.CosineSimilarity; public const string CollectionName = "data-aichatweb-chunks"; - [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingPropertyName)] + [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")] public override string? 
Embedding => Content; } diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/IngestedChunk.cs index 97799bdb212..fbb19c14b82 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/IngestedChunk.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/IngestedChunk.cs @@ -10,6 +10,6 @@ public class IngestedChunk : IngestedChunkRecord public const string VectorDistanceFunction = DistanceFunction.CosineSimilarity; public const string CollectionName = "data-aichatweb-chunks"; - [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingPropertyName)] + [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")] public override string? 
Embedding => Content; } From b3153bfa5dfb3897b656635e4e758012b346968e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 16 Mar 2026 12:44:03 +0000 Subject: [PATCH 08/14] Rename EmbeddingPropertyName to EmbeddingStorageName (protected), use custom storage names in test Co-authored-by: adamsitnik <6011991+adamsitnik@users.noreply.github.com> --- .../Writers/IngestedChunkRecord.cs | 7 +++++-- .../Services/IngestedChunk.cs | 2 +- .../Writers/TestChunkRecordWithMetadata.cs | 2 +- .../Writers/VectorStoreWriterTests.cs | 15 ++++++++------- .../aichatweb.Web/Services/IngestedChunk.cs | 2 +- .../aichatweb.Web/Services/IngestedChunk.cs | 2 +- .../aichatweb/Services/IngestedChunk.cs | 2 +- .../aichatweb.Web/Services/IngestedChunk.cs | 2 +- .../aichatweb/Services/IngestedChunk.cs | 2 +- 9 files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs index cbbff78de31..e11518b33ce 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs @@ -22,7 +22,10 @@ public class IngestedChunkRecord private const string DocumentIdPropertyName = "documentid"; private const string ContentPropertyName = "content"; private const string ContextPropertyName = "context"; - private const string EmbeddingPropertyName = "embedding"; + /// + /// The storage name for the property. + /// + protected const string EmbeddingStorageName = "embedding"; /// /// Creates a for . @@ -51,7 +54,7 @@ public static VectorStoreCollectionDefinition CreateCollectionDefinition(int dim // to handle the conversion from TChunk to the actual vector type it supports. 
new VectorStoreVectorProperty(nameof(Embedding), typeof(TChunk), dimensionCount) { - StorageName = EmbeddingPropertyName, + StorageName = EmbeddingStorageName, DistanceFunction = distanceFunction, IndexKind = indexKind, }, diff --git a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/IngestedChunk.cs b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/IngestedChunk.cs index d607b7c4ae3..199b91e638b 100644 --- a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/IngestedChunk.cs +++ b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/IngestedChunk.cs @@ -17,6 +17,6 @@ public class IngestedChunk : IngestedChunkRecord #endif public const string CollectionName = "data-AIChatWeb-CSharp.Web-chunks"; - [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")] + [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingStorageName)] public override string? Embedding => Content; } diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs index e91e0648e18..f58c528a4d7 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs @@ -10,7 +10,7 @@ public class TestChunkRecordWithMetadata : IngestedChunkRecord { public const int TestDimensionCount = 4; - [VectorStoreVector(TestDimensionCount, StorageName = "embedding")] + [VectorStoreVector(TestDimensionCount, StorageName = EmbeddingStorageName)] public override string? 
Embedding => Content; [VectorStoreData(StorageName = "classification")] diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs index a7574e6b83c..17066077e2b 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs @@ -21,21 +21,22 @@ public async Task CanWriteChunksWithCustomDefinition() using TestEmbeddingGenerator testEmbeddingGenerator = new(); using VectorStore vectorStore = CreateVectorStore(testEmbeddingGenerator); - // User creates their own definition without using CreateCollectionDefinition - var definition = new VectorStoreCollectionDefinition + // User creates their own definition without using CreateCollectionDefinition, + // using custom storage names to prove they can map to a pre-existing collection schema. 
+ VectorStoreCollectionDefinition definition = new() { Properties = { - new VectorStoreKeyProperty(nameof(IngestedChunkRecord.Key), typeof(Guid)) { StorageName = "key" }, + new VectorStoreKeyProperty(nameof(IngestedChunkRecord.Key), typeof(Guid)) { StorageName = "custom_key" }, new VectorStoreVectorProperty(nameof(IngestedChunkRecord.Embedding), typeof(string), TestEmbeddingGenerator.DimensionCount) { - StorageName = "embedding", + StorageName = "custom_embedding", }, - new VectorStoreDataProperty(nameof(IngestedChunkRecord.Content), typeof(string)) { StorageName = "content" }, - new VectorStoreDataProperty(nameof(IngestedChunkRecord.Context), typeof(string)) { StorageName = "context" }, + new VectorStoreDataProperty(nameof(IngestedChunkRecord.Content), typeof(string)) { StorageName = "custom_content" }, + new VectorStoreDataProperty(nameof(IngestedChunkRecord.Context), typeof(string)) { StorageName = "custom_context" }, new VectorStoreDataProperty(nameof(IngestedChunkRecord.DocumentId), typeof(string)) { - StorageName = "documentid", + StorageName = "custom_documentid", IsIndexed = true, }, }, diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs index 94d56dd8804..2fe5300d05b 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs @@ -10,6 +10,6 @@ public class IngestedChunk : IngestedChunkRecord public const string VectorDistanceFunction = DistanceFunction.CosineDistance; public const string 
CollectionName = "data-aichatweb-chunks"; - [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")] + [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingStorageName)] public override string? Embedding => Content; } diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs index 940552872cc..d1d785a0072 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs @@ -10,6 +10,6 @@ public class IngestedChunk : IngestedChunkRecord public const string VectorDistanceFunction = DistanceFunction.CosineSimilarity; public const string CollectionName = "data-aichatweb-chunks"; - [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")] + [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingStorageName)] public override string? 
Embedding => Content; } diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/IngestedChunk.cs index d2a463cbca2..611b8f37d7c 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/IngestedChunk.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/IngestedChunk.cs @@ -10,6 +10,6 @@ public class IngestedChunk : IngestedChunkRecord public const string VectorDistanceFunction = DistanceFunction.CosineDistance; public const string CollectionName = "data-aichatweb-chunks"; - [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")] + [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingStorageName)] public override string? 
Embedding => Content; } diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs index b58b5212ed2..1ad1cabf5d0 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs @@ -10,6 +10,6 @@ public class IngestedChunk : IngestedChunkRecord public const string VectorDistanceFunction = DistanceFunction.CosineSimilarity; public const string CollectionName = "data-aichatweb-chunks"; - [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")] + [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingStorageName)] public override string? 
Embedding => Content; } diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/IngestedChunk.cs index fbb19c14b82..0e0b05795c3 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/IngestedChunk.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/IngestedChunk.cs @@ -10,6 +10,6 @@ public class IngestedChunk : IngestedChunkRecord public const string VectorDistanceFunction = DistanceFunction.CosineSimilarity; public const string CollectionName = "data-aichatweb-chunks"; - [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")] + [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingStorageName)] public override string? 
Embedding => Content; } From 550781c1ec9f4f7904c5f04b0987a062ea18b0ca Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 16 Mar 2026 12:52:27 +0000 Subject: [PATCH 09/14] Rename private storage name consts from *PropertyName to *StorageName for consistency Co-authored-by: adamsitnik <6011991+adamsitnik@users.noreply.github.com> --- .../Writers/IngestedChunkRecord.cs | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs index e11518b33ce..025384f4992 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs @@ -18,10 +18,10 @@ namespace Microsoft.Extensions.DataIngestion; /// public class IngestedChunkRecord { - private const string KeyPropertyName = "key"; - private const string DocumentIdPropertyName = "documentid"; - private const string ContentPropertyName = "content"; - private const string ContextPropertyName = "context"; + private const string KeyStorageName = "key"; + private const string DocumentIdStorageName = "documentid"; + private const string ContentStorageName = "content"; + private const string ContextStorageName = "context"; /// /// The storage name for the property. /// @@ -48,7 +48,7 @@ public static VectorStoreCollectionDefinition CreateCollectionDefinition(int dim { Properties = { - new VectorStoreKeyProperty(nameof(Key), typeof(Guid)) { StorageName = KeyPropertyName }, + new VectorStoreKeyProperty(nameof(Key), typeof(Guid)) { StorageName = KeyStorageName }, // By using TChunk as the type here we allow the vector store // to handle the conversion from TChunk to the actual vector type it supports. 
@@ -58,11 +58,11 @@ public static VectorStoreCollectionDefinition CreateCollectionDefinition(int dim DistanceFunction = distanceFunction, IndexKind = indexKind, }, - new VectorStoreDataProperty(nameof(Content), typeof(TChunk)) { StorageName = ContentPropertyName }, - new VectorStoreDataProperty(nameof(Context), typeof(string)) { StorageName = ContextPropertyName }, + new VectorStoreDataProperty(nameof(Content), typeof(TChunk)) { StorageName = ContentStorageName }, + new VectorStoreDataProperty(nameof(Context), typeof(string)) { StorageName = ContextStorageName }, new VectorStoreDataProperty(nameof(DocumentId), typeof(string)) { - StorageName = DocumentIdPropertyName, + StorageName = DocumentIdStorageName, IsIndexed = true, }, }, @@ -72,25 +72,25 @@ public static VectorStoreCollectionDefinition CreateCollectionDefinition(int dim /// /// Gets or sets the unique key for this record. /// - [VectorStoreKey(StorageName = KeyPropertyName)] + [VectorStoreKey(StorageName = KeyStorageName)] public virtual Guid Key { get; set; } /// /// Gets or sets the identifier of the document from which this chunk was extracted. /// - [VectorStoreData(StorageName = DocumentIdPropertyName)] + [VectorStoreData(StorageName = DocumentIdStorageName)] public virtual string DocumentId { get; set; } = string.Empty; /// /// Gets or sets the content of the chunk. /// - [VectorStoreData(StorageName = ContentPropertyName)] + [VectorStoreData(StorageName = ContentStorageName)] public virtual TChunk? Content { get; set; } /// /// Gets or sets additional context for the chunk. /// - [VectorStoreData(StorageName = ContextPropertyName)] + [VectorStoreData(StorageName = ContextStorageName)] public virtual string? 
Context { get; set; } /// From def5e6aae45483918a8fe096e090b45d96e9f242 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Mon, 16 Mar 2026 14:13:28 +0100 Subject: [PATCH 10/14] Apply suggestions from code review Co-authored-by: Adam Sitnik --- .../Writers/IngestedChunkRecord.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs index 025384f4992..fc1e657c59f 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs @@ -22,6 +22,7 @@ public class IngestedChunkRecord private const string DocumentIdStorageName = "documentid"; private const string ContentStorageName = "content"; private const string ContextStorageName = "context"; + /// /// The storage name for the property. /// From 64af7996de6818d297f9cfc280861cf18f34ab38 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 16 Mar 2026 13:41:51 +0000 Subject: [PATCH 11/14] Remove unnecessary 'using System' from TestChunkRecordWithMetadata.cs Co-authored-by: adamsitnik <6011991+adamsitnik@users.noreply.github.com> --- .../Writers/TestChunkRecordWithMetadata.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs index f58c528a4d7..a5949291b3a 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs @@ -1,7 +1,6 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
-using System; using Microsoft.Extensions.VectorData; namespace Microsoft.Extensions.DataIngestion.Writers.Tests; From 81f4817b1666c8a8e4c628a50f458e3fed8a02ae Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 16 Mar 2026 14:40:12 +0000 Subject: [PATCH 12/14] Remove stale `using System.Text.Json.Serialization` from all 5 snapshot IngestedChunk.cs files Co-authored-by: adamsitnik <6011991+adamsitnik@users.noreply.github.com> --- .../aichatweb/aichatweb.Web/Services/IngestedChunk.cs | 3 +-- .../aichatweb/aichatweb.Web/Services/IngestedChunk.cs | 3 +-- .../aichatweb/Services/IngestedChunk.cs | 3 +-- .../aichatweb/aichatweb.Web/Services/IngestedChunk.cs | 3 +-- .../aichatweb/Services/IngestedChunk.cs | 3 +-- 5 files changed, 5 insertions(+), 10 deletions(-) diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs index 2fe5300d05b..1dec3c99a20 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs @@ -1,5 +1,4 @@ -using System.Text.Json.Serialization; -using Microsoft.Extensions.DataIngestion; +using Microsoft.Extensions.DataIngestion; using Microsoft.Extensions.VectorData; namespace aichatweb.Web.Services; diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs 
b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs index d1d785a0072..b1a757a01e7 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs @@ -1,5 +1,4 @@ -using System.Text.Json.Serialization; -using Microsoft.Extensions.DataIngestion; +using Microsoft.Extensions.DataIngestion; using Microsoft.Extensions.VectorData; namespace aichatweb.Web.Services; diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/IngestedChunk.cs index 611b8f37d7c..998a2d0db4d 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/IngestedChunk.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/IngestedChunk.cs @@ -1,5 +1,4 @@ -using System.Text.Json.Serialization; -using Microsoft.Extensions.DataIngestion; +using Microsoft.Extensions.DataIngestion; using Microsoft.Extensions.VectorData; namespace aichatweb.Services; diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs 
b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs index 1ad1cabf5d0..174fe48a4ae 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs @@ -1,5 +1,4 @@ -using System.Text.Json.Serialization; -using Microsoft.Extensions.DataIngestion; +using Microsoft.Extensions.DataIngestion; using Microsoft.Extensions.VectorData; namespace aichatweb.Web.Services; diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/IngestedChunk.cs index 0e0b05795c3..c364b7dd235 100644 --- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/IngestedChunk.cs +++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/IngestedChunk.cs @@ -1,5 +1,4 @@ -using System.Text.Json.Serialization; -using Microsoft.Extensions.DataIngestion; +using Microsoft.Extensions.DataIngestion; using Microsoft.Extensions.VectorData; namespace aichatweb.Services; From f20bb41e3a85c874396e44adbc7ba397bd319a9e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 19 Mar 2026 16:06:17 +0000 Subject: [PATCH 13/14] Rename CreateCollectionDefinition to CreateDefaultCollectionDefinition, move SetMetadata to non-sealed VectorStoreWriter 
Co-authored-by: adamsitnik <6011991+adamsitnik@users.noreply.github.com> --- .../Writers/IngestedChunkRecord.cs | 18 ++----------- .../Writers/VectorStoreWriter.cs | 19 ++++++++++++-- .../IngestionPipelineTests.cs | 8 +++--- .../Writers/TestChunkRecordWithMetadata.cs | 10 -------- .../TestVectorStoreWriterWithMetadata.cs | 25 +++++++++++++++++++ .../Writers/VectorStoreWriterTests.cs | 12 ++++----- 6 files changed, 54 insertions(+), 38 deletions(-) create mode 100644 test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestVectorStoreWriterWithMetadata.cs diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs index fc1e657c59f..b36a1b6b391 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs @@ -11,7 +11,7 @@ namespace Microsoft.Extensions.DataIngestion; /// /// The type of the chunk content. /// -/// When the vector dimension count is not known at compile time, use the +/// When the vector dimension count is not known at compile time, use the /// helper to create a and pass it to the vector store collection constructor. /// When the vector dimension count is known at compile time, derive from this class and add /// the to the property. @@ -40,7 +40,7 @@ public class IngestedChunkRecord /// A suitable for creating a vector store collection. /// is less than or equal to zero. #pragma warning disable CA1000 // Do not declare static members on generic types - needs access to TChunk type parameter - public static VectorStoreCollectionDefinition CreateCollectionDefinition(int dimensionCount, string? distanceFunction = null, string? indexKind = null) + public static VectorStoreCollectionDefinition CreateDefaultCollectionDefinition(int dimensionCount, string? distanceFunction = null, string? 
indexKind = null) #pragma warning restore CA1000 { _ = Shared.Diagnostics.Throw.IfLessThanOrEqual(dimensionCount, 0); @@ -103,18 +103,4 @@ public static VectorStoreCollectionDefinition CreateCollectionDefinition(int dim /// the with the appropriate dimension count. /// public virtual TChunk? Embedding => Content; - - /// - /// Sets a metadata value on the record. - /// - /// The metadata key. - /// The metadata value. - /// - /// Override this method in derived classes to store metadata as typed properties with - /// attributes. - /// - public virtual void SetMetadata(string key, object? value) - { - throw new NotSupportedException($"Metadata key '{key}' is not supported. Override {nameof(SetMetadata)} in a derived class to handle metadata."); - } } diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs index dee73b6a86e..23b09f64086 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs @@ -15,7 +15,7 @@ namespace Microsoft.Extensions.DataIngestion; /// /// The type of the chunk content. /// The type of the record stored in the vector store. -public sealed class VectorStoreWriter : IngestionChunkWriter +public class VectorStoreWriter : IngestionChunkWriter where TRecord : IngestedChunkRecord, new() { private readonly VectorStoreWriterOptions _options; @@ -72,7 +72,7 @@ public override async Task WriteAsync(IAsyncEnumerable> c { foreach (var metadata in chunk.Metadata) { - record.SetMetadata(metadata.Key, metadata.Value); + SetMetadata(record, metadata.Key, metadata.Value); } } @@ -104,6 +104,21 @@ public override async Task WriteAsync(IAsyncEnumerable> c } } + /// + /// Sets a metadata value on the record. + /// + /// The record on which to set the metadata. + /// The metadata key. + /// The metadata value. 
+ /// + /// Override this method in derived classes to store metadata as typed properties with + /// attributes. + /// + protected virtual void SetMetadata(TRecord record, string key, object? value) + { + throw new NotSupportedException($"Metadata key '{key}' is not supported. Override {nameof(SetMetadata)} in a derived class to handle metadata."); + } + private async Task> GetPreExistingChunksIdsAsync(IngestionDocument document, CancellationToken cancellationToken) { if (!_options.IncrementalIngestion) diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs index cba7f497857..e20061cbf63 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs @@ -86,7 +86,7 @@ public async Task CanProcessDocuments() TestEmbeddingGenerator embeddingGenerator = new(); using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator }); - var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); + var definition = IngestedChunkRecord.CreateDefaultCollectionDefinition(TestEmbeddingGenerator.DimensionCount); var collection = testVectorStore.GetCollection>("chunks", definition); using VectorStoreWriter> vectorStoreWriter = new(collection); @@ -122,7 +122,7 @@ public async Task CanProcessDocumentsInDirectory() TestEmbeddingGenerator embeddingGenerator = new(); using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator }); - var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); + var definition = IngestedChunkRecord.CreateDefaultCollectionDefinition(TestEmbeddingGenerator.DimensionCount); var collection = testVectorStore.GetCollection>("chunks-dir", definition); using 
VectorStoreWriter> vectorStoreWriter = new(collection); @@ -159,7 +159,7 @@ public async Task ChunksCanBeMoreThanJustText() TestEmbeddingGenerator embeddingGenerator = new(); using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator }); - var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); + var definition = IngestedChunkRecord.CreateDefaultCollectionDefinition(TestEmbeddingGenerator.DimensionCount); var collection = testVectorStore.GetCollection>("chunks-img", definition); using VectorStoreWriter> vectorStoreWriter = new(collection); using IngestionPipeline pipeline = new(CreateReader(), new ImageChunker(), vectorStoreWriter); @@ -211,7 +211,7 @@ public async Task SingleFailureDoesNotTearDownEntirePipeline() TestEmbeddingGenerator embeddingGenerator = new(); using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator }); - var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); + var definition = IngestedChunkRecord.CreateDefaultCollectionDefinition(TestEmbeddingGenerator.DimensionCount); var collection = testVectorStore.GetCollection>("chunks-fail", definition); using VectorStoreWriter> vectorStoreWriter = new(collection); diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs index a5949291b3a..7a62230d815 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs @@ -14,14 +14,4 @@ public class TestChunkRecordWithMetadata : IngestedChunkRecord [VectorStoreData(StorageName = "classification")] public string? Classification { get; set; } - - public override void SetMetadata(string key, object? 
value) - { - switch (key) - { - case nameof(Classification): - Classification = value as string; - break; - } - } } diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestVectorStoreWriterWithMetadata.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestVectorStoreWriterWithMetadata.cs new file mode 100644 index 00000000000..ec9a7e9174a --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestVectorStoreWriterWithMetadata.cs @@ -0,0 +1,25 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using Microsoft.Extensions.VectorData; + +namespace Microsoft.Extensions.DataIngestion.Writers.Tests; + +public class TestVectorStoreWriterWithMetadata : VectorStoreWriter +{ + public TestVectorStoreWriterWithMetadata(VectorStoreCollection collection, VectorStoreWriterOptions? options = default) + : base(collection, options) + { + } + + protected override void SetMetadata(TestChunkRecordWithMetadata record, string key, object? 
value) + { + switch (key) + { + case nameof(TestChunkRecordWithMetadata.Classification): + record.Classification = value as string; + break; + } + } +} diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs index 17066077e2b..82fc540adf6 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs @@ -21,7 +21,7 @@ public async Task CanWriteChunksWithCustomDefinition() using TestEmbeddingGenerator testEmbeddingGenerator = new(); using VectorStore vectorStore = CreateVectorStore(testEmbeddingGenerator); - // User creates their own definition without using CreateCollectionDefinition, + // User creates their own definition without using CreateDefaultCollectionDefinition, // using custom storage names to prove they can map to a pre-existing collection schema. 
VectorStoreCollectionDefinition definition = new() { @@ -71,7 +71,7 @@ public async Task CanWriteChunks() using TestEmbeddingGenerator testEmbeddingGenerator = new(); using VectorStore vectorStore = CreateVectorStore(testEmbeddingGenerator); - var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); + var definition = IngestedChunkRecord.CreateDefaultCollectionDefinition(TestEmbeddingGenerator.DimensionCount); var collection = vectorStore.GetCollection>("chunks", definition); using VectorStoreWriter> writer = new(collection); @@ -104,7 +104,7 @@ public async Task CanWriteChunksWithMetadata() using VectorStore vectorStore = CreateVectorStore(testEmbeddingGenerator); var collection = vectorStore.GetCollection("chunks-meta"); - using VectorStoreWriter writer = new(collection); + using TestVectorStoreWriterWithMetadata writer = new(collection); IngestionDocument document = new(documentId); IngestionChunk chunk = TestChunkFactory.CreateChunk("some content", document); @@ -132,7 +132,7 @@ public async Task DoesSupportIncrementalIngestion() using TestEmbeddingGenerator testEmbeddingGenerator = new(); using VectorStore vectorStore = CreateVectorStore(testEmbeddingGenerator); - var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); + var definition = IngestedChunkRecord.CreateDefaultCollectionDefinition(TestEmbeddingGenerator.DimensionCount); var collection = vectorStore.GetCollection>("chunks-incr", definition); using VectorStoreWriter> writer = new( @@ -202,7 +202,7 @@ public async Task BatchesChunks(int? 
batchTokenCount, int[] chunkTokenCounts) options.BatchTokenCount = batchTokenCount.Value; } - var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); + var definition = IngestedChunkRecord.CreateDefaultCollectionDefinition(TestEmbeddingGenerator.DimensionCount); var collection = vectorStore.GetCollection>("chunks-batch", definition); using VectorStoreWriter> writer = new( @@ -233,7 +233,7 @@ public async Task IncrementalIngestion_WithManyRecords_DeletesAllPreExistingChun using TestEmbeddingGenerator testEmbeddingGenerator = new(); using VectorStore vectorStore = CreateVectorStore(testEmbeddingGenerator); - var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount); + var definition = IngestedChunkRecord.CreateDefaultCollectionDefinition(TestEmbeddingGenerator.DimensionCount); var collection = vectorStore.GetCollection>("chunks-many", definition); using VectorStoreWriter> writer = new( From 4558e6c3a2cf84bce471cd676b9046c62e95df28 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Thu, 19 Mar 2026 18:08:52 +0100 Subject: [PATCH 14/14] provide a VectorStore extension method to create VectorStoreCollection with provided dimension count --- .../Writers/IngestedChunkRecord.cs | 57 +++--------------- .../Writers/VectorStoreExtensions.cs | 60 +++++++++++++++++++ .../Writers/VectorStoreWriter.cs | 4 ++ .../IngestionPipelineTests.cs | 16 ++--- .../Writers/VectorStoreWriterTests.cs | 26 ++++---- 5 files changed, 93 insertions(+), 70 deletions(-) create mode 100644 src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreExtensions.cs diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs index b36a1b6b391..31836464501 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs +++ 
b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs @@ -11,64 +11,23 @@ namespace Microsoft.Extensions.DataIngestion; /// /// The type of the chunk content. /// -/// When the vector dimension count is not known at compile time, use the -/// helper to create a and pass it to the vector store collection constructor. +/// When the vector dimension count is not known at compile time, +/// use the +/// helper to create a and pass it to the constructor. /// When the vector dimension count is known at compile time, derive from this class and add /// the to the property. /// public class IngestedChunkRecord { - private const string KeyStorageName = "key"; - private const string DocumentIdStorageName = "documentid"; - private const string ContentStorageName = "content"; - private const string ContextStorageName = "context"; - /// /// The storage name for the property. /// - protected const string EmbeddingStorageName = "embedding"; + protected const string EmbeddingStorageName = VectorStoreExtensions.EmbeddingStorageName; - /// - /// Creates a for . - /// - /// The number of dimensions that the vector has. - /// - /// The distance function to use. When not provided, the default specific to given database will be used. - /// Check for available values. - /// - /// The index kind to use. - /// A suitable for creating a vector store collection. - /// is less than or equal to zero. -#pragma warning disable CA1000 // Do not declare static members on generic types - needs access to TChunk type parameter - public static VectorStoreCollectionDefinition CreateDefaultCollectionDefinition(int dimensionCount, string? distanceFunction = null, string? 
indexKind = null) -#pragma warning restore CA1000 - { - _ = Shared.Diagnostics.Throw.IfLessThanOrEqual(dimensionCount, 0); - - return new VectorStoreCollectionDefinition - { - Properties = - { - new VectorStoreKeyProperty(nameof(Key), typeof(Guid)) { StorageName = KeyStorageName }, - - // By using TChunk as the type here we allow the vector store - // to handle the conversion from TChunk to the actual vector type it supports. - new VectorStoreVectorProperty(nameof(Embedding), typeof(TChunk), dimensionCount) - { - StorageName = EmbeddingStorageName, - DistanceFunction = distanceFunction, - IndexKind = indexKind, - }, - new VectorStoreDataProperty(nameof(Content), typeof(TChunk)) { StorageName = ContentStorageName }, - new VectorStoreDataProperty(nameof(Context), typeof(string)) { StorageName = ContextStorageName }, - new VectorStoreDataProperty(nameof(DocumentId), typeof(string)) - { - StorageName = DocumentIdStorageName, - IsIndexed = true, - }, - }, - }; - } + private const string KeyStorageName = "key"; + private const string DocumentIdStorageName = "documentid"; + private const string ContentStorageName = "content"; + private const string ContextStorageName = "context"; /// /// Gets or sets the unique key for this record. diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreExtensions.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreExtensions.cs new file mode 100644 index 00000000000..7bc53ec7357 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreExtensions.cs @@ -0,0 +1,60 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Diagnostics.CodeAnalysis; +using Microsoft.Extensions.VectorData; + +namespace Microsoft.Extensions.DataIngestion; + +/// +/// Provides extension methods for working with vector stores in the context of data ingestion. 
+///
+public static class VectorStoreExtensions
+{
+    internal const string EmbeddingStorageName = "embedding";
+
+    /// <summary>
+    /// Provides a convenient method to get a vector store collection specifically designed for storing ingested chunk records.
+    /// </summary>
+    /// <typeparam name="TRecord">The type of the record to be stored in the collection.</typeparam>
+    /// <typeparam name="TChunk">The type of the chunk content.</typeparam>
+    /// <param name="vectorStore">The vector store instance to create the collection in. Must not be null.</param>
+    /// <param name="collectionName">The name of the collection to be created. Must not be null or empty.</param>
+    /// <param name="dimensionCount">The number of dimensions that the vector has. Must be greater than zero.</param>
+    /// <param name="storageName">The storage name for the vector property. Must not be null or empty.</param>
+    /// <param name="distanceFunction">
+    /// The distance function to use. When not provided, the default specific to the given database will be used.
+    /// Check <see cref="DistanceFunction"/> for available values.
+    /// </param>
+    /// <param name="indexKind">The index kind to use.</param>
+    /// <returns>A vector store collection configured for ingested chunk records.</returns>
+    [RequiresDynamicCode("This API is not compatible with NativeAOT. You can implement your own IngestionChunkWriter that uses dynamic mapping via VectorStore.GetCollectionDynamic().")]
+    [RequiresUnreferencedCode("This API is not compatible with trimming. You can implement your own IngestionChunkWriter that uses dynamic mapping via VectorStore.GetCollectionDynamic().")]
+    public static VectorStoreCollection<Guid, TRecord> GetIngestionRecordCollection<TRecord, TChunk>(this VectorStore vectorStore,
+        string collectionName, int dimensionCount, string storageName = EmbeddingStorageName, string? distanceFunction = null, string? indexKind = null)
+        where TRecord : IngestedChunkRecord<TChunk>, new()
+    {
+        _ = Shared.Diagnostics.Throw.IfNull(vectorStore);
+        _ = Shared.Diagnostics.Throw.IfNullOrEmpty(collectionName);
+        _ = Shared.Diagnostics.Throw.IfLessThanOrEqual(dimensionCount, 0);
+        _ = Shared.Diagnostics.Throw.IfNullOrEmpty(storageName);
+
+        VectorStoreCollectionDefinition additiveDefinition = new()
+        {
+            Properties =
+            {
+                // By using TChunk as the type here we allow the vector store
+                // to handle the conversion from TChunk to the actual vector type it supports.
+                new VectorStoreVectorProperty(nameof(IngestedChunkRecord<>.Embedding), typeof(TChunk), dimensionCount)
+                {
+                    StorageName = storageName,
+                    DistanceFunction = distanceFunction,
+                    IndexKind = indexKind,
+                },
+            },
+        };
+
+        return vectorStore.GetCollection<Guid, TRecord>(collectionName, additiveDefinition);
+    }
+}
diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs index 23b09f64086..8182892bb43 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs @@ -27,6 +27,10 @@ public class VectorStoreWriter : IngestionChunkWriter /// The to use to store the instances. /// The options for the vector store writer. /// When is null. + /// + /// You can use the + /// helper to create a with the appropriate schema for storing ingestion chunks. + /// public VectorStoreWriter(VectorStoreCollection collection, VectorStoreWriterOptions? 
options = default) { VectorStoreCollection = Throw.IfNull(collection); diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs index e20061cbf63..3fd5439f848 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs @@ -86,8 +86,8 @@ public async Task CanProcessDocuments() TestEmbeddingGenerator embeddingGenerator = new(); using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator }); - var definition = IngestedChunkRecord.CreateDefaultCollectionDefinition(TestEmbeddingGenerator.DimensionCount); - var collection = testVectorStore.GetCollection>("chunks", definition); + var collection = testVectorStore.GetIngestionRecordCollection, string>( + "chunks", TestEmbeddingGenerator.DimensionCount); using VectorStoreWriter> vectorStoreWriter = new(collection); using IngestionPipeline pipeline = new(CreateReader(), CreateChunker(), vectorStoreWriter); @@ -122,8 +122,8 @@ public async Task CanProcessDocumentsInDirectory() TestEmbeddingGenerator embeddingGenerator = new(); using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator }); - var definition = IngestedChunkRecord.CreateDefaultCollectionDefinition(TestEmbeddingGenerator.DimensionCount); - var collection = testVectorStore.GetCollection>("chunks-dir", definition); + var collection = testVectorStore.GetIngestionRecordCollection, string>( + "chunks-dir", TestEmbeddingGenerator.DimensionCount); using VectorStoreWriter> vectorStoreWriter = new(collection); using IngestionPipeline pipeline = new(CreateReader(), CreateChunker(), vectorStoreWriter); @@ -159,8 +159,8 @@ public async Task ChunksCanBeMoreThanJustText() TestEmbeddingGenerator embeddingGenerator = new(); using InMemoryVectorStore testVectorStore = 
new(new() { EmbeddingGenerator = embeddingGenerator }); - var definition = IngestedChunkRecord.CreateDefaultCollectionDefinition(TestEmbeddingGenerator.DimensionCount); - var collection = testVectorStore.GetCollection>("chunks-img", definition); + var collection = testVectorStore.GetIngestionRecordCollection, DataContent>( + "chunks-img", TestEmbeddingGenerator.DimensionCount); using VectorStoreWriter> vectorStoreWriter = new(collection); using IngestionPipeline pipeline = new(CreateReader(), new ImageChunker(), vectorStoreWriter); @@ -211,8 +211,8 @@ public async Task SingleFailureDoesNotTearDownEntirePipeline() TestEmbeddingGenerator embeddingGenerator = new(); using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator }); - var definition = IngestedChunkRecord.CreateDefaultCollectionDefinition(TestEmbeddingGenerator.DimensionCount); - var collection = testVectorStore.GetCollection>("chunks-fail", definition); + var collection = testVectorStore.GetIngestionRecordCollection, string>( + "chunks-fail", TestEmbeddingGenerator.DimensionCount); using VectorStoreWriter> vectorStoreWriter = new(collection); using IngestionPipeline pipeline = new(failingForFirstReader, CreateChunker(), vectorStoreWriter); diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs index 82fc540adf6..d725c548f74 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs @@ -27,14 +27,14 @@ public async Task CanWriteChunksWithCustomDefinition() { Properties = { - new VectorStoreKeyProperty(nameof(IngestedChunkRecord.Key), typeof(Guid)) { StorageName = "custom_key" }, - new VectorStoreVectorProperty(nameof(IngestedChunkRecord.Embedding), typeof(string), 
TestEmbeddingGenerator.DimensionCount) + new VectorStoreKeyProperty(nameof(IngestedChunkRecord<>.Key), typeof(Guid)) { StorageName = "custom_key" }, + new VectorStoreVectorProperty(nameof(IngestedChunkRecord<>.Embedding), typeof(string), TestEmbeddingGenerator.DimensionCount) { StorageName = "custom_embedding", }, - new VectorStoreDataProperty(nameof(IngestedChunkRecord.Content), typeof(string)) { StorageName = "custom_content" }, - new VectorStoreDataProperty(nameof(IngestedChunkRecord.Context), typeof(string)) { StorageName = "custom_context" }, - new VectorStoreDataProperty(nameof(IngestedChunkRecord.DocumentId), typeof(string)) + new VectorStoreDataProperty(nameof(IngestedChunkRecord<>.Content), typeof(string)) { StorageName = "custom_content" }, + new VectorStoreDataProperty(nameof(IngestedChunkRecord<>.Context), typeof(string)) { StorageName = "custom_context" }, + new VectorStoreDataProperty(nameof(IngestedChunkRecord<>.DocumentId), typeof(string)) { StorageName = "custom_documentid", IsIndexed = true, @@ -71,8 +71,8 @@ public async Task CanWriteChunks() using TestEmbeddingGenerator testEmbeddingGenerator = new(); using VectorStore vectorStore = CreateVectorStore(testEmbeddingGenerator); - var definition = IngestedChunkRecord.CreateDefaultCollectionDefinition(TestEmbeddingGenerator.DimensionCount); - var collection = vectorStore.GetCollection>("chunks", definition); + var collection = vectorStore.GetIngestionRecordCollection, string>( + "chunks", TestEmbeddingGenerator.DimensionCount); using VectorStoreWriter> writer = new(collection); @@ -132,8 +132,8 @@ public async Task DoesSupportIncrementalIngestion() using TestEmbeddingGenerator testEmbeddingGenerator = new(); using VectorStore vectorStore = CreateVectorStore(testEmbeddingGenerator); - var definition = IngestedChunkRecord.CreateDefaultCollectionDefinition(TestEmbeddingGenerator.DimensionCount); - var collection = vectorStore.GetCollection>("chunks-incr", definition); + var collection = 
vectorStore.GetIngestionRecordCollection, string>( + "chunks-incr", TestEmbeddingGenerator.DimensionCount); using VectorStoreWriter> writer = new( collection, @@ -202,8 +202,8 @@ public async Task BatchesChunks(int? batchTokenCount, int[] chunkTokenCounts) options.BatchTokenCount = batchTokenCount.Value; } - var definition = IngestedChunkRecord.CreateDefaultCollectionDefinition(TestEmbeddingGenerator.DimensionCount); - var collection = vectorStore.GetCollection>("chunks-batch", definition); + var collection = vectorStore.GetIngestionRecordCollection, string>( + "chunks-batch", TestEmbeddingGenerator.DimensionCount); using VectorStoreWriter> writer = new( collection, @@ -233,8 +233,8 @@ public async Task IncrementalIngestion_WithManyRecords_DeletesAllPreExistingChun using TestEmbeddingGenerator testEmbeddingGenerator = new(); using VectorStore vectorStore = CreateVectorStore(testEmbeddingGenerator); - var definition = IngestedChunkRecord.CreateDefaultCollectionDefinition(TestEmbeddingGenerator.DimensionCount); - var collection = vectorStore.GetCollection>("chunks-many", definition); + var collection = vectorStore.GetIngestionRecordCollection, string>( + "chunks-many", TestEmbeddingGenerator.DimensionCount); using VectorStoreWriter> writer = new( collection,