From ea56d337fb0238e48fa8ab4b1c574ad016685a47 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 14 Mar 2026 20:50:44 +0000
Subject: [PATCH 01/14] Initial plan
From 581bce019cc997295b6016d5f3919ffb721c9c27 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 14 Mar 2026 21:27:58 +0000
Subject: [PATCH 02/14] Refactor VectorStoreWriter to accept
VectorStoreCollection, add IngestedChunkRecord base type
Co-authored-by: adamsitnik <6011991+adamsitnik@users.noreply.github.com>
---
.../Writers/IngestedChunkRecord.cs | 139 +++++++++++++++++
.../Writers/VectorStoreWriter.cs | 144 +++++-------------
.../Writers/VectorStoreWriterOptions.cs | 24 +--
.../Components/Pages/Chat/Chat.razor | 2 +-
.../Services/IngestedChunk.cs | 29 ++--
.../Services/Ingestion/DataIngestor.cs | 14 +-
6 files changed, 201 insertions(+), 151 deletions(-)
create mode 100644 src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs
diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs
new file mode 100644
index 00000000000..dd24d8edd18
--- /dev/null
+++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs
@@ -0,0 +1,139 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Text.Json.Serialization;
+using Microsoft.Extensions.VectorData;
+
+namespace Microsoft.Extensions.DataIngestion;
+
+/// <summary>
+/// Represents the base record type used by <see cref="VectorStoreWriter{TKey, TChunk, TRecord}"/> to store ingested chunks in a vector store.
+/// </summary>
+/// <typeparam name="TKey">The type of the key for the record.</typeparam>
+/// <typeparam name="TChunk">The type of the chunk content.</typeparam>
+/// <remarks>
+/// When the vector dimension count is not known at compile time, use the <see cref="CreateCollectionDefinition"/>
+/// helper to create a <see cref="VectorStoreCollectionDefinition"/> and pass it to the vector store collection constructor.
+/// When the vector dimension count is known at compile time, derive from this class and add
+/// the <see cref="VectorStoreVectorAttribute"/> to the <see cref="Embedding"/> property.
+/// </remarks>
+public class IngestedChunkRecord<TKey, TChunk>
+{
+    /// <summary>
+    /// The storage name for the <see cref="Key"/> property.
+    /// </summary>
+    public const string KeyPropertyName = "key";
+
+    /// <summary>
+    /// The storage name for the <see cref="DocumentId"/> property.
+    /// </summary>
+    public const string DocumentIdPropertyName = "documentid";
+
+    /// <summary>
+    /// The storage name for the <see cref="Content"/> property.
+    /// </summary>
+    public const string ContentPropertyName = "content";
+
+    /// <summary>
+    /// The storage name for the <see cref="Context"/> property.
+    /// </summary>
+    public const string ContextPropertyName = "context";
+
+    /// <summary>
+    /// The storage name for the <see cref="Embedding"/> property.
+    /// </summary>
+    public const string EmbeddingPropertyName = "embedding";
+
+    /// <summary>
+    /// Gets or sets the unique key for this record.
+    /// </summary>
+    [VectorStoreKey(StorageName = KeyPropertyName)]
+    [JsonPropertyName(KeyPropertyName)]
+    public TKey Key { get; set; } = default!;
+
+    /// <summary>
+    /// Gets or sets the identifier of the document from which this chunk was extracted.
+    /// </summary>
+    [VectorStoreData(StorageName = DocumentIdPropertyName)]
+    [JsonPropertyName(DocumentIdPropertyName)]
+    public string DocumentId { get; set; } = string.Empty;
+
+    /// <summary>
+    /// Gets or sets the content of the chunk.
+    /// </summary>
+    [VectorStoreData(StorageName = ContentPropertyName)]
+    [JsonPropertyName(ContentPropertyName)]
+    public TChunk? Content { get; set; }
+
+    /// <summary>
+    /// Gets or sets additional context for the chunk.
+    /// </summary>
+    [VectorStoreData(StorageName = ContextPropertyName)]
+    [JsonPropertyName(ContextPropertyName)]
+    public string? Context { get; set; }
+
+    /// <summary>
+    /// Gets the embedding value for this record.
+    /// </summary>
+    /// <remarks>
+    /// By default, returns the <see cref="Content"/> value. The vector store's embedding generator
+    /// will convert this to a vector. Override this property in derived classes to add
+    /// the <see cref="VectorStoreVectorAttribute"/> with the appropriate dimension count.
+    /// </remarks>
+    [JsonPropertyName(EmbeddingPropertyName)]
+    public virtual TChunk? Embedding => Content;
+
+    /// <summary>
+    /// Sets a metadata value on the record.
+    /// </summary>
+    /// <param name="key">The metadata key.</param>
+    /// <param name="value">The metadata value.</param>
+    /// <remarks>
+    /// Override this method in derived classes to store metadata as typed properties with
+    /// <see cref="VectorStoreDataAttribute"/> attributes. The default implementation is a no-op.
+    /// </remarks>
+    public virtual void SetMetadata(string key, object? value)
+    {
+        // Default implementation: no-op.
+        // Derived classes can override to map metadata keys to typed properties.
+    }
+
+    /// <summary>
+    /// Creates a <see cref="VectorStoreCollectionDefinition"/> for <see cref="IngestedChunkRecord{TKey, TChunk}"/>.
+    /// </summary>
+    /// <param name="dimensionCount">The number of dimensions that the vector has.</param>
+    /// <param name="distanceFunction">
+    /// The distance function to use. When not provided, the default specific to given database will be used.
+    /// Check <see cref="DistanceFunction"/> for available values.
+    /// </param>
+    /// <param name="indexKind">The index kind to use.</param>
+    /// <returns>A <see cref="VectorStoreCollectionDefinition"/> suitable for creating a vector store collection.</returns>
+    /// <exception cref="ArgumentOutOfRangeException"><paramref name="dimensionCount"/> is less than or equal to zero.</exception>
+    public static VectorStoreCollectionDefinition CreateCollectionDefinition(int dimensionCount, string? distanceFunction = null, string? indexKind = null)
+    {
+        Shared.Diagnostics.Throw.IfLessThanOrEqual(dimensionCount, 0);
+
+        return new VectorStoreCollectionDefinition
+        {
+            Properties =
+            {
+                new VectorStoreKeyProperty(KeyPropertyName, typeof(TKey)),
+
+                // By using TChunk as the type here we allow the vector store
+                // to handle the conversion from TChunk to the actual vector type it supports.
+                new VectorStoreVectorProperty(EmbeddingPropertyName, typeof(TChunk), dimensionCount)
+                {
+                    DistanceFunction = distanceFunction,
+                    IndexKind = indexKind,
+                },
+                new VectorStoreDataProperty(ContentPropertyName, typeof(TChunk)),
+                new VectorStoreDataProperty(ContextPropertyName, typeof(string)),
+                new VectorStoreDataProperty(DocumentIdPropertyName, typeof(string))
+                {
+                    IsIndexed = true,
+                },
+            },
+        };
+    }
+}
diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs
index a10a6595095..d373843af86 100644
--- a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs
+++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs
@@ -11,66 +11,50 @@
namespace Microsoft.Extensions.DataIngestion;
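/// <summary>
-/// Writes chunks to the <see cref="VectorStore"/> using the default schema.
+/// Writes chunks to a <see cref="VectorStoreCollection{TKey, TRecord}"/>.
/// </summary>
-/// <typeparam name="T">The type of the chunk content.</typeparam>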
-public sealed class VectorStoreWriter : IngestionChunkWriter
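+/// <typeparam name="TKey">The type of the key for the record.</typeparam>
+/// <typeparam name="TChunk">The type of the chunk content.</typeparam>
+/// <typeparam name="TRecord">The type of the record stored in the vector store.</typeparam>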
+public sealed class VectorStoreWriter : IngestionChunkWriter
+ where TKey : notnull
+ where TRecord : IngestedChunkRecord, new()
{
- // The names are lowercase with no special characters to ensure compatibility with various vector stores.
- private const string KeyName = "key";
- private const string EmbeddingName = "embedding";
- private const string ContentName = "content";
- private const string ContextName = "context";
- private const string DocumentIdName = "documentid";
-
- private readonly VectorStore _vectorStore;
- private readonly int _dimensionCount;
private readonly VectorStoreWriterOptions _options;
-
- private VectorStoreCollection>? _vectorStoreCollection;
+ private bool _collectionEnsured;
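/// <summary>
- /// Initializes a new instance of the <see cref="VectorStoreWriter{T}"/> class.
+ /// Initializes a new instance of the <see cref="VectorStoreWriter{TKey, TChunk, TRecord}"/> class.
/// </summary>
- /// <param name="vectorStore">The <see cref="VectorStore"/> to use to store the <see cref="IngestionChunk{T}"/> instances.</param>
- /// <param name="dimensionCount">The number of dimensions that the vector has. This value is required when creating collections.</param>
+ /// <param name="collection">The <see cref="VectorStoreCollection{TKey, TRecord}"/> to use to store the <see cref="IngestionChunk{T}"/> instances.</param>
/// <param name="options">The options for the vector store writer.</param>
- /// <exception cref="ArgumentNullException">When <paramref name="vectorStore"/> is null.</exception>
- /// <exception cref="ArgumentOutOfRangeException">When <paramref name="dimensionCount"/> is less or equal zero.</exception>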
- public VectorStoreWriter(VectorStore vectorStore, int dimensionCount, VectorStoreWriterOptions? options = default)
+ /// <exception cref="ArgumentNullException">When <paramref name="collection"/> is null.</exception>
+ public VectorStoreWriter(VectorStoreCollection collection, VectorStoreWriterOptions? options = default)
{
- _vectorStore = Throw.IfNull(vectorStore);
- _dimensionCount = Throw.IfLessThanOrEqual(dimensionCount, 0);
+ VectorStoreCollection = Throw.IfNull(collection);
_options = options ?? new VectorStoreWriterOptions();
}
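/// <summary>
/// Gets the underlying <see cref="VectorStoreCollection{TKey,TRecord}"/> used to store the chunks.
/// </summary>
- /// <remarks>
- /// The collection is initialized when <see cref="WriteAsync"/> is called for the first time.
- /// </remarks>
- /// <exception cref="InvalidOperationException">The collection has not been initialized yet.
- /// Call <see cref="WriteAsync"/> first.</exception>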
- public VectorStoreCollection> VectorStoreCollection
- => _vectorStoreCollection ?? throw new InvalidOperationException("The collection has not been initialized yet. Call WriteAsync first.");
+ public VectorStoreCollection VectorStoreCollection { get; }
/// <inheritdoc/>
- public override async Task WriteAsync(IAsyncEnumerable> chunks, CancellationToken cancellationToken = default)
+ public override async Task WriteAsync(IAsyncEnumerable> chunks, CancellationToken cancellationToken = default)
{
_ = Throw.IfNull(chunks);
- IReadOnlyList? preExistingKeys = null;
- List>? batch = null;
+ IReadOnlyList? preExistingKeys = null;
+ List? batch = null;
long currentBatchTokenCount = 0;
- await foreach (IngestionChunk chunk in chunks.WithCancellation(cancellationToken))
+ await foreach (IngestionChunk chunk in chunks.WithCancellation(cancellationToken))
{
- if (_vectorStoreCollection is null)
+ if (!_collectionEnsured)
{
- _vectorStoreCollection = _vectorStore.GetDynamicCollection(_options.CollectionName, GetVectorStoreRecordDefinition(chunk));
-
- await _vectorStoreCollection.EnsureCollectionExistsAsync(cancellationToken).ConfigureAwait(false);
+ await VectorStoreCollection.EnsureCollectionExistsAsync(cancellationToken).ConfigureAwait(false);
+ _collectionEnsured = true;
}
// We obtain the IDs of the pre-existing chunks for given document,
@@ -78,21 +62,19 @@ public override async Task WriteAsync(IAsyncEnumerable> chunks
// to avoid a situation where we delete the chunks and then fail to insert the new ones.
preExistingKeys ??= await GetPreExistingChunksIdsAsync(chunk.Document, cancellationToken).ConfigureAwait(false);
- var key = Guid.NewGuid();
- Dictionary record = new()
+ TRecord record = new()
{
- [KeyName] = key,
- [ContentName] = chunk.Content,
- [EmbeddingName] = chunk.Content,
- [ContextName] = chunk.Context,
- [DocumentIdName] = chunk.Document.Identifier,
+ Key = GenerateKey(),
+ Content = chunk.Content,
+ Context = chunk.Context,
+ DocumentId = chunk.Document.Identifier,
};
if (chunk.HasMetadata)
{
foreach (var metadata in chunk.Metadata)
{
- record[metadata.Key] = metadata.Value;
+ record.SetMetadata(metadata.Key, metadata.Value);
}
}
@@ -102,7 +84,7 @@ public override async Task WriteAsync(IAsyncEnumerable> chunks
// If the batch is empty or the chunk alone exceeds the limit, add it anyway.
if (batch.Count > 0 && currentBatchTokenCount + chunk.TokenCount > _options.BatchTokenCount)
{
- await _vectorStoreCollection.UpsertAsync(batch, cancellationToken).ConfigureAwait(false);
+ await VectorStoreCollection.UpsertAsync(batch, cancellationToken).ConfigureAwait(false);
batch.Clear();
currentBatchTokenCount = 0;
@@ -115,75 +97,31 @@ public override async Task WriteAsync(IAsyncEnumerable> chunks
// Upsert any remaining chunks in the batch
if (batch?.Count > 0)
{
- await _vectorStoreCollection!.UpsertAsync(batch, cancellationToken).ConfigureAwait(false);
+ await VectorStoreCollection.UpsertAsync(batch, cancellationToken).ConfigureAwait(false);
}
if (preExistingKeys?.Count > 0)
{
- await _vectorStoreCollection!.DeleteAsync(preExistingKeys, cancellationToken).ConfigureAwait(false);
+ await VectorStoreCollection.DeleteAsync(preExistingKeys, cancellationToken).ConfigureAwait(false);
}
}
- ///
- protected override void Dispose(bool disposing)
+ private static TKey GenerateKey()
{
- try
- {
- _vectorStoreCollection?.Dispose();
- }
- finally
+ if (typeof(TKey) == typeof(Guid))
{
- _vectorStore.Dispose();
- base.Dispose(disposing);
+ return (TKey)(object)Guid.NewGuid();
}
- }
-
- private VectorStoreCollectionDefinition GetVectorStoreRecordDefinition(IngestionChunk representativeChunk)
- {
- VectorStoreCollectionDefinition definition = new()
- {
- Properties =
- {
- new VectorStoreKeyProperty(KeyName, typeof(Guid)),
- // By using T as the type here we allow the vector store
- // to handle the conversion from T to the actual vector type it supports.
- new VectorStoreVectorProperty(EmbeddingName, typeof(T), _dimensionCount)
- {
- DistanceFunction = _options.DistanceFunction,
- IndexKind = _options.IndexKind
- },
- new VectorStoreDataProperty(ContentName, typeof(T)),
- new VectorStoreDataProperty(ContextName, typeof(string)),
- new VectorStoreDataProperty(DocumentIdName, typeof(string))
- {
- IsIndexed = true
- }
- }
- };
-
- if (representativeChunk.HasMetadata)
+ if (typeof(TKey) == typeof(string))
{
- foreach (var metadata in representativeChunk.Metadata)
- {
- Type propertyType = metadata.Value.GetType();
- definition.Properties.Add(new VectorStoreDataProperty(metadata.Key, propertyType)
- {
- // We use lowercase storage names to ensure compatibility with various vector stores.
-#pragma warning disable CA1308 // Normalize strings to uppercase
- StorageName = metadata.Key.ToLowerInvariant()
-#pragma warning restore CA1308 // Normalize strings to uppercase
-
- // We could consider indexing for certain keys like classification etc. but for now we leave it as non-indexed.
- // The reason is that not every DB supports it, moreover we would need to expose the ability to configure it.
- });
- }
+ return (TKey)(object)Guid.NewGuid().ToString();
}
- return definition;
+ throw new NotSupportedException($"Automatic key generation is not supported for key type '{typeof(TKey)}'. Supported key types are Guid and string.");
}
- private async Task> GetPreExistingChunksIdsAsync(IngestionDocument document, CancellationToken cancellationToken)
+ private async Task> GetPreExistingChunksIdsAsync(IngestionDocument document, CancellationToken cancellationToken)
{
if (!_options.IncrementalIngestion)
{
@@ -193,19 +131,19 @@ private async Task> GetPreExistingChunksIdsAsync(Ingestion
// Each Vector Store has a different max top count limit, so we use low value and loop.
const int MaxTopCount = 1_000;
- List keys = [];
+ List keys = [];
int insertedCount;
do
{
insertedCount = 0;
- await foreach (var record in _vectorStoreCollection!.GetAsync(
- filter: record => (string)record[DocumentIdName]! == document.Identifier,
+ await foreach (var record in VectorStoreCollection.GetAsync(
+ filter: record => record.DocumentId == document.Identifier,
top: MaxTopCount,
options: new() { Skip = keys.Count },
cancellationToken: cancellationToken).ConfigureAwait(false))
{
- keys.Add(record[KeyName]!);
+ keys.Add(record.Key);
insertedCount++;
}
}
diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriterOptions.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriterOptions.cs
index e7a7b5e6c79..3799c3a56cc 100644
--- a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriterOptions.cs
+++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriterOptions.cs
@@ -7,34 +7,12 @@
namespace Microsoft.Extensions.DataIngestion;
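/// <summary>
-/// Represents options for the <see cref="VectorStoreWriter{T}"/>.
+/// Represents options for the <see cref="VectorStoreWriter{TKey, TChunk, TRecord}"/>.
/// </summary>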
public sealed class VectorStoreWriterOptions
{
private const int DefaultBatchTokenCount = 256 * IngestionChunkerOptions.DefaultTokensPerChunk;
- ///
- /// Gets or sets the name of the collection. When not provided, "chunks" will be used.
- ///
- public string CollectionName
- {
- get => field ?? "chunks";
- set => field = Throw.IfNullOrEmpty(value);
- }
-
- ///
- /// Gets or sets the distance function to use when creating the collection.
- ///
- ///
- /// When not provided, the default specific to given database will be used. Check for available values.
- ///
- public string? DistanceFunction { get; set; }
-
- ///
- /// Gets or sets the index kind to use when creating the collection.
- ///
- public string? IndexKind { get; set; }
-
///
/// Gets or sets a value indicating whether to perform incremental ingestion.
///
diff --git a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Components/Pages/Chat/Chat.razor b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Components/Pages/Chat/Chat.razor
index 6fc5881c18f..6e5b4212bd0 100644
--- a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Components/Pages/Chat/Chat.razor
+++ b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Components/Pages/Chat/Chat.razor
@@ -126,7 +126,7 @@
await InvokeAsync(StateHasChanged);
var results = await Search.SearchAsync(searchPhrase, filenameFilter, maxResults: 5);
return results.Select(result =>
- $"{result.Text} ");
+ $"{result.Content} ");
}
public void Dispose()
diff --git a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/IngestedChunk.cs b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/IngestedChunk.cs
index 60e6b5684e4..ae1ac018622 100644
--- a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/IngestedChunk.cs
+++ b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/IngestedChunk.cs
@@ -1,9 +1,14 @@
using System.Text.Json.Serialization;
+using Microsoft.Extensions.DataIngestion;
using Microsoft.Extensions.VectorData;
namespace AIChatWeb_CSharp.Web.Services;
-public class IngestedChunk
+#if (IsQdrant)
+public class IngestedChunk : IngestedChunkRecord
+#else
+public class IngestedChunk : IngestedChunkRecord
+#endif
{
#if (IsOllama)
public const int VectorDimensions = 384; // 384 is the default vector size for the all-minilm embedding model
@@ -17,23 +22,7 @@ public class IngestedChunk
#endif
public const string CollectionName = "data-AIChatWeb-CSharp.Web-chunks";
- [VectorStoreKey(StorageName = "key")]
- [JsonPropertyName("key")]
- public required Guid Key { get; set; }
-
- [VectorStoreData(StorageName = "documentid")]
- [JsonPropertyName("documentid")]
- public required string DocumentId { get; set; }
-
- [VectorStoreData(StorageName = "content")]
- [JsonPropertyName("content")]
- public required string Text { get; set; }
-
- [VectorStoreData(StorageName = "context")]
- [JsonPropertyName("context")]
- public string? Context { get; set; }
-
- [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")]
- [JsonPropertyName("embedding")]
- public string? Vector => Text;
+ [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingPropertyName)]
+ [JsonPropertyName(EmbeddingPropertyName)]
+ public override string? Embedding => Content;
}
diff --git a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/Ingestion/DataIngestor.cs b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/Ingestion/DataIngestor.cs
index c573403e618..5d9cffe29a4 100644
--- a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/Ingestion/DataIngestor.cs
+++ b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/Ingestion/DataIngestor.cs
@@ -9,15 +9,21 @@ namespace AIChatWeb_CSharp.Web.Services.Ingestion;
public class DataIngestor(
ILogger logger,
ILoggerFactory loggerFactory,
- VectorStore vectorStore,
+#if (IsQdrant)
+ VectorStoreCollection vectorCollection,
+#else
+ VectorStoreCollection vectorCollection,
+#endif
IEmbeddingGenerator> embeddingGenerator)
{
public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern)
{
- using var writer = new VectorStoreWriter(vectorStore, dimensionCount: IngestedChunk.VectorDimensions, new()
+#if (IsQdrant)
+ using var writer = new VectorStoreWriter(vectorCollection, new()
+#else
+ using var writer = new VectorStoreWriter(vectorCollection, new()
+#endif
{
- CollectionName = IngestedChunk.CollectionName,
- DistanceFunction = IngestedChunk.VectorDistanceFunction,
IncrementalIngestion = false,
});
From 727bcd5b468d063aa773a7f1ae07cd6df746d971 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 14 Mar 2026 21:31:18 +0000
Subject: [PATCH 03/14] Update all test files, snapshot files, and template
files for VectorStoreWriter refactoring
Co-authored-by: adamsitnik <6011991+adamsitnik@users.noreply.github.com>
---
.../IngestionPipelineTests.cs | 43 ++++---
.../Writers/TestChunkRecordWithMetadata.cs | 31 +++++
.../Writers/VectorStoreWriterOptionsTests.cs | 1 -
.../Writers/VectorStoreWriterTests.cs | 110 +++++++++++-------
.../Components/Pages/Chat/Chat.razor | 2 +-
.../aichatweb.Web/Services/IngestedChunk.cs | 25 +---
.../Services/Ingestion/DataIngestor.cs | 6 +-
.../Components/Pages/Chat/Chat.razor | 2 +-
.../aichatweb.Web/Services/IngestedChunk.cs | 25 +---
.../Services/Ingestion/DataIngestor.cs | 6 +-
.../Components/Pages/Chat/Chat.razor | 2 +-
.../aichatweb/Services/IngestedChunk.cs | 25 +---
.../Services/Ingestion/DataIngestor.cs | 6 +-
.../Components/Pages/Chat/Chat.razor | 2 +-
.../aichatweb.Web/Services/IngestedChunk.cs | 25 +---
.../Services/Ingestion/DataIngestor.cs | 6 +-
.../Components/Pages/Chat/Chat.razor | 2 +-
.../aichatweb/Services/IngestedChunk.cs | 25 +---
.../Services/Ingestion/DataIngestor.cs | 6 +-
19 files changed, 167 insertions(+), 183 deletions(-)
create mode 100644 test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs
diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs
index 272ccd510a4..a2365b1a02e 100644
--- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs
+++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs
@@ -9,6 +9,7 @@
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.AI;
+using Microsoft.Extensions.VectorData;
using Microsoft.ML.Tokenizers;
using Microsoft.SemanticKernel.Connectors.InMemory;
using OpenTelemetry;
@@ -84,7 +85,10 @@ public async Task CanProcessDocuments()
TestEmbeddingGenerator embeddingGenerator = new();
using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator });
- using VectorStoreWriter vectorStoreWriter = new(testVectorStore, dimensionCount: TestEmbeddingGenerator.DimensionCount);
+
+ var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount);
+ var collection = testVectorStore.GetCollection>("chunks", definition);
+ using VectorStoreWriter> vectorStoreWriter = new(collection);
using IngestionPipeline pipeline = new(CreateReader(), CreateChunker(), vectorStoreWriter);
List ingestionResults = await pipeline.ProcessAsync(_sampleFiles).ToListAsync();
@@ -95,15 +99,15 @@ public async Task CanProcessDocuments()
Assert.True(embeddingGenerator.WasCalled, "Embedding generator should have been called.");
var retrieved = await vectorStoreWriter.VectorStoreCollection
- .GetAsync(record => _sampleFiles.Any(info => info.FullName == (string)record["documentid"]!), top: 1000)
+ .GetAsync(record => _sampleFiles.Any(info => info.FullName == record.DocumentId), top: 1000)
.ToListAsync();
Assert.NotEmpty(retrieved);
for (int i = 0; i < retrieved.Count; i++)
{
- Assert.NotEqual(Guid.Empty, (Guid)retrieved[i]["key"]!);
- Assert.NotEmpty((string)retrieved[i]["content"]!);
- Assert.Contains((string)retrieved[i]["documentid"]!, _sampleFiles.Select(info => info.FullName));
+ Assert.NotEqual(Guid.Empty, retrieved[i].Key);
+ Assert.NotEmpty((string)retrieved[i].Content!);
+ Assert.Contains(retrieved[i].DocumentId, _sampleFiles.Select(info => info.FullName));
}
AssertActivities(activities, "ProcessFiles");
@@ -117,7 +121,10 @@ public async Task CanProcessDocumentsInDirectory()
TestEmbeddingGenerator embeddingGenerator = new();
using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator });
- using VectorStoreWriter vectorStoreWriter = new(testVectorStore, dimensionCount: TestEmbeddingGenerator.DimensionCount);
+
+ var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount);
+ var collection = testVectorStore.GetCollection>("chunks-dir", definition);
+ using VectorStoreWriter> vectorStoreWriter = new(collection);
using IngestionPipeline pipeline = new(CreateReader(), CreateChunker(), vectorStoreWriter);
@@ -129,15 +136,15 @@ public async Task CanProcessDocumentsInDirectory()
Assert.True(embeddingGenerator.WasCalled, "Embedding generator should have been called.");
var retrieved = await vectorStoreWriter.VectorStoreCollection
- .GetAsync(record => ((string)record["documentid"]!).StartsWith(directory.FullName), top: 1000)
+ .GetAsync(record => record.DocumentId.StartsWith(directory.FullName), top: 1000)
.ToListAsync();
Assert.NotEmpty(retrieved);
for (int i = 0; i < retrieved.Count; i++)
{
- Assert.NotEqual(Guid.Empty, (Guid)retrieved[i]["key"]!);
- Assert.NotEmpty((string)retrieved[i]["content"]!);
- Assert.StartsWith(directory.FullName, (string)retrieved[i]["documentid"]!);
+ Assert.NotEqual(Guid.Empty, retrieved[i].Key);
+ Assert.NotEmpty((string)retrieved[i].Content!);
+ Assert.StartsWith(directory.FullName, retrieved[i].DocumentId);
}
AssertActivities(activities, "ProcessDirectory");
@@ -151,7 +158,10 @@ public async Task ChunksCanBeMoreThanJustText()
TestEmbeddingGenerator embeddingGenerator = new();
using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator });
- using VectorStoreWriter vectorStoreWriter = new(testVectorStore, dimensionCount: TestEmbeddingGenerator.DimensionCount);
+
+ var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount);
+ var collection = testVectorStore.GetCollection>("chunks-img", definition);
+ using VectorStoreWriter> vectorStoreWriter = new(collection);
using IngestionPipeline pipeline = new(CreateReader(), new ImageChunker(), vectorStoreWriter);
Assert.False(embeddingGenerator.WasCalled);
@@ -159,15 +169,15 @@ public async Task ChunksCanBeMoreThanJustText()
AssertAllIngestionsSucceeded(ingestionResults);
var retrieved = await vectorStoreWriter.VectorStoreCollection
- .GetAsync(record => ((string)record["documentid"]!).EndsWith(_withImage.Name), top: 100)
+ .GetAsync(record => record.DocumentId.EndsWith(_withImage.Name), top: 100)
.ToListAsync();
Assert.True(embeddingGenerator.WasCalled);
Assert.NotEmpty(retrieved);
for (int i = 0; i < retrieved.Count; i++)
{
- Assert.NotEqual(Guid.Empty, (Guid)retrieved[i]["key"]!);
- Assert.EndsWith(_withImage.Name, (string)retrieved[i]["documentid"]!);
+ Assert.NotEqual(Guid.Empty, retrieved[i].Key);
+ Assert.EndsWith(_withImage.Name, retrieved[i].DocumentId);
}
AssertActivities(activities, "ProcessFiles");
@@ -200,7 +210,10 @@ public async Task SingleFailureDoesNotTearDownEntirePipeline()
TestEmbeddingGenerator embeddingGenerator = new();
using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator });
- using VectorStoreWriter vectorStoreWriter = new(testVectorStore, dimensionCount: TestEmbeddingGenerator.DimensionCount);
+
+ var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount);
+ var collection = testVectorStore.GetCollection>("chunks-fail", definition);
+ using VectorStoreWriter> vectorStoreWriter = new(collection);
using IngestionPipeline pipeline = new(failingForFirstReader, CreateChunker(), vectorStoreWriter);
diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs
new file mode 100644
index 00000000000..bc283241509
--- /dev/null
+++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs
@@ -0,0 +1,31 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Text.Json.Serialization;
+using Microsoft.Extensions.VectorData;
+
+namespace Microsoft.Extensions.DataIngestion.Writers.Tests;
+
+/// <summary>Test record with a fixed-dimension embedding and a typed <see cref="Classification"/> metadata property.</summary>
+public class TestChunkRecordWithMetadata : IngestedChunkRecord<Guid, string>
+{
+    public const int TestDimensionCount = 4;
+
+    [VectorStoreVector(TestDimensionCount, StorageName = EmbeddingPropertyName)]
+    [JsonPropertyName(EmbeddingPropertyName)]
+    public override string? Embedding => Content;
+
+    [VectorStoreData(StorageName = "classification")]
+    [JsonPropertyName("classification")]
+    public string? Classification { get; set; }
+
+    public override void SetMetadata(string key, object? value)
+    {
+        // Only the "Classification" key is mapped; every other key is ignored, and a non-string value is stored as null.
+        if (key == nameof(Classification))
+        {
+            Classification = value as string;
+        }
+    }
+}
diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterOptionsTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterOptionsTests.cs
index 013b24352f4..8612ca933b5 100644
--- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterOptionsTests.cs
+++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterOptionsTests.cs
@@ -13,7 +13,6 @@ public void DefaultValues_ShouldBeSetCorrectly()
{
VectorStoreWriterOptions options = new();
- Assert.Equal("chunks", options.CollectionName);
Assert.True(options.IncrementalIngestion);
Assert.Equal(512000, options.BatchTokenCount); // 256 * 2000
}
diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs
index 1ac09dd6577..6f2177f84cf 100644
--- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs
+++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs
@@ -14,42 +14,64 @@ namespace Microsoft.Extensions.DataIngestion.Writers.Tests;
public abstract class VectorStoreWriterTests
{
[Fact]
- public async Task CanGenerateDynamicSchema()
+ public async Task CanWriteChunks()
{
string documentId = Guid.NewGuid().ToString();
using TestEmbeddingGenerator testEmbeddingGenerator = new();
using VectorStore vectorStore = CreateVectorStore(testEmbeddingGenerator);
- using VectorStoreWriter writer = new(
- vectorStore,
- dimensionCount: TestEmbeddingGenerator.DimensionCount);
+
+ var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount);
+ var collection = vectorStore.GetCollection>("chunks", definition);
+
+ using VectorStoreWriter> writer = new(collection);
IngestionDocument document = new(documentId);
IngestionChunk chunk = TestChunkFactory.CreateChunk("some content", document);
- chunk.Metadata["key1"] = "value1";
- chunk.Metadata["key2"] = 123;
- chunk.Metadata["key3"] = true;
- chunk.Metadata["key4"] = 123.45;
List> chunks = [chunk];
Assert.False(testEmbeddingGenerator.WasCalled);
await writer.WriteAsync(chunks.ToAsyncEnumerable());
- Dictionary record = await writer.VectorStoreCollection
- .GetAsync(filter: record => (string)record["documentid"]! == documentId, top: 1)
+ IngestedChunkRecord record = await writer.VectorStoreCollection
+ .GetAsync(filter: record => record.DocumentId == documentId, top: 1)
.SingleAsync();
Assert.NotNull(record);
- Assert.NotNull(record["key"]);
- Assert.Equal(documentId, record["documentid"]);
- Assert.Equal(chunks[0].Content, record["content"]);
+ Assert.NotEqual(Guid.Empty, record.Key);
+ Assert.Equal(documentId, record.DocumentId);
+ Assert.Equal(chunks[0].Content, record.Content);
Assert.True(testEmbeddingGenerator.WasCalled);
- foreach (var kvp in chunks[0].Metadata)
- {
- Assert.True(record.ContainsKey(kvp.Key), $"Record does not contain key '{kvp.Key}'");
- Assert.Equal(kvp.Value, record[kvp.Key]);
- }
+ }
+
+ [Fact]
+ public async Task CanWriteChunksWithMetadata()
+ {
+ string documentId = Guid.NewGuid().ToString();
+
+ using TestEmbeddingGenerator testEmbeddingGenerator = new();
+ using VectorStore vectorStore = CreateVectorStore(testEmbeddingGenerator);
+
+ var collection = vectorStore.GetCollection("chunks-meta");
+ using VectorStoreWriter writer = new(collection);
+
+ IngestionDocument document = new(documentId);
+ IngestionChunk chunk = TestChunkFactory.CreateChunk("some content", document);
+ chunk.Metadata["Classification"] = "important";
+
+ List> chunks = [chunk];
+
+ await writer.WriteAsync(chunks.ToAsyncEnumerable());
+
+ TestChunkRecordWithMetadata record = await writer.VectorStoreCollection
+ .GetAsync(filter: record => record.DocumentId == documentId, top: 1)
+ .SingleAsync();
+
+ Assert.NotNull(record);
+ Assert.Equal(documentId, record.DocumentId);
+ Assert.Equal(chunks[0].Content, record.Content);
+ Assert.Equal("important", record.Classification);
}
[Fact]
@@ -59,9 +81,12 @@ public async Task DoesSupportIncrementalIngestion()
using TestEmbeddingGenerator testEmbeddingGenerator = new();
using VectorStore vectorStore = CreateVectorStore(testEmbeddingGenerator);
- using VectorStoreWriter writer = new(
- vectorStore,
- dimensionCount: TestEmbeddingGenerator.DimensionCount,
+
+ var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount);
+ var collection = vectorStore.GetCollection>("chunks-incr", definition);
+
+ using VectorStoreWriter> writer = new(
+ collection,
options: new()
{
IncrementalIngestion = true,
@@ -69,8 +94,6 @@ public async Task DoesSupportIncrementalIngestion()
IngestionDocument document = new(documentId);
IngestionChunk chunk1 = TestChunkFactory.CreateChunk("first chunk", document);
- chunk1.Metadata["key1"] = "value1";
-
IngestionChunk chunk2 = TestChunkFactory.CreateChunk("second chunk", document);
List> chunks = [chunk1, chunk2];
@@ -78,27 +101,25 @@ public async Task DoesSupportIncrementalIngestion()
await writer.WriteAsync(chunks.ToAsyncEnumerable());
int recordCount = await writer.VectorStoreCollection
- .GetAsync(filter: record => (string)record["documentid"]! == documentId, top: 100)
+ .GetAsync(filter: record => record.DocumentId == documentId, top: 100)
.CountAsync();
Assert.Equal(chunks.Count, recordCount);
// Now we will do an incremental ingestion that updates the chunk(s).
IngestionChunk updatedChunk = TestChunkFactory.CreateChunk("different content", document);
- updatedChunk.Metadata["key1"] = "value2";
List> updatedChunks = [updatedChunk];
await writer.WriteAsync(updatedChunks.ToAsyncEnumerable());
// We ask for 100 records, but we expect only 1 as the previous 2 should have been deleted.
- Dictionary record = await writer.VectorStoreCollection
- .GetAsync(filter: record => (string)record["documentid"]! == documentId, top: 100)
+ IngestedChunkRecord record = await writer.VectorStoreCollection
+ .GetAsync(filter: record => record.DocumentId == documentId, top: 100)
.SingleAsync();
Assert.NotNull(record);
- Assert.NotNull(record["key"]);
- Assert.Equal("different content", record["content"]);
- Assert.Equal("value2", record["key1"]);
+ Assert.NotEqual(Guid.Empty, record.Key);
+ Assert.Equal("different content", record.Content);
}
public static TheoryData BatchingTestCases => new()
@@ -131,9 +152,11 @@ public async Task BatchesChunks(int? batchTokenCount, int[] chunkTokenCounts)
options.BatchTokenCount = batchTokenCount.Value;
}
- using VectorStoreWriter writer = new(
- vectorStore,
- dimensionCount: TestEmbeddingGenerator.DimensionCount,
+ var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount);
+ var collection = vectorStore.GetCollection>("chunks-batch", definition);
+
+ using VectorStoreWriter> writer = new(
+ collection,
options: options);
IngestionDocument document = new(documentId);
@@ -146,7 +169,7 @@ public async Task BatchesChunks(int? batchTokenCount, int[] chunkTokenCounts)
await writer.WriteAsync(chunks.ToAsyncEnumerable());
int recordCount = await writer.VectorStoreCollection
- .GetAsync(filter: record => (string)record["documentid"]! == documentId, top: 100)
+ .GetAsync(filter: record => record.DocumentId == documentId, top: 100)
.CountAsync();
Assert.Equal(chunks.Count, recordCount);
@@ -159,9 +182,12 @@ public async Task IncrementalIngestion_WithManyRecords_DeletesAllPreExistingChun
using TestEmbeddingGenerator testEmbeddingGenerator = new();
using VectorStore vectorStore = CreateVectorStore(testEmbeddingGenerator);
- using VectorStoreWriter writer = new(
- vectorStore,
- dimensionCount: TestEmbeddingGenerator.DimensionCount,
+
+ var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount);
+ var collection = vectorStore.GetCollection>("chunks-many", definition);
+
+ using VectorStoreWriter> writer = new(
+ collection,
options: new()
{
IncrementalIngestion = true,
@@ -180,7 +206,7 @@ public async Task IncrementalIngestion_WithManyRecords_DeletesAllPreExistingChun
await writer.WriteAsync(chunks.ToAsyncEnumerable());
int recordCount = await writer.VectorStoreCollection
- .GetAsync(filter: record => (string)record["documentid"]! == documentId, top: 10000)
+ .GetAsync(filter: record => record.DocumentId == documentId, top: 10000)
.CountAsync();
Assert.Equal(chunks.Count, recordCount);
@@ -194,13 +220,13 @@ public async Task IncrementalIngestion_WithManyRecords_DeletesAllPreExistingChun
await writer.WriteAsync(updatedChunks.ToAsyncEnumerable());
// Verify that all old records were deleted and only the new ones remain
- List> records = await writer.VectorStoreCollection
- .GetAsync(filter: record => (string)record["documentid"]! == documentId, top: 10000)
+ List> records = await writer.VectorStoreCollection
+ .GetAsync(filter: record => record.DocumentId == documentId, top: 10000)
.ToListAsync();
Assert.Equal(updatedChunks.Count, records.Count);
- Assert.Contains(records, r => (string)r["content"]! == "updated chunk 1");
- Assert.Contains(records, r => (string)r["content"]! == "updated chunk 2");
+ Assert.Contains(records, r => (string)r.Content! == "updated chunk 1");
+ Assert.Contains(records, r => (string)r.Content! == "updated chunk 2");
}
protected abstract VectorStore CreateVectorStore(TestEmbeddingGenerator testEmbeddingGenerator);
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor
index 6fc5881c18f..6e5b4212bd0 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor
@@ -126,7 +126,7 @@
await InvokeAsync(StateHasChanged);
var results = await Search.SearchAsync(searchPhrase, filenameFilter, maxResults: 5);
return results.Select(result =>
- $"{result.Text} ");
+ $"{result.Content} ");
}
public void Dispose()
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs
index af609ea239e..a32ef7dd6e8 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs
@@ -1,31 +1,16 @@
using System.Text.Json.Serialization;
+using Microsoft.Extensions.DataIngestion;
using Microsoft.Extensions.VectorData;
namespace aichatweb.Web.Services;
-public class IngestedChunk
+public class IngestedChunk : IngestedChunkRecord
{
public const int VectorDimensions = 1536; // 1536 is the default vector size for the OpenAI text-embedding-3-small model
public const string VectorDistanceFunction = DistanceFunction.CosineDistance;
public const string CollectionName = "data-aichatweb-chunks";
- [VectorStoreKey(StorageName = "key")]
- [JsonPropertyName("key")]
- public required Guid Key { get; set; }
-
- [VectorStoreData(StorageName = "documentid")]
- [JsonPropertyName("documentid")]
- public required string DocumentId { get; set; }
-
- [VectorStoreData(StorageName = "content")]
- [JsonPropertyName("content")]
- public required string Text { get; set; }
-
- [VectorStoreData(StorageName = "context")]
- [JsonPropertyName("context")]
- public string? Context { get; set; }
-
- [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")]
- [JsonPropertyName("embedding")]
- public string? Vector => Text;
+ [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingPropertyName)]
+ [JsonPropertyName(EmbeddingPropertyName)]
+ public override string? Embedding => Content;
}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs
index 9dd366a03a5..1a482ee411b 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs
@@ -9,15 +9,13 @@ namespace aichatweb.Web.Services.Ingestion;
public class DataIngestor(
ILogger logger,
ILoggerFactory loggerFactory,
- VectorStore vectorStore,
+ VectorStoreCollection vectorCollection,
IEmbeddingGenerator> embeddingGenerator)
{
public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern)
{
- using var writer = new VectorStoreWriter(vectorStore, dimensionCount: IngestedChunk.VectorDimensions, new()
+ using var writer = new VectorStoreWriter(vectorCollection, new()
{
- CollectionName = IngestedChunk.CollectionName,
- DistanceFunction = IngestedChunk.VectorDistanceFunction,
IncrementalIngestion = false,
});
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor
index 6fc5881c18f..6e5b4212bd0 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor
@@ -126,7 +126,7 @@
await InvokeAsync(StateHasChanged);
var results = await Search.SearchAsync(searchPhrase, filenameFilter, maxResults: 5);
return results.Select(result =>
- $"{result.Text} ");
+ $"{result.Content} ");
}
public void Dispose()
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs
index 2d93db8fd94..d4782d5555e 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs
@@ -1,31 +1,16 @@
using System.Text.Json.Serialization;
+using Microsoft.Extensions.DataIngestion;
using Microsoft.Extensions.VectorData;
namespace aichatweb.Web.Services;
-public class IngestedChunk
+public class IngestedChunk : IngestedChunkRecord
{
public const int VectorDimensions = 1536; // 1536 is the default vector size for the OpenAI text-embedding-3-small model
public const string VectorDistanceFunction = DistanceFunction.CosineSimilarity;
public const string CollectionName = "data-aichatweb-chunks";
- [VectorStoreKey(StorageName = "key")]
- [JsonPropertyName("key")]
- public required Guid Key { get; set; }
-
- [VectorStoreData(StorageName = "documentid")]
- [JsonPropertyName("documentid")]
- public required string DocumentId { get; set; }
-
- [VectorStoreData(StorageName = "content")]
- [JsonPropertyName("content")]
- public required string Text { get; set; }
-
- [VectorStoreData(StorageName = "context")]
- [JsonPropertyName("context")]
- public string? Context { get; set; }
-
- [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")]
- [JsonPropertyName("embedding")]
- public string? Vector => Text;
+ [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingPropertyName)]
+ [JsonPropertyName(EmbeddingPropertyName)]
+ public override string? Embedding => Content;
}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs
index 9dd366a03a5..1a482ee411b 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.A_aoai_aais.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs
@@ -9,15 +9,13 @@ namespace aichatweb.Web.Services.Ingestion;
public class DataIngestor(
ILogger logger,
ILoggerFactory loggerFactory,
- VectorStore vectorStore,
+ VectorStoreCollection vectorCollection,
IEmbeddingGenerator> embeddingGenerator)
{
public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern)
{
- using var writer = new VectorStoreWriter(vectorStore, dimensionCount: IngestedChunk.VectorDimensions, new()
+ using var writer = new VectorStoreWriter(vectorCollection, new()
{
- CollectionName = IngestedChunk.CollectionName,
- DistanceFunction = IngestedChunk.VectorDistanceFunction,
IncrementalIngestion = false,
});
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Components/Pages/Chat/Chat.razor b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Components/Pages/Chat/Chat.razor
index 6fc5881c18f..6e5b4212bd0 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Components/Pages/Chat/Chat.razor
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Components/Pages/Chat/Chat.razor
@@ -126,7 +126,7 @@
await InvokeAsync(StateHasChanged);
var results = await Search.SearchAsync(searchPhrase, filenameFilter, maxResults: 5);
return results.Select(result =>
- $"{result.Text} ");
+ $"{result.Content} ");
}
public void Dispose()
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/IngestedChunk.cs
index 68af3ef20fb..4dc73a5b4ca 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/IngestedChunk.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/IngestedChunk.cs
@@ -1,31 +1,16 @@
using System.Text.Json.Serialization;
+using Microsoft.Extensions.DataIngestion;
using Microsoft.Extensions.VectorData;
namespace aichatweb.Services;
-public class IngestedChunk
+public class IngestedChunk : IngestedChunkRecord
{
public const int VectorDimensions = 1536; // 1536 is the default vector size for the OpenAI text-embedding-3-small model
public const string VectorDistanceFunction = DistanceFunction.CosineDistance;
public const string CollectionName = "data-aichatweb-chunks";
- [VectorStoreKey(StorageName = "key")]
- [JsonPropertyName("key")]
- public required Guid Key { get; set; }
-
- [VectorStoreData(StorageName = "documentid")]
- [JsonPropertyName("documentid")]
- public required string DocumentId { get; set; }
-
- [VectorStoreData(StorageName = "content")]
- [JsonPropertyName("content")]
- public required string Text { get; set; }
-
- [VectorStoreData(StorageName = "context")]
- [JsonPropertyName("context")]
- public string? Context { get; set; }
-
- [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")]
- [JsonPropertyName("embedding")]
- public string? Vector => Text;
+ [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingPropertyName)]
+ [JsonPropertyName(EmbeddingPropertyName)]
+ public override string? Embedding => Content;
}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/Ingestion/DataIngestor.cs
index d97b986b694..2c4f96cc2ac 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/Ingestion/DataIngestor.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb._defaults.verified/aichatweb/Services/Ingestion/DataIngestor.cs
@@ -9,15 +9,13 @@ namespace aichatweb.Services.Ingestion;
public class DataIngestor(
ILogger logger,
ILoggerFactory loggerFactory,
- VectorStore vectorStore,
+ VectorStoreCollection vectorCollection,
IEmbeddingGenerator> embeddingGenerator)
{
public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern)
{
- using var writer = new VectorStoreWriter(vectorStore, dimensionCount: IngestedChunk.VectorDimensions, new()
+ using var writer = new VectorStoreWriter(vectorCollection, new()
{
- CollectionName = IngestedChunk.CollectionName,
- DistanceFunction = IngestedChunk.VectorDistanceFunction,
IncrementalIngestion = false,
});
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor
index 6fc5881c18f..6e5b4212bd0 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor
@@ -126,7 +126,7 @@
await InvokeAsync(StateHasChanged);
var results = await Search.SearchAsync(searchPhrase, filenameFilter, maxResults: 5);
return results.Select(result =>
- $"{result.Text} ");
+ $"{result.Content} ");
}
public void Dispose()
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs
index b55a8b3c817..20a86655c44 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs
@@ -1,31 +1,16 @@
using System.Text.Json.Serialization;
+using Microsoft.Extensions.DataIngestion;
using Microsoft.Extensions.VectorData;
namespace aichatweb.Web.Services;
-public class IngestedChunk
+public class IngestedChunk : IngestedChunkRecord
{
public const int VectorDimensions = 384; // 384 is the default vector size for the all-minilm embedding model
public const string VectorDistanceFunction = DistanceFunction.CosineSimilarity;
public const string CollectionName = "data-aichatweb-chunks";
- [VectorStoreKey(StorageName = "key")]
- [JsonPropertyName("key")]
- public required Guid Key { get; set; }
-
- [VectorStoreData(StorageName = "documentid")]
- [JsonPropertyName("documentid")]
- public required string DocumentId { get; set; }
-
- [VectorStoreData(StorageName = "content")]
- [JsonPropertyName("content")]
- public required string Text { get; set; }
-
- [VectorStoreData(StorageName = "context")]
- [JsonPropertyName("context")]
- public string? Context { get; set; }
-
- [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")]
- [JsonPropertyName("embedding")]
- public string? Vector => Text;
+ [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingPropertyName)]
+ [JsonPropertyName(EmbeddingPropertyName)]
+ public override string? Embedding => Content;
}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs
index 9dd366a03a5..0f91c879f57 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.o_q.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs
@@ -9,15 +9,13 @@ namespace aichatweb.Web.Services.Ingestion;
public class DataIngestor(
ILogger logger,
ILoggerFactory loggerFactory,
- VectorStore vectorStore,
+ VectorStoreCollection vectorCollection,
IEmbeddingGenerator> embeddingGenerator)
{
public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern)
{
- using var writer = new VectorStoreWriter(vectorStore, dimensionCount: IngestedChunk.VectorDimensions, new()
+ using var writer = new VectorStoreWriter(vectorCollection, new()
{
- CollectionName = IngestedChunk.CollectionName,
- DistanceFunction = IngestedChunk.VectorDistanceFunction,
IncrementalIngestion = false,
});
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Components/Pages/Chat/Chat.razor b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Components/Pages/Chat/Chat.razor
index 6fc5881c18f..6e5b4212bd0 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Components/Pages/Chat/Chat.razor
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Components/Pages/Chat/Chat.razor
@@ -126,7 +126,7 @@
await InvokeAsync(StateHasChanged);
var results = await Search.SearchAsync(searchPhrase, filenameFilter, maxResults: 5);
return results.Select(result =>
- $"{result.Text} ");
+ $"{result.Content} ");
}
public void Dispose()
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/IngestedChunk.cs
index 348bb5d942f..9aa8dab5fe7 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/IngestedChunk.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/IngestedChunk.cs
@@ -1,31 +1,16 @@
using System.Text.Json.Serialization;
+using Microsoft.Extensions.DataIngestion;
using Microsoft.Extensions.VectorData;
namespace aichatweb.Services;
-public class IngestedChunk
+public class IngestedChunk : IngestedChunkRecord
{
public const int VectorDimensions = 1536; // 1536 is the default vector size for the OpenAI text-embedding-3-small model
public const string VectorDistanceFunction = DistanceFunction.CosineSimilarity;
public const string CollectionName = "data-aichatweb-chunks";
- [VectorStoreKey(StorageName = "key")]
- [JsonPropertyName("key")]
- public required Guid Key { get; set; }
-
- [VectorStoreData(StorageName = "documentid")]
- [JsonPropertyName("documentid")]
- public required string DocumentId { get; set; }
-
- [VectorStoreData(StorageName = "content")]
- [JsonPropertyName("content")]
- public required string Text { get; set; }
-
- [VectorStoreData(StorageName = "context")]
- [JsonPropertyName("context")]
- public string? Context { get; set; }
-
- [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")]
- [JsonPropertyName("embedding")]
- public string? Vector => Text;
+ [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingPropertyName)]
+ [JsonPropertyName(EmbeddingPropertyName)]
+ public override string? Embedding => Content;
}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/Ingestion/DataIngestor.cs
index d97b986b694..2c4f96cc2ac 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/Ingestion/DataIngestor.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb/aichatweb.oai_aais.verified/aichatweb/Services/Ingestion/DataIngestor.cs
@@ -9,15 +9,13 @@ namespace aichatweb.Services.Ingestion;
public class DataIngestor(
ILogger logger,
ILoggerFactory loggerFactory,
- VectorStore vectorStore,
+ VectorStoreCollection vectorCollection,
IEmbeddingGenerator> embeddingGenerator)
{
public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern)
{
- using var writer = new VectorStoreWriter(vectorStore, dimensionCount: IngestedChunk.VectorDimensions, new()
+ using var writer = new VectorStoreWriter(vectorCollection, new()
{
- CollectionName = IngestedChunk.CollectionName,
- DistanceFunction = IngestedChunk.VectorDistanceFunction,
IncrementalIngestion = false,
});
From e48fa9d49053da2ffa13adb9d20e1d49903111ff Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 14 Mar 2026 21:38:52 +0000
Subject: [PATCH 04/14] Fix warnings and test failures - correct
VectorStoreCollectionDefinition property names
Co-authored-by: adamsitnik <6011991+adamsitnik@users.noreply.github.com>
---
.../Writers/IngestedChunkRecord.cs | 82 ++++++++++---------
.../Writers/VectorStoreWriter.cs | 2 +
.../IngestionPipelineTests.cs | 4 +-
.../Writers/VectorStoreWriterTests.cs | 4 +-
4 files changed, 50 insertions(+), 42 deletions(-)
diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs
index dd24d8edd18..35dc5a2ba8c 100644
--- a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs
+++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs
@@ -18,7 +18,9 @@ namespace Microsoft.Extensions.DataIngestion;
/// When the vector dimension count is known at compile time, derive from this class and add
/// the to the property.
///
+#pragma warning disable CA1005 // Avoid excessive parameters on generic types - TKey, TChunk, and TRecord are all necessary
public class IngestedChunkRecord
+#pragma warning restore CA1005
{
///
/// The storage name for the property.
@@ -45,6 +47,48 @@ public class IngestedChunkRecord
///
public const string EmbeddingPropertyName = "embedding";
+ ///
+ /// Creates a for .
+ ///
+ /// The number of dimensions that the vector has.
+ ///
+ /// The distance function to use. When not provided, the default specific to given database will be used.
+ /// Check for available values.
+ ///
+ /// The index kind to use.
+ /// A suitable for creating a vector store collection.
+ /// is less than or equal to zero.
+#pragma warning disable CA1000 // Do not declare static members on generic types - needs access to TKey and TChunk type parameters
+ public static VectorStoreCollectionDefinition CreateCollectionDefinition(int dimensionCount, string? distanceFunction = null, string? indexKind = null)
+#pragma warning restore CA1000
+ {
+ _ = Shared.Diagnostics.Throw.IfLessThanOrEqual(dimensionCount, 0);
+
+ return new VectorStoreCollectionDefinition
+ {
+ Properties =
+ {
+ new VectorStoreKeyProperty(nameof(Key), typeof(TKey)) { StorageName = KeyPropertyName },
+
+ // By using TChunk as the type here we allow the vector store
+ // to handle the conversion from TChunk to the actual vector type it supports.
+ new VectorStoreVectorProperty(nameof(Embedding), typeof(TChunk), dimensionCount)
+ {
+ StorageName = EmbeddingPropertyName,
+ DistanceFunction = distanceFunction,
+ IndexKind = indexKind,
+ },
+ new VectorStoreDataProperty(nameof(Content), typeof(TChunk)) { StorageName = ContentPropertyName },
+ new VectorStoreDataProperty(nameof(Context), typeof(string)) { StorageName = ContextPropertyName },
+ new VectorStoreDataProperty(nameof(DocumentId), typeof(string))
+ {
+ StorageName = DocumentIdPropertyName,
+ IsIndexed = true,
+ },
+ },
+ };
+ }
+
///
/// Gets or sets the unique key for this record.
///
@@ -98,42 +142,4 @@ public virtual void SetMetadata(string key, object? value)
// Default implementation: no-op.
// Derived classes can override to map metadata keys to typed properties.
}
-
- ///
- /// Creates a for .
- ///
- /// The number of dimensions that the vector has.
- ///
- /// The distance function to use. When not provided, the default specific to given database will be used.
- /// Check for available values.
- ///
- /// The index kind to use.
- /// A suitable for creating a vector store collection.
- /// is less than or equal to zero.
- public static VectorStoreCollectionDefinition CreateCollectionDefinition(int dimensionCount, string? distanceFunction = null, string? indexKind = null)
- {
- Shared.Diagnostics.Throw.IfLessThanOrEqual(dimensionCount, 0);
-
- return new VectorStoreCollectionDefinition
- {
- Properties =
- {
- new VectorStoreKeyProperty(KeyPropertyName, typeof(TKey)),
-
- // By using TChunk as the type here we allow the vector store
- // to handle the conversion from TChunk to the actual vector type it supports.
- new VectorStoreVectorProperty(EmbeddingPropertyName, typeof(TChunk), dimensionCount)
- {
- DistanceFunction = distanceFunction,
- IndexKind = indexKind,
- },
- new VectorStoreDataProperty(ContentPropertyName, typeof(TChunk)),
- new VectorStoreDataProperty(ContextPropertyName, typeof(string)),
- new VectorStoreDataProperty(DocumentIdPropertyName, typeof(string))
- {
- IsIndexed = true,
- },
- },
- };
- }
}
diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs
index d373843af86..a64e0f92733 100644
--- a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs
+++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs
@@ -16,7 +16,9 @@ namespace Microsoft.Extensions.DataIngestion;
/// The type of the key for the record.
/// The type of the chunk content.
/// The type of the record stored in the vector store.
+#pragma warning disable CA1005 // Avoid excessive parameters on generic types - TKey, TChunk, and TRecord are all necessary
public sealed class VectorStoreWriter : IngestionChunkWriter
+#pragma warning restore CA1005
where TKey : notnull
where TRecord : IngestedChunkRecord, new()
{
diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs
index a2365b1a02e..6f1143072ab 100644
--- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs
+++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs
@@ -106,7 +106,7 @@ public async Task CanProcessDocuments()
for (int i = 0; i < retrieved.Count; i++)
{
Assert.NotEqual(Guid.Empty, retrieved[i].Key);
- Assert.NotEmpty((string)retrieved[i].Content!);
+ Assert.NotEmpty(retrieved[i].Content!);
Assert.Contains(retrieved[i].DocumentId, _sampleFiles.Select(info => info.FullName));
}
@@ -143,7 +143,7 @@ public async Task CanProcessDocumentsInDirectory()
for (int i = 0; i < retrieved.Count; i++)
{
Assert.NotEqual(Guid.Empty, retrieved[i].Key);
- Assert.NotEmpty((string)retrieved[i].Content!);
+ Assert.NotEmpty(retrieved[i].Content!);
Assert.StartsWith(directory.FullName, retrieved[i].DocumentId);
}
diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs
index 6f2177f84cf..353e569030a 100644
--- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs
+++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs
@@ -225,8 +225,8 @@ public async Task IncrementalIngestion_WithManyRecords_DeletesAllPreExistingChun
.ToListAsync();
Assert.Equal(updatedChunks.Count, records.Count);
- Assert.Contains(records, r => (string)r.Content! == "updated chunk 1");
- Assert.Contains(records, r => (string)r.Content! == "updated chunk 2");
+ Assert.Contains(records, r => r.Content == "updated chunk 1");
+ Assert.Contains(records, r => r.Content == "updated chunk 2");
}
protected abstract VectorStore CreateVectorStore(TestEmbeddingGenerator testEmbeddingGenerator);
From e9aa7fe4215a9f10d489965f6149ef9933ef6fcd Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 14 Mar 2026 21:39:54 +0000
Subject: [PATCH 05/14] Address code review feedback - fix error message
wording
Co-authored-by: adamsitnik <6011991+adamsitnik@users.noreply.github.com>
---
.../Writers/VectorStoreWriter.cs | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs
index a64e0f92733..a7f3399bf3c 100644
--- a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs
+++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs
@@ -120,7 +120,7 @@ private static TKey GenerateKey()
return (TKey)(object)Guid.NewGuid().ToString();
}
- throw new NotSupportedException($"Automatic key generation is not supported for key type '{typeof(TKey)}'. Supported key types are Guid and string.");
+ throw new NotSupportedException($"Automatic key generation is not supported for key type '{typeof(TKey)}'. Supported key types are Guid or string.");
}
private async Task> GetPreExistingChunksIdsAsync(IngestionDocument document, CancellationToken cancellationToken)
From 56fa7ad77847ce8ce8aa122b1a83d66aaf1e8b3f Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 16 Mar 2026 12:07:57 +0000
Subject: [PATCH 06/14] Address reviewer feedback: remove TKey generic, make
properties virtual, throw in SetMetadata, make consts protected, remove JSON
attributes
Co-authored-by: adamsitnik <6011991+adamsitnik@users.noreply.github.com>
---
.../Writers/IngestedChunkRecord.cs | 42 +++++++------------
.../Writers/VectorStoreWriter.cs | 37 ++++------------
.../AIChatWeb-CSharp.Web/Program.Aspire.cs | 2 +-
.../AIChatWeb-CSharp.Web/Program.cs | 2 +-
.../Services/IngestedChunk.cs | 10 +----
.../Services/Ingestion/DataIngestor.cs | 10 +----
.../Services/SemanticSearch.cs | 4 --
.../IngestionPipelineTests.cs | 24 +++++------
.../Writers/TestChunkRecordWithMetadata.cs | 3 +-
.../Writers/VectorStoreWriterTests.cs | 32 +++++++-------
.../aichatweb/aichatweb.Web/Program.cs | 2 +-
.../aichatweb.Web/Services/IngestedChunk.cs | 3 +-
.../Services/Ingestion/DataIngestor.cs | 4 +-
.../aichatweb.Web/Services/SemanticSearch.cs | 2 +-
.../aichatweb.Web/Services/IngestedChunk.cs | 3 +-
.../Services/Ingestion/DataIngestor.cs | 4 +-
.../aichatweb.Web/Services/SemanticSearch.cs | 2 +-
.../aichatweb/Program.cs | 2 +-
.../aichatweb/Services/IngestedChunk.cs | 3 +-
.../Services/Ingestion/DataIngestor.cs | 4 +-
.../aichatweb/Services/SemanticSearch.cs | 2 +-
.../aichatweb.Web/Services/IngestedChunk.cs | 3 +-
.../Services/Ingestion/DataIngestor.cs | 2 +-
.../aichatweb/Services/IngestedChunk.cs | 3 +-
.../Services/Ingestion/DataIngestor.cs | 4 +-
.../aichatweb/Services/SemanticSearch.cs | 2 +-
26 files changed, 79 insertions(+), 132 deletions(-)
diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs
index 35dc5a2ba8c..1a634a1c4ae 100644
--- a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs
+++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/IngestedChunkRecord.cs
@@ -2,15 +2,13 @@
// The .NET Foundation licenses this file to you under the MIT license.
using System;
-using System.Text.Json.Serialization;
using Microsoft.Extensions.VectorData;
namespace Microsoft.Extensions.DataIngestion;
///
-/// Represents the base record type used by to store ingested chunks in a vector store.
+/// Represents the base record type used by to store ingested chunks in a vector store.
///
-/// The type of the key for the record.
/// The type of the chunk content.
///
/// When the vector dimension count is not known at compile time, use the
@@ -18,37 +16,35 @@ namespace Microsoft.Extensions.DataIngestion;
/// When the vector dimension count is known at compile time, derive from this class and add
/// the to the property.
///
-#pragma warning disable CA1005 // Avoid excessive parameters on generic types - TKey, TChunk, and TRecord are all necessary
-public class IngestedChunkRecord
-#pragma warning restore CA1005
+public class IngestedChunkRecord
{
///
/// The storage name for the property.
///
- public const string KeyPropertyName = "key";
+ protected const string KeyPropertyName = "key";
///
/// The storage name for the property.
///
- public const string DocumentIdPropertyName = "documentid";
+ protected const string DocumentIdPropertyName = "documentid";
///
/// The storage name for the property.
///
- public const string ContentPropertyName = "content";
+ protected const string ContentPropertyName = "content";
///
/// The storage name for the property.
///
- public const string ContextPropertyName = "context";
+ protected const string ContextPropertyName = "context";
///
/// The storage name for the property.
///
- public const string EmbeddingPropertyName = "embedding";
+ protected const string EmbeddingPropertyName = "embedding";
///
- /// Creates a for .
+ /// Creates a for .
///
/// The number of dimensions that the vector has.
///
@@ -58,7 +54,7 @@ public class IngestedChunkRecord
/// The index kind to use.
/// A suitable for creating a vector store collection.
/// is less than or equal to zero.
-#pragma warning disable CA1000 // Do not declare static members on generic types - needs access to TKey and TChunk type parameters
+#pragma warning disable CA1000 // Do not declare static members on generic types - needs access to TChunk type parameter
public static VectorStoreCollectionDefinition CreateCollectionDefinition(int dimensionCount, string? distanceFunction = null, string? indexKind = null)
#pragma warning restore CA1000
{
@@ -68,7 +64,7 @@ public static VectorStoreCollectionDefinition CreateCollectionDefinition(int dim
{
Properties =
{
- new VectorStoreKeyProperty(nameof(Key), typeof(TKey)) { StorageName = KeyPropertyName },
+ new VectorStoreKeyProperty(nameof(Key), typeof(Guid)) { StorageName = KeyPropertyName },
// By using TChunk as the type here we allow the vector store
// to handle the conversion from TChunk to the actual vector type it supports.
@@ -93,29 +89,25 @@ public static VectorStoreCollectionDefinition CreateCollectionDefinition(int dim
/// Gets or sets the unique key for this record.
///
[VectorStoreKey(StorageName = KeyPropertyName)]
- [JsonPropertyName(KeyPropertyName)]
- public TKey Key { get; set; } = default!;
+ public virtual Guid Key { get; set; }
///
/// Gets or sets the identifier of the document from which this chunk was extracted.
///
[VectorStoreData(StorageName = DocumentIdPropertyName)]
- [JsonPropertyName(DocumentIdPropertyName)]
- public string DocumentId { get; set; } = string.Empty;
+ public virtual string DocumentId { get; set; } = string.Empty;
///
/// Gets or sets the content of the chunk.
///
[VectorStoreData(StorageName = ContentPropertyName)]
- [JsonPropertyName(ContentPropertyName)]
- public TChunk? Content { get; set; }
+ public virtual TChunk? Content { get; set; }
///
/// Gets or sets additional context for the chunk.
///
[VectorStoreData(StorageName = ContextPropertyName)]
- [JsonPropertyName(ContextPropertyName)]
- public string? Context { get; set; }
+ public virtual string? Context { get; set; }
///
/// Gets the embedding value for this record.
@@ -125,7 +117,6 @@ public static VectorStoreCollectionDefinition CreateCollectionDefinition(int dim
/// will convert this to a vector. Override this property in derived classes to add
/// the with the appropriate dimension count.
///
- [JsonPropertyName(EmbeddingPropertyName)]
public virtual TChunk? Embedding => Content;
///
@@ -135,11 +126,10 @@ public static VectorStoreCollectionDefinition CreateCollectionDefinition(int dim
/// The metadata value.
///
/// Override this method in derived classes to store metadata as typed properties with
- /// attributes. The default implementation is a no-op.
+ /// attributes.
///
public virtual void SetMetadata(string key, object? value)
{
- // Default implementation: no-op.
- // Derived classes can override to map metadata keys to typed properties.
+ throw new NotSupportedException($"Metadata key '{key}' is not supported. Override {nameof(SetMetadata)} in a derived class to handle metadata.");
}
}
diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs
index a7f3399bf3c..dee73b6a86e 100644
--- a/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs
+++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Writers/VectorStoreWriter.cs
@@ -13,25 +13,21 @@ namespace Microsoft.Extensions.DataIngestion;
///
/// Writes chunks to a .
///
-/// The type of the key for the record.
/// The type of the chunk content.
/// The type of the record stored in the vector store.
-#pragma warning disable CA1005 // Avoid excessive parameters on generic types - TKey, TChunk, and TRecord are all necessary
-public sealed class VectorStoreWriter : IngestionChunkWriter
-#pragma warning restore CA1005
- where TKey : notnull
- where TRecord : IngestedChunkRecord, new()
+public sealed class VectorStoreWriter : IngestionChunkWriter
+ where TRecord : IngestedChunkRecord, new()
{
private readonly VectorStoreWriterOptions _options;
private bool _collectionEnsured;
///
- /// Initializes a new instance of the class.
+ /// Initializes a new instance of the class.
///
/// The to use to store the instances.
/// The options for the vector store writer.
/// When is null.
- public VectorStoreWriter(VectorStoreCollection collection, VectorStoreWriterOptions? options = default)
+ public VectorStoreWriter(VectorStoreCollection collection, VectorStoreWriterOptions? options = default)
{
VectorStoreCollection = Throw.IfNull(collection);
_options = options ?? new VectorStoreWriterOptions();
@@ -40,14 +36,14 @@ public VectorStoreWriter(VectorStoreCollection collection, Vector
///
/// Gets the underlying used to store the chunks.
///
- public VectorStoreCollection VectorStoreCollection { get; }
+ public VectorStoreCollection VectorStoreCollection { get; }
///
public override async Task WriteAsync(IAsyncEnumerable> chunks, CancellationToken cancellationToken = default)
{
_ = Throw.IfNull(chunks);
- IReadOnlyList? preExistingKeys = null;
+ IReadOnlyList? preExistingKeys = null;
List? batch = null;
long currentBatchTokenCount = 0;
@@ -66,7 +62,7 @@ public override async Task WriteAsync(IAsyncEnumerable> c
TRecord record = new()
{
- Key = GenerateKey(),
+ Key = Guid.NewGuid(),
Content = chunk.Content,
Context = chunk.Context,
DocumentId = chunk.Document.Identifier,
@@ -108,22 +104,7 @@ public override async Task WriteAsync(IAsyncEnumerable> c
}
}
- private static TKey GenerateKey()
- {
- if (typeof(TKey) == typeof(Guid))
- {
- return (TKey)(object)Guid.NewGuid();
- }
-
- if (typeof(TKey) == typeof(string))
- {
- return (TKey)(object)Guid.NewGuid().ToString();
- }
-
- throw new NotSupportedException($"Automatic key generation is not supported for key type '{typeof(TKey)}'. Supported key types are Guid or string.");
- }
-
- private async Task> GetPreExistingChunksIdsAsync(IngestionDocument document, CancellationToken cancellationToken)
+ private async Task> GetPreExistingChunksIdsAsync(IngestionDocument document, CancellationToken cancellationToken)
{
if (!_options.IncrementalIngestion)
{
@@ -133,7 +114,7 @@ private async Task> GetPreExistingChunksIdsAsync(IngestionDo
// Each Vector Store has a different max top count limit, so we use low value and loop.
const int MaxTopCount = 1_000;
- List keys = [];
+ List keys = [];
int insertedCount;
do
{
diff --git a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Program.Aspire.cs b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Program.Aspire.cs
index 31442718f1f..178d77b9465 100644
--- a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Program.Aspire.cs
+++ b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Program.Aspire.cs
@@ -44,7 +44,7 @@
var vectorStorePath = Path.Combine(AppContext.BaseDirectory, "vector-store.db");
var vectorStoreConnectionString = $"Data Source={vectorStorePath}";
builder.Services.AddSqliteVectorStore(_ => vectorStoreConnectionString);
-builder.Services.AddSqliteCollection(IngestedChunk.CollectionName, vectorStoreConnectionString);
+builder.Services.AddSqliteCollection(IngestedChunk.CollectionName, vectorStoreConnectionString);
#endif
builder.Services.AddSingleton();
builder.Services.AddSingleton();
diff --git a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Program.cs b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Program.cs
index 4b041f4a15f..0736628c417 100644
--- a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Program.cs
+++ b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Program.cs
@@ -105,7 +105,7 @@
var vectorStorePath = Path.Combine(AppContext.BaseDirectory, "vector-store.db");
var vectorStoreConnectionString = $"Data Source={vectorStorePath}";
builder.Services.AddSqliteVectorStore(_ => vectorStoreConnectionString);
-builder.Services.AddSqliteCollection(IngestedChunk.CollectionName, vectorStoreConnectionString);
+builder.Services.AddSqliteCollection(IngestedChunk.CollectionName, vectorStoreConnectionString);
#endif
builder.Services.AddSingleton();
diff --git a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/IngestedChunk.cs b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/IngestedChunk.cs
index ae1ac018622..915f72c8339 100644
--- a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/IngestedChunk.cs
+++ b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/IngestedChunk.cs
@@ -1,14 +1,9 @@
-using System.Text.Json.Serialization;
-using Microsoft.Extensions.DataIngestion;
+using Microsoft.Extensions.DataIngestion;
using Microsoft.Extensions.VectorData;
namespace AIChatWeb_CSharp.Web.Services;
-#if (IsQdrant)
-public class IngestedChunk : IngestedChunkRecord
-#else
-public class IngestedChunk : IngestedChunkRecord
-#endif
+public class IngestedChunk : IngestedChunkRecord
{
#if (IsOllama)
public const int VectorDimensions = 384; // 384 is the default vector size for the all-minilm embedding model
@@ -23,6 +18,5 @@ public class IngestedChunk : IngestedChunkRecord
public const string CollectionName = "data-AIChatWeb-CSharp.Web-chunks";
[VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = EmbeddingPropertyName)]
- [JsonPropertyName(EmbeddingPropertyName)]
public override string? Embedding => Content;
}
diff --git a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/Ingestion/DataIngestor.cs b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/Ingestion/DataIngestor.cs
index 5d9cffe29a4..76168b6e632 100644
--- a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/Ingestion/DataIngestor.cs
+++ b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/Ingestion/DataIngestor.cs
@@ -9,20 +9,12 @@ namespace AIChatWeb_CSharp.Web.Services.Ingestion;
public class DataIngestor(
ILogger logger,
ILoggerFactory loggerFactory,
-#if (IsQdrant)
VectorStoreCollection vectorCollection,
-#else
- VectorStoreCollection vectorCollection,
-#endif
IEmbeddingGenerator> embeddingGenerator)
{
public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern)
{
-#if (IsQdrant)
- using var writer = new VectorStoreWriter(vectorCollection, new()
-#else
- using var writer = new VectorStoreWriter(vectorCollection, new()
-#endif
+ using var writer = new VectorStoreWriter(vectorCollection, new()
{
IncrementalIngestion = false,
});
diff --git a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/SemanticSearch.cs b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/SemanticSearch.cs
index 49bef8de3d1..8cdc6dbeae0 100644
--- a/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/SemanticSearch.cs
+++ b/src/ProjectTemplates/Microsoft.Extensions.AI.Templates/templates/AIChatWeb-CSharp/AIChatWeb-CSharp.Web/Services/SemanticSearch.cs
@@ -4,11 +4,7 @@
namespace AIChatWeb_CSharp.Web.Services;
public class SemanticSearch(
-#if (IsQdrant)
VectorStoreCollection vectorCollection,
-#else
- VectorStoreCollection vectorCollection,
-#endif
[FromKeyedServices("ingestion_directory")] DirectoryInfo ingestionDirectory,
DataIngestor dataIngestor)
{
diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs
index 6f1143072ab..cba7f497857 100644
--- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs
+++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/IngestionPipelineTests.cs
@@ -86,9 +86,9 @@ public async Task CanProcessDocuments()
TestEmbeddingGenerator embeddingGenerator = new();
using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator });
- var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount);
- var collection = testVectorStore.GetCollection>("chunks", definition);
- using VectorStoreWriter> vectorStoreWriter = new(collection);
+ var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount);
+ var collection = testVectorStore.GetCollection>("chunks", definition);
+ using VectorStoreWriter> vectorStoreWriter = new(collection);
using IngestionPipeline pipeline = new(CreateReader(), CreateChunker(), vectorStoreWriter);
List ingestionResults = await pipeline.ProcessAsync(_sampleFiles).ToListAsync();
@@ -122,9 +122,9 @@ public async Task CanProcessDocumentsInDirectory()
TestEmbeddingGenerator embeddingGenerator = new();
using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator });
- var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount);
- var collection = testVectorStore.GetCollection>("chunks-dir", definition);
- using VectorStoreWriter> vectorStoreWriter = new(collection);
+ var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount);
+ var collection = testVectorStore.GetCollection>("chunks-dir", definition);
+ using VectorStoreWriter> vectorStoreWriter = new(collection);
using IngestionPipeline pipeline = new(CreateReader(), CreateChunker(), vectorStoreWriter);
@@ -159,9 +159,9 @@ public async Task ChunksCanBeMoreThanJustText()
TestEmbeddingGenerator embeddingGenerator = new();
using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator });
- var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount);
- var collection = testVectorStore.GetCollection>("chunks-img", definition);
- using VectorStoreWriter> vectorStoreWriter = new(collection);
+ var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount);
+ var collection = testVectorStore.GetCollection>("chunks-img", definition);
+ using VectorStoreWriter> vectorStoreWriter = new(collection);
using IngestionPipeline pipeline = new(CreateReader(), new ImageChunker(), vectorStoreWriter);
Assert.False(embeddingGenerator.WasCalled);
@@ -211,9 +211,9 @@ public async Task SingleFailureDoesNotTearDownEntirePipeline()
TestEmbeddingGenerator embeddingGenerator = new();
using InMemoryVectorStore testVectorStore = new(new() { EmbeddingGenerator = embeddingGenerator });
- var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount);
- var collection = testVectorStore.GetCollection>("chunks-fail", definition);
- using VectorStoreWriter> vectorStoreWriter = new(collection);
+ var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount);
+ var collection = testVectorStore.GetCollection>("chunks-fail", definition);
+ using VectorStoreWriter> vectorStoreWriter = new(collection);
using IngestionPipeline pipeline = new(failingForFirstReader, CreateChunker(), vectorStoreWriter);
diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs
index bc283241509..505e79cfd69 100644
--- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs
+++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/TestChunkRecordWithMetadata.cs
@@ -7,12 +7,11 @@
namespace Microsoft.Extensions.DataIngestion.Writers.Tests;
-public class TestChunkRecordWithMetadata : IngestedChunkRecord
+public class TestChunkRecordWithMetadata : IngestedChunkRecord
{
public const int TestDimensionCount = 4;
[VectorStoreVector(TestDimensionCount, StorageName = EmbeddingPropertyName)]
- [JsonPropertyName(EmbeddingPropertyName)]
public override string? Embedding => Content;
[VectorStoreData(StorageName = "classification")]
diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs
index 353e569030a..99cf48188a3 100644
--- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs
+++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Writers/VectorStoreWriterTests.cs
@@ -21,10 +21,10 @@ public async Task CanWriteChunks()
using TestEmbeddingGenerator testEmbeddingGenerator = new();
using VectorStore vectorStore = CreateVectorStore(testEmbeddingGenerator);
- var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount);
- var collection = vectorStore.GetCollection>("chunks", definition);
+ var definition = IngestedChunkRecord.CreateCollectionDefinition(TestEmbeddingGenerator.DimensionCount);
+ var collection = vectorStore.GetCollection>("chunks", definition);
- using VectorStoreWriter> writer = new(collection);
+ using VectorStoreWriter> writer = new(collection);
IngestionDocument document = new(documentId);
IngestionChunk chunk = TestChunkFactory.CreateChunk("some content", document);
@@ -34,7 +34,7 @@ public async Task CanWriteChunks()
Assert.False(testEmbeddingGenerator.WasCalled);
await writer.WriteAsync(chunks.ToAsyncEnumerable());
- IngestedChunkRecord