Skip to content

Commit 8da53ed

Browse files
committed
minor #1012 [Store][ChromaDB] Add documents for text context (xprojects-de)
This PR was merged into the main branch. Discussion ---------- [Store][ChromaDB] Add documents for text context | Q | A | ------------- | --- | Bug fix? | no | New feature? | no <!-- please update src/**/CHANGELOG.md files --> | Docs? | no <!-- required for new features --> | Issues | License | MIT To work properly with embeddings, the text context is also needed; otherwise the LLM won't get the overall context. Therefore, this change makes it possible to query the text context (the stored documents) from the database too. Commits ------- bfe346c adding support for documents
2 parents 65d9895 + bfe346c commit 8da53ed

File tree

2 files changed

+122
-3
lines changed

2 files changed

+122
-3
lines changed

src/store/src/Bridge/ChromaDb/Store.php

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,23 +53,40 @@ public function add(VectorDocument ...$documents): void
5353
}
5454

5555
/**
56-
* @param array{where?: array<string, string>, whereDocument?: array<string, mixed>} $options
56+
* @param array{where?: array<string, string>, whereDocument?: array<string, mixed>, include?: array<string>} $options
5757
*/
5858
public function query(Vector $vector, array $options = []): iterable
5959
{
60+
$include = null;
61+
if ([] !== ($options['include'] ?? [])) {
62+
$include = array_values(
63+
array_unique(
64+
array_merge(['embeddings', 'metadatas', 'distances'], $options['include'])
65+
)
66+
);
67+
}
68+
6069
$collection = $this->client->getOrCreateCollection($this->collectionName);
6170
$queryResponse = $collection->query(
6271
queryEmbeddings: [$vector->getData()],
6372
nResults: 4,
6473
where: $options['where'] ?? null,
6574
whereDocument: $options['whereDocument'] ?? null,
75+
include: $include,
6676
);
6777

68-
for ($i = 0; $i < \count($queryResponse->metadatas[0]); ++$i) {
78+
$metaCount = \count($queryResponse->metadatas[0]);
79+
80+
for ($i = 0; $i < $metaCount; ++$i) {
81+
$metaData = new Metadata($queryResponse->metadatas[0][$i]);
82+
if (isset($queryResponse->documents[0][$i])) {
83+
$metaData->setText($queryResponse->documents[0][$i]);
84+
}
85+
6986
yield new VectorDocument(
7087
id: Uuid::fromString($queryResponse->ids[0][$i]),
7188
vector: new Vector($queryResponse->embeddings[0][$i]),
72-
metadata: new Metadata($queryResponse->metadatas[0][$i]),
89+
metadata: $metaData,
7390
score: $queryResponse->distances[0][$i] ?? null,
7491
);
7592
}

src/store/tests/Bridge/ChromaDb/StoreTest.php

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,108 @@ public function testQueryWithVariousFilterCombinations(
469469
$this->assertCount(1, $documents);
470470
}
471471

472+
/**
 * Queries the store without any `include` option and checks that the single
 * result carries the id, vector and metadata from the mocked response.
 *
 * The response has `documents: null`, so the expected metadata contains only
 * the `title` key (no `_text` entry).
 *
 * NOTE(review): despite the "Distance" in the name, `distances` is null in the
 * fixture and the score of the returned document is not asserted.
 */
public function testQueryReturnsMetadatasEmbeddingsDistanceWithoutInclude()
473+
{
474+
$queryVector = new Vector([0.15, 0.25, 0.35]);
475+
// Canned ChromaDB response: one hit, nested one level per query embedding.
$queryResponse = new QueryItemsResponse(
476+
ids: [['01234567-89ab-cdef-0123-456789abcdef']],
477+
embeddings: [[[0.1, 0.2, 0.3]]],
478+
metadatas: [[['title' => 'Doc 1']]],
479+
// No documents returned, so no text should be attached to the metadata.
documents: null,
480+
data: null,
481+
uris: null,
482+
distances: null
483+
);
484+
485+
$collection = $this->createMock(CollectionResource::class);
486+
$client = $this->createMock(Client::class);
487+
488+
$client->expects($this->once())
489+
->method('getOrCreateCollection')
490+
->with('test-collection')
491+
->willReturn($collection);
492+
493+
// No ->with() constraint: the arguments passed to query() (including
// `include`) are not verified by this test.
$collection->expects($this->once())
494+
->method('query')
495+
->willReturn($queryResponse);
496+
497+
$store = new Store($client, 'test-collection');
498+
// query() is consumed via iterator_to_array() to collect all results.
$documents = iterator_to_array($store->query($queryVector));
499+
500+
$this->assertCount(1, $documents);
501+
$this->assertSame('01234567-89ab-cdef-0123-456789abcdef', (string) $documents[0]->id);
502+
$this->assertSame([0.1, 0.2, 0.3], $documents[0]->vector->getData());
503+
$this->assertSame(['title' => 'Doc 1'], $documents[0]->metadata->getArrayCopy());
504+
}
505+
506+
/**
 * Queries the store with `include => ['documents']` and checks that the
 * document text from the mocked response ends up in the result metadata.
 *
 * The final assertion expects the text under the `_text` key, next to the
 * original `title` entry.
 *
 * NOTE(review): despite the "Distance" in the name, `distances` is null in the
 * fixture and the score of the returned document is not asserted.
 */
public function testQueryReturnsMetadatasEmbeddingsDistanceWithOnlyDocuments()
507+
{
508+
$queryVector = new Vector([0.15, 0.25, 0.35]);
509+
// Canned ChromaDB response: one hit, nested one level per query embedding.
$queryResponse = new QueryItemsResponse(
510+
ids: [['01234567-89ab-cdef-0123-456789abcdef']],
511+
embeddings: [[[0.1, 0.2, 0.3]]],
512+
metadatas: [[['title' => 'Doc 1']]],
513+
// Document text returned for the hit; expected to surface as `_text`.
documents: [['Document content here']],
514+
data: null,
515+
uris: null,
516+
distances: null
517+
);
518+
519+
$collection = $this->createMock(CollectionResource::class);
520+
$client = $this->createMock(Client::class);
521+
522+
$client->expects($this->once())
523+
->method('getOrCreateCollection')
524+
->with('test-collection')
525+
->willReturn($collection);
526+
527+
// No ->with() constraint: the `include` list that Store builds from the
// options is not verified by this test.
$collection->expects($this->once())
528+
->method('query')
529+
->willReturn($queryResponse);
530+
531+
$store = new Store($client, 'test-collection');
532+
// Only 'documents' is requested by the caller.
$documents = iterator_to_array($store->query($queryVector, ['include' => ['documents']]));
533+
534+
$this->assertCount(1, $documents);
535+
$this->assertSame('01234567-89ab-cdef-0123-456789abcdef', (string) $documents[0]->id);
536+
$this->assertSame([0.1, 0.2, 0.3], $documents[0]->vector->getData());
537+
$this->assertSame(['title' => 'Doc 1', '_text' => 'Document content here'], $documents[0]->metadata->getArrayCopy());
538+
}
539+
540+
/**
 * Queries the store with all four include values ('embeddings', 'metadatas',
 * 'distances', 'documents') and checks the same outcome as the
 * documents-only case: id, vector, and metadata with the text under `_text`.
 *
 * NOTE(review): despite the "Distance" in the name, `distances` is null in the
 * fixture and the score of the returned document is not asserted.
 */
public function testQueryReturnsMetadatasEmbeddingsDistanceWithAll()
541+
{
542+
$queryVector = new Vector([0.15, 0.25, 0.35]);
543+
// Canned ChromaDB response: one hit, nested one level per query embedding.
$queryResponse = new QueryItemsResponse(
544+
ids: [['01234567-89ab-cdef-0123-456789abcdef']],
545+
embeddings: [[[0.1, 0.2, 0.3]]],
546+
metadatas: [[['title' => 'Doc 1']]],
547+
// Document text returned for the hit; expected to surface as `_text`.
documents: [['Document content here']],
548+
data: null,
549+
uris: null,
550+
distances: null
551+
);
552+
553+
$collection = $this->createMock(CollectionResource::class);
554+
$client = $this->createMock(Client::class);
555+
556+
$client->expects($this->once())
557+
->method('getOrCreateCollection')
558+
->with('test-collection')
559+
->willReturn($collection);
560+
561+
// No ->with() constraint: the `include` list forwarded to query() is not
// verified by this test.
$collection->expects($this->once())
562+
->method('query')
563+
->willReturn($queryResponse);
564+
565+
$store = new Store($client, 'test-collection');
566+
// Caller spells out every include value explicitly.
$documents = iterator_to_array($store->query($queryVector, ['include' => ['embeddings', 'metadatas', 'distances', 'documents']]));
567+
568+
$this->assertCount(1, $documents);
569+
$this->assertSame('01234567-89ab-cdef-0123-456789abcdef', (string) $documents[0]->id);
570+
$this->assertSame([0.1, 0.2, 0.3], $documents[0]->vector->getData());
571+
$this->assertSame(['title' => 'Doc 1', '_text' => 'Document content here'], $documents[0]->metadata->getArrayCopy());
572+
}
573+
472574
/**
473575
* @return \Iterator<string, array{
474576
* options: array{where?: array<string, string>, whereDocument?: array<string, mixed>},

0 commit comments

Comments
 (0)