|
| 1 | +<?php |
| 2 | + |
| 3 | +/* |
| 4 | + * This file is part of the Symfony package. |
| 5 | + * |
| 6 | + * (c) Fabien Potencier <fabien@symfony.com> |
| 7 | + * |
| 8 | + * For the full copyright and license information, please view the LICENSE |
| 9 | + * file that was distributed with this source code. |
| 10 | + */ |
| 11 | + |
| 12 | +namespace Symfony\AI\Store\Bridge\OpenSearch; |
| 13 | + |
| 14 | +use Symfony\AI\Platform\Vector\NullVector; |
| 15 | +use Symfony\AI\Platform\Vector\Vector; |
| 16 | +use Symfony\AI\Store\Document\Metadata; |
| 17 | +use Symfony\AI\Store\Document\VectorDocument; |
| 18 | +use Symfony\AI\Store\Exception\InvalidArgumentException; |
| 19 | +use Symfony\AI\Store\ManagedStoreInterface; |
| 20 | +use Symfony\AI\Store\StoreInterface; |
| 21 | +use Symfony\Component\Uid\Uuid; |
| 22 | +use Symfony\Contracts\HttpClient\HttpClientInterface; |
| 23 | + |
| 24 | +/** |
| 25 | + * @author Guillaume Loulier <personal@guillaumeloulier.fr> |
| 26 | + */ |
| 27 | +final class Store implements ManagedStoreInterface, StoreInterface |
| 28 | +{ |
| 29 | + public function __construct( |
| 30 | + private readonly HttpClientInterface $httpClient, |
| 31 | + private readonly string $endpoint, |
| 32 | + private readonly string $indexName, |
| 33 | + private readonly string $vectorsField = '_vectors', |
| 34 | + private readonly int $dimensions = 1536, |
| 35 | + private readonly string $spaceType = 'l2', |
| 36 | + ) { |
| 37 | + } |
| 38 | + |
| 39 | + public function setup(array $options = []): void |
| 40 | + { |
| 41 | + $indexExistResponse = $this->httpClient->request('HEAD', \sprintf('%s/%s', $this->endpoint, $this->indexName)); |
| 42 | + |
| 43 | + if (200 === $indexExistResponse->getStatusCode()) { |
| 44 | + return; |
| 45 | + } |
| 46 | + |
| 47 | + $this->request('PUT', $this->indexName, [ |
| 48 | + 'settings' => [ |
| 49 | + 'index.knn' => true, |
| 50 | + ], |
| 51 | + 'mappings' => [ |
| 52 | + 'properties' => [ |
| 53 | + $this->vectorsField => [ |
| 54 | + 'type' => 'knn_vector', |
| 55 | + 'dimension' => $options['dimensions'] ?? $this->dimensions, |
| 56 | + 'space_type' => $options['space_type'] ?? $this->spaceType, |
| 57 | + ], |
| 58 | + ], |
| 59 | + ], |
| 60 | + ]); |
| 61 | + } |
| 62 | + |
| 63 | + public function drop(): void |
| 64 | + { |
| 65 | + $indexExistResponse = $this->httpClient->request('HEAD', \sprintf('%s/%s', $this->endpoint, $this->indexName)); |
| 66 | + |
| 67 | + if (404 === $indexExistResponse->getStatusCode()) { |
| 68 | + throw new InvalidArgumentException(\sprintf('The index "%s" does not exist.', $this->indexName)); |
| 69 | + } |
| 70 | + |
| 71 | + $this->request('DELETE', $this->indexName); |
| 72 | + } |
| 73 | + |
| 74 | + public function add(VectorDocument ...$documents): void |
| 75 | + { |
| 76 | + $documentToIndex = fn (VectorDocument $document): array => [ |
| 77 | + 'index' => [ |
| 78 | + '_index' => $this->indexName, |
| 79 | + '_id' => $document->id->toRfc4122(), |
| 80 | + ], |
| 81 | + ]; |
| 82 | + |
| 83 | + $documentToPayload = fn (VectorDocument $document): array => [ |
| 84 | + $this->vectorsField => $document->vector->getData(), |
| 85 | + 'metadata' => json_encode($document->metadata->getArrayCopy()), |
| 86 | + ]; |
| 87 | + |
| 88 | + $this->request('POST', '_bulk', function () use ($documents, $documentToIndex, $documentToPayload) { |
| 89 | + foreach ($documents as $document) { |
| 90 | + yield json_encode($documentToIndex($document)).\PHP_EOL.json_encode($documentToPayload($document)).\PHP_EOL; |
| 91 | + } |
| 92 | + }); |
| 93 | + } |
| 94 | + |
| 95 | + public function query(Vector $vector, array $options = []): iterable |
| 96 | + { |
| 97 | + $documents = $this->request('POST', \sprintf('%s/_search', $this->indexName), [ |
| 98 | + 'size' => $options['size'] ?? 100, |
| 99 | + 'query' => [ |
| 100 | + 'knn' => [ |
| 101 | + $this->vectorsField => [ |
| 102 | + 'vector' => $vector->getData(), |
| 103 | + 'k' => $options['k'] ?? 100, |
| 104 | + ], |
| 105 | + ], |
| 106 | + ], |
| 107 | + ]); |
| 108 | + |
| 109 | + foreach ($documents['hits']['hits'] as $document) { |
| 110 | + yield $this->convertToVectorDocument($document); |
| 111 | + } |
| 112 | + } |
| 113 | + |
| 114 | + /** |
| 115 | + * @param \Closure|array<string, mixed> $payload |
| 116 | + * |
| 117 | + * @return array<string, mixed> |
| 118 | + */ |
| 119 | + private function request(string $method, string $path, \Closure|array $payload = []): array |
| 120 | + { |
| 121 | + $finalOptions = []; |
| 122 | + |
| 123 | + if (\is_array($payload) && [] !== $payload) { |
| 124 | + $finalOptions['json'] = $payload; |
| 125 | + } |
| 126 | + |
| 127 | + if ($payload instanceof \Closure) { |
| 128 | + $finalOptions = [ |
| 129 | + 'headers' => [ |
| 130 | + 'Content-Type' => 'application/x-ndjson', |
| 131 | + ], |
| 132 | + 'body' => $payload(), |
| 133 | + ]; |
| 134 | + } |
| 135 | + |
| 136 | + $response = $this->httpClient->request($method, \sprintf('%s/%s', $this->endpoint, $path), $finalOptions); |
| 137 | + |
| 138 | + return $response->toArray(); |
| 139 | + } |
| 140 | + |
| 141 | + /** |
| 142 | + * @param array{ |
| 143 | + * '_id'?: string, |
| 144 | + * '_source': array<string, mixed>, |
| 145 | + * '_score': float, |
| 146 | + * } $document |
| 147 | + */ |
| 148 | + private function convertToVectorDocument(array $document): VectorDocument |
| 149 | + { |
| 150 | + $id = $document['_id'] ?? throw new InvalidArgumentException('Missing "_id" field in the document data.'); |
| 151 | + |
| 152 | + $vector = !\array_key_exists($this->vectorsField, $document['_source']) || null === $document['_source'][$this->vectorsField] |
| 153 | + ? new NullVector() |
| 154 | + : new Vector($document['_source'][$this->vectorsField]); |
| 155 | + |
| 156 | + return new VectorDocument(Uuid::fromString($id), $vector, new Metadata(json_decode($document['_source']['metadata'], true)), $document['_score'] ?? null); |
| 157 | + } |
| 158 | +} |
0 commit comments