Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions config/vector.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,22 @@
'request' => (int) env('QDRANT_REQUEST_TIMEOUT', 30),
],

/*
|--------------------------------------------------------------------------
| Embedding Configuration
|--------------------------------------------------------------------------
|
| Provider: ollama, openai, none
| Model: provider-specific model name (e.g. bge-large, text-embedding-3-large)
| URL: read from EMBEDDING_URL; no fallback value is supplied here
| API key: read from EMBEDDING_API_KEY; no fallback value is supplied here
| Dimensions: optional integer read from EMBEDDING_DIMENSIONS, otherwise null
|
*/

'embeddings' => [
// Defaults to the "ollama" provider with the "bge-large" model when the
// corresponding environment variables are not set.
'provider' => env('EMBEDDING_PROVIDER', 'ollama'),
'model' => env('EMBEDDING_MODEL', 'bge-large'),
// NOTE(review): presumably the base URL handed to the provider connector — confirm against the consumer of this config.
'url' => env('EMBEDDING_URL'),
'api_key' => env('EMBEDDING_API_KEY'),
// Cast to int only when the variable holds a truthy value; an unset, empty
// or "0" value results in null rather than 0.
'dimensions' => env('EMBEDDING_DIMENSIONS') ? (int) env('EMBEDDING_DIMENSIONS') : null,
],

];
23 changes: 23 additions & 0 deletions src/Contracts/EmbeddingClient.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<?php

declare(strict_types=1);

namespace TheShit\Vector\Contracts;

interface EmbeddingClient
{
    /**
     * Turn a single piece of text into its embedding vector.
     *
     * @param  string  $text  The text to embed.
     * @return array<float> The vector components for the text.
     */
    public function embed(string $text): array;

    /**
     * Turn several pieces of text into embedding vectors in one call.
     *
     * @param  array<string>  $texts  The texts to embed.
     * @return array<array<float>> One vector (a list of floats) per result entry.
     */
    public function embedBatch(array $texts): array;
}
27 changes: 27 additions & 0 deletions src/Embeddings/NullEmbeddings.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<?php

declare(strict_types=1);

namespace TheShit\Vector\Embeddings;

use TheShit\Vector\Contracts\EmbeddingClient;

/**
 * No-op embedding client: performs no work and always yields empty results.
 */
class NullEmbeddings implements EmbeddingClient
{
    /**
     * Always produces an empty vector, whatever text is supplied.
     *
     * @param  string  $text  Ignored.
     * @return array<float> Always an empty array.
     */
    public function embed(string $text): array
    {
        return [];
    }

    /**
     * Always produces an empty result list, whatever texts are supplied.
     *
     * Note that the result is an empty array, not one empty vector per input.
     *
     * @param  array<string>  $texts  Ignored.
     * @return array<array<float>> Always an empty array.
     */
    public function embedBatch(array $texts): array
    {
        return [];
    }
}
37 changes: 37 additions & 0 deletions src/Embeddings/OllamaConnector.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
<?php

declare(strict_types=1);

namespace TheShit\Vector\Embeddings;

use Saloon\Http\Connector;
use Saloon\Traits\Plugins\HasTimeout;

/**
 * Saloon connector pointing at an Ollama server's base URL.
 */
class OllamaConnector extends Connector
{
    use HasTimeout;

    // NOTE(review): presumably read by the HasTimeout trait and expressed in
    // seconds — confirm against the Saloon documentation.
    protected int $connectTimeout = 5;

    protected int $requestTimeout = 30;

    /**
     * @param  string  $baseUrl  Root URL of the server; trailing slashes are tolerated.
     */
    public function __construct(
        protected readonly string $baseUrl,
    ) {}

    /**
     * Return the configured base URL with any trailing slashes removed.
     */
    public function resolveBaseUrl(): string
    {
        $normalised = rtrim($this->baseUrl, '/');

        return $normalised;
    }

    /**
     * Headers applied by default: JSON is both sent and expected.
     *
     * @return array<string, string>
     */
    protected function defaultHeaders(): array
    {
        $json = 'application/json';

        return [
            'Content-Type' => $json,
            'Accept' => $json,
        ];
    }
}
61 changes: 61 additions & 0 deletions src/Embeddings/OllamaEmbeddings.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
<?php

declare(strict_types=1);

namespace TheShit\Vector\Embeddings;

use Saloon\Exceptions\Request\RequestException;
use TheShit\Vector\Contracts\EmbeddingClient;
use TheShit\Vector\Embeddings\Requests\OllamaEmbedRequest;

/**
 * Embedding client that sends texts to an Ollama server through a Saloon connector.
 *
 * The client is best-effort: transport, HTTP and decoding failures are turned
 * into empty vectors instead of exceptions.
 */
class OllamaEmbeddings implements EmbeddingClient
{
    /**
     * @param  OllamaConnector  $connector  Connector used to send the embed request.
     * @param  string  $model  Model name placed in the request body.
     */
    public function __construct(
        protected readonly OllamaConnector $connector,
        protected readonly string $model = 'bge-large',
    ) {}

    /**
     * Generate an embedding vector for a single text.
     *
     * Blank (whitespace-only) text returns an empty vector without sending a
     * request. An empty vector is also returned when the batch call yields no
     * first entry.
     *
     * @return array<float>
     */
    public function embed(string $text): array
    {
        if (trim($text) === '') {
            return [];
        }

        return $this->embedBatch([$text])[0] ?? [];
    }

    /**
     * Generate embedding vectors for multiple texts with one request.
     *
     * Blank texts are dropped before sending, so the result is indexed by the
     * remaining (re-indexed) texts, not by the positions in $texts.
     *
     * On failure (HTTP error status, connection failure, or a response body
     * that is not valid JSON) one empty vector is returned per non-blank text.
     * A response without a usable "embeddings" list yields an empty array.
     *
     * @param  array<string>  $texts
     * @return array<array<float>>
     */
    public function embedBatch(array $texts): array
    {
        $texts = array_values(array_filter(
            $texts,
            static fn (string $t): bool => trim($t) !== '',
        ));

        if ($texts === []) {
            return [];
        }

        try {
            $response = $this->connector->send(new OllamaEmbedRequest($this->model, $texts));
            $response->throw();

            // Decoded inside the try block so a malformed body is handled like
            // any other failed request.
            $embeddings = $response->json('embeddings');
        } catch (RequestException|\Saloon\Exceptions\Request\FatalRequestException|\JsonException) {
            // FatalRequestException covers connection-level failures (server
            // unreachable, timeout), which are not RequestException instances.
            return array_fill(0, count($texts), []);
        }

        if (! is_array($embeddings)) {
            return [];
        }

        return array_map($this->toVector(...), $embeddings);
    }

    /**
     * Normalise one decoded embedding into a list of floats.
     *
     * Returns an empty vector when the entry is not an array or contains a
     * non-numeric component, rather than letting a TypeError escape.
     *
     * @return array<float>
     */
    private function toVector(mixed $embedding): array
    {
        if (! is_array($embedding)) {
            return [];
        }

        $vector = [];

        foreach ($embedding as $key => $value) {
            if (! is_int($value) && ! is_float($value)) {
                return [];
            }

            $vector[$key] = (float) $value;
        }

        return $vector;
    }
}
44 changes: 44 additions & 0 deletions src/Embeddings/OpenAiConnector.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
<?php

declare(strict_types=1);

namespace TheShit\Vector\Embeddings;

use Saloon\Http\Auth\TokenAuthenticator;
use Saloon\Http\Connector;
use Saloon\Traits\Plugins\HasTimeout;

/**
 * Saloon connector for an OpenAI-style embeddings endpoint, authenticated by API key.
 */
class OpenAiConnector extends Connector
{
    use HasTimeout;

    // NOTE(review): presumably read by the HasTimeout trait and expressed in
    // seconds — confirm against the Saloon documentation.
    protected int $connectTimeout = 5;

    protected int $requestTimeout = 30;

    /**
     * @param  string  $baseUrl  Root URL of the API; trailing slashes are tolerated.
     * @param  string  $apiKey  Key handed to the token authenticator.
     */
    public function __construct(
        protected readonly string $baseUrl,
        protected readonly string $apiKey,
    ) {}

    /**
     * Return the configured base URL with any trailing slashes removed.
     */
    public function resolveBaseUrl(): string
    {
        $normalised = rtrim($this->baseUrl, '/');

        return $normalised;
    }

    /**
     * Headers applied by default: JSON is both sent and expected.
     *
     * @return array<string, string>
     */
    protected function defaultHeaders(): array
    {
        $json = 'application/json';

        return [
            'Content-Type' => $json,
            'Accept' => $json,
        ];
    }

    /**
     * Build the default authenticator from the configured API key.
     */
    protected function defaultAuth(): TokenAuthenticator
    {
        $authenticator = new TokenAuthenticator($this->apiKey);

        return $authenticator;
    }
}
62 changes: 62 additions & 0 deletions src/Embeddings/OpenAiEmbeddings.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
<?php

declare(strict_types=1);

namespace TheShit\Vector\Embeddings;

use Saloon\Exceptions\Request\RequestException;
use TheShit\Vector\Contracts\EmbeddingClient;
use TheShit\Vector\Embeddings\Requests\OpenAiEmbedRequest;

/**
 * Embedding client that sends texts to an OpenAI-style API through a Saloon connector.
 *
 * The client is best-effort: transport, HTTP and decoding failures are turned
 * into empty vectors instead of exceptions.
 */
class OpenAiEmbeddings implements EmbeddingClient
{
    /**
     * @param  OpenAiConnector  $connector  Connector used to send the embed request.
     * @param  string  $model  Model name placed in the request body.
     * @param  int|null  $dimensions  Optional value forwarded to the request; omitted when null.
     */
    public function __construct(
        protected readonly OpenAiConnector $connector,
        protected readonly string $model = 'text-embedding-3-large',
        protected readonly ?int $dimensions = null,
    ) {}

    /**
     * Generate an embedding vector for a single text.
     *
     * Blank (whitespace-only) text returns an empty vector without sending a
     * request. An empty vector is also returned when the batch call yields no
     * first entry.
     *
     * @return array<float>
     */
    public function embed(string $text): array
    {
        if (trim($text) === '') {
            return [];
        }

        return $this->embedBatch([$text])[0] ?? [];
    }

    /**
     * Generate embedding vectors for multiple texts with one request.
     *
     * Blank texts are dropped before sending, so the result is indexed by the
     * remaining (re-indexed) texts, not by the positions in $texts.
     *
     * On failure (HTTP error status, connection failure, or a response body
     * that is not valid JSON) one empty vector is returned per non-blank text.
     * A response without a usable "data" list yields an empty array.
     *
     * @param  array<string>  $texts
     * @return array<array<float>>
     */
    public function embedBatch(array $texts): array
    {
        $texts = array_values(array_filter(
            $texts,
            static fn (string $t): bool => trim($t) !== '',
        ));

        if ($texts === []) {
            return [];
        }

        try {
            $response = $this->connector->send(
                new OpenAiEmbedRequest($this->model, $texts, $this->dimensions),
            );
            $response->throw();

            // Decoded inside the try block so a malformed body is handled like
            // any other failed request.
            $data = $response->json('data');
        } catch (RequestException|\Saloon\Exceptions\Request\FatalRequestException|\JsonException) {
            // FatalRequestException covers connection-level failures (server
            // unreachable, timeout), which are not RequestException instances.
            return array_fill(0, count($texts), []);
        }

        if (! is_array($data)) {
            return [];
        }

        return array_map($this->toVector(...), $data);
    }

    /**
     * Extract the "embedding" list from one response item as floats.
     *
     * Returns an empty vector when the item has no array "embedding" entry or
     * the entry contains a non-numeric component, rather than letting a
     * TypeError escape.
     *
     * @return array<float>
     */
    private function toVector(mixed $item): array
    {
        if (! is_array($item) || ! isset($item['embedding']) || ! is_array($item['embedding'])) {
            return [];
        }

        $vector = [];

        foreach ($item['embedding'] as $key => $value) {
            if (! is_int($value) && ! is_float($value)) {
                return [];
            }

            $vector[$key] = (float) $value;
        }

        return $vector;
    }
}
41 changes: 41 additions & 0 deletions src/Embeddings/Requests/OllamaEmbedRequest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
<?php

declare(strict_types=1);

namespace TheShit\Vector\Embeddings\Requests;

use Saloon\Contracts\Body\HasBody;
use Saloon\Enums\Method;
use Saloon\Http\Request;
use Saloon\Traits\Body\HasJsonBody;

/**
 * POST request carrying a model name and a list of texts to the embed endpoint.
 */
class OllamaEmbedRequest extends Request implements HasBody
{
    use HasJsonBody;

    protected Method $method = Method::POST;

    /**
     * @param  string  $model  Model name sent as the "model" field.
     * @param  array<string>  $texts  Texts sent as the "input" field.
     */
    public function __construct(
        protected readonly string $model,
        protected readonly array $texts,
    ) {}

    /**
     * Path of the endpoint, relative to the connector's base URL.
     */
    public function resolveEndpoint(): string
    {
        return '/api/embed';
    }

    /**
     * JSON body: the model name plus the texts to embed.
     *
     * @return array<string, mixed>
     */
    protected function defaultBody(): array
    {
        $payload = [];
        $payload['model'] = $this->model;
        $payload['input'] = $this->texts;

        return $payload;
    }
}
48 changes: 48 additions & 0 deletions src/Embeddings/Requests/OpenAiEmbedRequest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
<?php

declare(strict_types=1);

namespace TheShit\Vector\Embeddings\Requests;

use Saloon\Contracts\Body\HasBody;
use Saloon\Enums\Method;
use Saloon\Http\Request;
use Saloon\Traits\Body\HasJsonBody;

/**
 * POST request carrying a model name, a list of texts and an optional
 * dimensions value to the embeddings endpoint.
 */
class OpenAiEmbedRequest extends Request implements HasBody
{
    use HasJsonBody;

    protected Method $method = Method::POST;

    /**
     * @param  string  $model  Model name sent as the "model" field.
     * @param  array<string>  $texts  Texts sent as the "input" field.
     * @param  int|null  $dimensions  Sent as "dimensions" only when not null.
     */
    public function __construct(
        protected readonly string $model,
        protected readonly array $texts,
        protected readonly ?int $dimensions = null,
    ) {}

    /**
     * Path of the endpoint, relative to the connector's base URL.
     */
    public function resolveEndpoint(): string
    {
        return '/v1/embeddings';
    }

    /**
     * JSON body: model and input always, "dimensions" only when configured.
     *
     * @return array<string, mixed>
     */
    protected function defaultBody(): array
    {
        $payload = [
            'model' => $this->model,
            'input' => $this->texts,
        ];

        // Array union appends the key; it cannot clash with the two keys above.
        return $this->dimensions === null
            ? $payload
            : $payload + ['dimensions' => $this->dimensions];
    }
}
Loading
Loading