Skip to content

Commit 38c15ee

Browse files
committed
test(store): add comprehensive tests for BM25 features
Add three new tests covering newly introduced functionality: (1) testFuzzyMatchingWithWordSimilarity verifies pg_trgm fuzzy matching with word_similarity() and custom thresholds (primary, secondary, strict); (2) testSearchableAttributesWithBoost ensures field-specific tsvector columns (title_tsv, overview_tsv) are created with proper GIN indexes; (3) testFuzzyWeightParameter validates fuzzy weight distribution in the RRF formula when combining vector, BM25, and fuzzy scores. All tests verify SQL generation via callback assertions. Test suite: 19 tests, 132 assertions, all passing.
1 parent 1293ac1 commit 38c15ee

File tree

1 file changed

+115
-0
lines changed

1 file changed

+115
-0
lines changed

src/store/tests/Bridge/Postgres/HybridStoreTest.php

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -529,6 +529,121 @@ public function testPureKeywordSearchReturnsEmptyWhenNoMatch()
529529
$this->assertCount(0, $results);
530530
}
531531

532+
/**
 * Ensures a query generates the fuzzy-matching CTE with word_similarity()
 * and that each custom threshold passed to the constructor shows up in the SQL.
 *
 * The primary threshold deliberately differs from fuzzyWeight: with both set
 * to 0.3 a match on "0.300000" could not be attributed to the threshold.
 */
public function testFuzzyMatchingWithWordSimilarity()
{
    $pdo = $this->createMock(\PDO::class);
    $statement = $this->createMock(\PDOStatement::class);

    // Custom thresholds; every numeric option below has a distinct value so
    // the literal assertions in the callback are unambiguous.
    $store = new HybridStore(
        $pdo,
        'hybrid_table',
        semanticRatio: 0.5,
        fuzzyWeight: 0.3,
        fuzzyPrimaryThreshold: 0.4,
        fuzzySecondaryThreshold: 0.25,
        fuzzyStrictThreshold: 0.2
    );

    $pdo->expects($this->once())
        ->method('prepare')
        ->with($this->callback(function (string $sql): bool {
            // The fuzzy CTE must be part of the generated statement
            $this->assertStringContainsString('fuzzy_scores AS', $sql);

            // pg_trgm's word_similarity() must be applied to the bound query
            $this->assertStringContainsString('word_similarity(:query, search_text)', $sql);

            // Thresholds are expected as six-decimal literals
            $this->assertStringContainsString('0.400000', $sql); // Primary threshold
            $this->assertStringContainsString('0.250000', $sql); // Secondary threshold
            $this->assertStringContainsString('0.200000', $sql); // Strict threshold

            // Always accept: failures are reported by the assertions above
            return true;
        }))
        ->willReturn($statement);

    $statement->expects($this->once())->method('execute');
    $statement->expects($this->once())->method('fetchAll')->willReturn([]);

    $store->query(new Vector([0.1, 0.2, 0.3]), ['q' => 'test']);
}
571+
572+
/**
 * Checks the statements issued by setup() when searchable attributes are
 * configured: exactly ten exec() calls are expected, the third one declaring
 * a generated tsvector column per attribute and the eighth and ninth
 * creating GIN indexes on those columns.
 */
public function testSearchableAttributesWithBoost()
{
    $connection = $this->createMock(\PDO::class);

    // Two attributes, each mapped to a metadata key with its own boost
    $hybridStore = new HybridStore(
        $connection,
        'hybrid_table',
        searchableAttributes: [
            'title' => ['boost' => 2.0, 'metadata_key' => 'title'],
            'overview' => ['boost' => 1.0, 'metadata_key' => 'overview'],
        ]
    );

    // Position of the current exec() call, shared with the callback below
    $execCalls = 0;

    $connection->expects($this->exactly(10))
        ->method('exec')
        ->willReturnCallback(function (string $sql) use (&$execCalls): int {
            ++$execCalls;

            if (3 === $execCalls) {
                // One dedicated tsvector column per searchable attribute
                $this->assertStringContainsString('title_tsv tsvector GENERATED ALWAYS AS', $sql);
                $this->assertStringContainsString('overview_tsv tsvector GENERATED ALWAYS AS', $sql);

                // The generic content_tsv column (backward compat mode) must be absent
                $this->assertStringNotContainsString('content_tsv tsvector GENERATED ALWAYS AS (to_tsvector(\'simple\', content)) STORED', $sql);

                return 0;
            }

            if (8 === $execCalls || 9 === $execCalls) {
                // One GIN index per attribute column (title_tsv_idx, overview_tsv_idx)
                $this->assertStringContainsString('_tsv_idx', $sql);
                $this->assertStringContainsString('USING gin(', $sql);
            }

            return 0;
        });

    $hybridStore->setup();
}
612+
613+
/**
 * Checks that querying a store configured with a fuzzy weight prepares SQL
 * containing the fuzzy CTE, the combined-results CTE and an RRF term.
 *
 * NOTE(review): only structural fragments are asserted; the numeric weights
 * derived from semanticRatio/fuzzyWeight are not pinned by this test.
 * Consider asserting them once the exact SQL formatting is confirmed.
 */
public function testFuzzyWeightParameter()
{
    $connection = $this->createMock(\PDO::class);
    $preparedStatement = $this->createMock(\PDOStatement::class);

    // Intended split: 40% vector, with the remaining 60% shared equally
    // between BM25 and fuzzy (30% each) because fuzzyWeight is 0.5.
    $hybridStore = new HybridStore(
        $connection,
        'hybrid_table',
        semanticRatio: 0.4,
        fuzzyWeight: 0.5
    );

    $sqlHasRrfStructure = function ($sql) {
        $expectedFragments = [
            'fuzzy_scores AS',     // fuzzy component
            'combined_results AS', // CTE merging the components
            'COALESCE(1.0 / (',    // RRF formula pattern
        ];

        foreach ($expectedFragments as $fragment) {
            $this->assertStringContainsString($fragment, $sql);
        }

        // Always accept: failures are reported by the assertions above
        return true;
    };

    $connection->expects($this->once())
        ->method('prepare')
        ->with($this->callback($sqlHasRrfStructure))
        ->willReturn($preparedStatement);

    $preparedStatement->expects($this->once())->method('fetchAll')->willReturn([]);
    $preparedStatement->expects($this->once())->method('execute');

    $hybridStore->query(new Vector([0.1, 0.2, 0.3]), ['q' => 'test']);
}
646+
532647
private function normalizeQuery(string $query): string
533648
{
534649
// Remove extra spaces, tabs and newlines

0 commit comments

Comments
 (0)