diff --git a/changelog/unreleased/SOLR-18267-add-flat-vector-index.yml b/changelog/unreleased/SOLR-18267-add-flat-vector-index.yml new file mode 100644 index 000000000000..94bb9c5b7f24 --- /dev/null +++ b/changelog/unreleased/SOLR-18267-add-flat-vector-index.yml @@ -0,0 +1,7 @@ +title: Add knnAlgorithm="flat" option to DenseVectorField to skip HNSW graph construction for exact vector search +type: added +authors: +- name: Adam Quigley +links: +- name: SOLR-18267 + url: https://issues.apache.org/jira/browse/SOLR-18267 \ No newline at end of file diff --git a/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java b/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java index 1746c0ce3f63..4d4815fbfd73 100644 --- a/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java +++ b/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java @@ -128,7 +128,8 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) { FieldType fieldType = (schemaField == null ? null : schemaField.getType()); if (fieldType instanceof DenseVectorField vectorField) { final String knnAlgorithm = vectorField.getKnnAlgorithm(); - if (!DenseVectorField.HNSW_ALGORITHM.equals(knnAlgorithm)) { + if (!DenseVectorField.HNSW_ALGORITHM.equals(knnAlgorithm) + && !DenseVectorField.FLAT_ALGORITHM.equals(knnAlgorithm)) { throw new SolrException( ErrorCode.SERVER_ERROR, knnAlgorithm + " KNN algorithm is not supported"); } diff --git a/solr/core/src/java/org/apache/solr/core/Solr101FlatVectorFormat.java b/solr/core/src/java/org/apache/solr/core/Solr101FlatVectorFormat.java new file mode 100644 index 000000000000..d81119f0156d --- /dev/null +++ b/solr/core/src/java/org/apache/solr/core/Solr101FlatVectorFormat.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.core; + +import java.io.IOException; +import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.KnnVectorsReader; +import org.apache.lucene.codecs.KnnVectorsWriter; +import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil; +import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; + +/** + * SPI-registered wrapper for {@link Lucene99FlatVectorsFormat}, which Lucene does not register as a + * {@link KnnVectorsFormat} in SPI. 
+ * + * @lucene.spi {@value #NAME} + * @since 10.1 + */ +public final class Solr101FlatVectorFormat extends KnnVectorsFormat { + + /** Format name handed to the {@link KnnVectorsFormat} constructor and returned by {@link #toString()}. */ static final String NAME = "Solr101FlatVectorFormat"; + + /** Lucene flat format that every reader and writer call is forwarded to. */ private final Lucene99FlatVectorsFormat delegate; + + /** Creates the format under {@link #NAME}, wrapping a {@link Lucene99FlatVectorsFormat} built with the scorer from {@link FlatVectorScorerUtil#getLucene99FlatVectorsScorer()}. */ public Solr101FlatVectorFormat() { + super(NAME); + this.delegate = + new Lucene99FlatVectorsFormat(FlatVectorScorerUtil.getLucene99FlatVectorsScorer()); + } + + /** Returns the writer created by the wrapped flat format for {@code state}. */ @Override + public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { + return delegate.fieldsWriter(state); + } + + /** Returns the reader created by the wrapped flat format for {@code state}. */ @Override + public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException { + return delegate.fieldsReader(state); + } + + /** Returns the dimension limit reported by the wrapped flat format for {@code fieldName}. */ @Override + public int getMaxDimensions(String fieldName) { + return delegate.getMaxDimensions(fieldName); + } + + /** Returns {@link #NAME}. */ @Override + public String toString() { + return NAME; + } +} diff --git a/solr/core/src/java/org/apache/solr/schema/BinaryQuantizedDenseVectorField.java b/solr/core/src/java/org/apache/solr/schema/BinaryQuantizedDenseVectorField.java index d8e15f16435e..28da4f678da9 100644 --- a/solr/core/src/java/org/apache/solr/schema/BinaryQuantizedDenseVectorField.java +++ b/solr/core/src/java/org/apache/solr/schema/BinaryQuantizedDenseVectorField.java @@ -16,12 +16,25 @@ */ package org.apache.solr.schema; +import java.util.Map; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.lucene104.Lucene104HnswScalarQuantizedVectorsFormat; import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding; +import org.apache.solr.common.SolrException; public class BinaryQuantizedDenseVectorField extends DenseVectorField { + /** Runs the inherited initialization with {@code schema} and {@code args}, then rejects {@code knnAlgorithm="flat"} by throwing a BAD_REQUEST {@link SolrException}. */ @Override + public void init(IndexSchema schema, Map<String, String> args) { + super.init(schema, args); + + if (FLAT_ALGORITHM.equals(getKnnAlgorithm())) { + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, + "knnAlgorithm 'flat' is not supported for BinaryQuantizedDenseVectorField"); + } + } + @Override public 
KnnVectorsFormat buildKnnVectorsFormat() { return new Lucene104HnswScalarQuantizedVectorsFormat( diff --git a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java index f29714d5b1ee..d1f1430bcee6 100644 --- a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java +++ b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java @@ -45,6 +45,7 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.hnsw.HnswGraph; import org.apache.solr.common.SolrException; +import org.apache.solr.core.Solr101FlatVectorFormat; import org.apache.solr.search.QParser; import org.apache.solr.search.vector.KnnQParser.EarlyTerminationParams; import org.apache.solr.search.vector.SolrKnnByteVectorQuery; @@ -69,6 +70,7 @@ public class DenseVectorField extends FloatPointField { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); public static final String HNSW_ALGORITHM = "hnsw"; + /** {@code knnAlgorithm} value for which {@link #buildKnnVectorsFormat()} returns a {@link Solr101FlatVectorFormat} instead of an HNSW format. */ public static final String FLAT_ALGORITHM = "flat"; public static final String CAGRA_HNSW_ALGORITHM = "cagra_hnsw"; public static final String DEFAULT_KNN_ALGORITHM = HNSW_ALGORITHM; static final String KNN_VECTOR_DIMENSION = "vectorDimension"; @@ -471,7 +473,11 @@ public DenseVectorParser getVectorBuilder( } public KnnVectorsFormat buildKnnVectorsFormat() { - return new Lucene99HnswVectorsFormat(hnswM, hnswEfConstruction); + /* Flat fields get the Solr flat-format wrapper; every other algorithm value falls through to the HNSW format built from hnswM and hnswEfConstruction. */ if (FLAT_ALGORITHM.equals(knnAlgorithm)) { + return new Solr101FlatVectorFormat(); + } else { + return new Lucene99HnswVectorsFormat(hnswM, hnswEfConstruction); + } } @Override @@ -503,6 +509,13 @@ public Query getKnnVectorQuery( EarlyTerminationParams earlyTermination, Integer filteredSearchThreshold) { + /* Fail fast with BAD_REQUEST before any vector parsing when the field uses the flat algorithm. */ if (FLAT_ALGORITHM.equals(knnAlgorithm)) { + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, + "KNN vector queries are not supported for fields using knnAlgorithm=\"flat\". 
" + + "Use vectorSimilarity() function queries instead."); + } + DenseVectorParser vectorBuilder = getVectorBuilder(vectorToSearch, DenseVectorParser.BuilderPhase.QUERY); diff --git a/solr/core/src/java/org/apache/solr/schema/ScalarQuantizedDenseVectorField.java b/solr/core/src/java/org/apache/solr/schema/ScalarQuantizedDenseVectorField.java index 4d91d1eafe2b..26a33be48a24 100644 --- a/solr/core/src/java/org/apache/solr/schema/ScalarQuantizedDenseVectorField.java +++ b/solr/core/src/java/org/apache/solr/schema/ScalarQuantizedDenseVectorField.java @@ -122,6 +122,12 @@ public void init(IndexSchema schema, Map args) { } super.init(schema, args); + + if (FLAT_ALGORITHM.equals(getKnnAlgorithm())) { + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, + "knnAlgorithm 'flat' is not supported for ScalarQuantizedDenseVectorField"); + } } @Override diff --git a/solr/core/src/java/org/apache/solr/search/vector/KnnQParser.java b/solr/core/src/java/org/apache/solr/search/vector/KnnQParser.java index 2170f56f19b8..d376bfa5a40f 100644 --- a/solr/core/src/java/org/apache/solr/search/vector/KnnQParser.java +++ b/solr/core/src/java/org/apache/solr/search/vector/KnnQParser.java @@ -119,6 +119,14 @@ public Query parse() throws SyntaxError { final String vectorField = getFieldName(); final SchemaField schemaField = req.getCore().getLatestSchema().getField(getFieldName()); final DenseVectorField denseVectorType = getCheckedFieldType(schemaField); + + if (DenseVectorField.FLAT_ALGORITHM.equals(denseVectorType.getKnnAlgorithm())) { + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, + "The {!knn} query parser is not supported for fields using knnAlgorithm=\"flat\". 
" + + "Use vectorSimilarity() function queries instead."); + } + final String vectorToSearch = getVectorToSearch(); final int topK = localParams.getInt(TOP_K, DEFAULT_TOP_K); diff --git a/solr/core/src/java/org/apache/solr/search/vector/VectorSimilarityQParser.java b/solr/core/src/java/org/apache/solr/search/vector/VectorSimilarityQParser.java index f2d9be3738ad..a88d1dfb3062 100644 --- a/solr/core/src/java/org/apache/solr/search/vector/VectorSimilarityQParser.java +++ b/solr/core/src/java/org/apache/solr/search/vector/VectorSimilarityQParser.java @@ -46,6 +46,14 @@ public Query parse() throws SyntaxError { final String fieldName = getFieldName(); final SchemaField schemaField = req.getCore().getLatestSchema().getField(fieldName); final DenseVectorField denseVectorType = getCheckedFieldType(schemaField); + + if (DenseVectorField.FLAT_ALGORITHM.equals(denseVectorType.getKnnAlgorithm())) { + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, + "The {!vectorSimilarity} query parser is not supported for fields using knnAlgorithm=\"flat\". " + + "Use vectorSimilarity() function queries instead."); + } + final String vectorToSearch = getVectorToSearch(); final float minTraverse = localParams.getFloat(MIN_TRAVERSE, DEFAULT_MIN_TRAVERSE); final Float minReturn = localParams.getFloat(MIN_RETURN); diff --git a/solr/core/src/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat b/solr/core/src/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat new file mode 100644 index 000000000000..2e6fda571dbd --- /dev/null +++ b/solr/core/src/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.solr.core.Solr101FlatVectorFormat diff --git a/solr/core/src/test-files/solr/collection1/conf/bad-schema-densevector-flat-binaryQuantized.xml b/solr/core/src/test-files/solr/collection1/conf/bad-schema-densevector-flat-binaryQuantized.xml new file mode 100644 index 000000000000..f4e6d392f9e4 --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/bad-schema-densevector-flat-binaryQuantized.xml @@ -0,0 +1,29 @@ + + + + + + + + + + + + + id + \ No newline at end of file diff --git a/solr/core/src/test-files/solr/collection1/conf/bad-schema-densevector-flat-scalarQuantized.xml b/solr/core/src/test-files/solr/collection1/conf/bad-schema-densevector-flat-scalarQuantized.xml new file mode 100644 index 000000000000..365bddd03e77 --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/bad-schema-densevector-flat-scalarQuantized.xml @@ -0,0 +1,29 @@ + + + + + + + + + + + + + id + diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-densevector-flat.xml b/solr/core/src/test-files/solr/collection1/conf/schema-densevector-flat.xml new file mode 100644 index 000000000000..a1e66ee3aa27 --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/schema-densevector-flat.xml @@ -0,0 +1,31 @@ + + + + + + + + + + + + + + + id + diff --git a/solr/core/src/test/org/apache/solr/schema/BinaryQuantizedDenseVectorFieldTest.java 
b/solr/core/src/test/org/apache/solr/schema/BinaryQuantizedDenseVectorFieldTest.java index de08d3f7ed22..7e0e798bbf2b 100644 --- a/solr/core/src/test/org/apache/solr/schema/BinaryQuantizedDenseVectorFieldTest.java +++ b/solr/core/src/test/org/apache/solr/schema/BinaryQuantizedDenseVectorFieldTest.java @@ -38,6 +38,14 @@ public void fieldDefinition_correctConfiguration_shouldLoadSchemaField() throws } } + /** Checks via assertConfigs that bad-schema-densevector-flat-binaryQuantized.xml yields the "'flat' is not supported" error for the binary quantized field type. */ @Test + public void fieldDefinition_flatAlgorithm_shouldThrowException() throws Exception { + assertConfigs( + "solrconfig-basic.xml", + "bad-schema-densevector-flat-binaryQuantized.xml", + "knnAlgorithm 'flat' is not supported for BinaryQuantizedDenseVectorField"); + } + // there are no major interface differences between BinaryBitQuantizedDenseVectorField and // DenseVectorField // so we can rely on those tests for validation cases diff --git a/solr/core/src/test/org/apache/solr/schema/DenseVectorFieldTest.java b/solr/core/src/test/org/apache/solr/schema/DenseVectorFieldTest.java index f84eaf40ef0b..a11e184e2e11 100644 --- a/solr/core/src/test/org/apache/solr/schema/DenseVectorFieldTest.java +++ b/solr/core/src/test/org/apache/solr/schema/DenseVectorFieldTest.java @@ -1136,4 +1136,170 @@ public void testFilteredSearchThreshold_seededByteThresholdInInput_shouldSetCust deleteCore(); } } + + /** Loads schema-densevector-flat.xml and checks that field vector_flat reports algorithm "flat", dimension 4, COSINE similarity, and is both indexed and stored. */ @Test + public void fieldDefinition_flatAlgorithm_shouldLoadSchemaField() throws Exception { + try { + initCore("solrconfig_codec.xml", "schema-densevector-flat.xml"); + IndexSchema schema = h.getCore().getLatestSchema(); + + SchemaField vector = schema.getField("vector_flat"); + assertNotNull(vector); + + DenseVectorField type = (DenseVectorField) vector.getType(); + assertThat(type.getKnnAlgorithm(), is("flat")); + assertThat(type.getDimension(), is(4)); + assertThat(type.getSimilarityFunction(), is(VectorSimilarityFunction.COSINE)); + + assertTrue(vector.indexed()); + assertTrue(vector.stored()); + } finally { + deleteCore(); + } + } + + /** Expects a {!knn} query against vector_flat to fail with BAD_REQUEST and a message mentioning knnAlgorithm="flat". */ @Test + public void 
flatAlgorithm_knnQuery_shouldThrowException() throws Exception { + try { + initCore("solrconfig_codec.xml", "schema-densevector-flat.xml"); + + assertQEx( + "Running {!knn} on a flat vector field should raise an Exception", + "knnAlgorithm=\"flat\"", + req("q", "{!knn f=vector_flat topK=2}[1, 2, 3, 4]", "fl", "id"), + SolrException.ErrorCode.BAD_REQUEST); + } finally { + deleteCore(); + } + } + + /** Indexes three documents into vector_flat and checks that the vectorSimilarity() function query returns the exact-match document first with score 1.0, both unfiltered and with an fq restricting the result to two documents. */ @Test + public void flatAlgorithm_vectorSimilarityFunction_shouldReturnResults() throws Exception { + try { + initCore("solrconfig_codec.xml", "schema-densevector-flat.xml"); + + SolrInputDocument doc1 = new SolrInputDocument(); + doc1.addField("id", "0"); + doc1.addField("vector_flat", Arrays.asList(1, 2, 3, 4)); + assertU(adoc(doc1)); + + SolrInputDocument doc2 = new SolrInputDocument(); + doc2.addField("id", "1"); + doc2.addField("vector_flat", Arrays.asList(2, 3, 4, 5)); + assertU(adoc(doc2)); + + SolrInputDocument doc3 = new SolrInputDocument(); + doc3.addField("id", "2"); + doc3.addField("vector_flat", Arrays.asList(100, 200, 50, 25)); + assertU(adoc(doc3)); + + assertU(commit()); + + assertJQ( + req( + "q", "{!func}vectorSimilarity(vector_flat,[1, 2, 3, 4])", + "fl", "id,score"), + "/response/numFound==3", + "/response/docs/[0]/id=='0'", + "/response/docs/[0]/score==1.0"); + + // Filtered test + assertJQ( + req( + "q", "{!func}vectorSimilarity(vector_flat,[1, 2, 3, 4])", + "fq", "id:(0 2)", + "fl", "id,score"), + "/response/numFound==2", + "/response/docs/[0]/id=='0'", + "/response/docs/[0]/score==1.0"); + } finally { + deleteCore(); + } + } + + /** Indexes one document and checks that the stored vector_flat value is returned unchanged when requested through fl. */ @Test + public void flatAlgorithm_storedField_shouldBeReturnedInResults() throws Exception { + try { + initCore("solrconfig_codec.xml", "schema-densevector-flat.xml"); + + SolrInputDocument doc1 = new SolrInputDocument(); + doc1.addField("id", "0"); + doc1.addField("vector_flat", Arrays.asList(1.1f, 2.2f, 3.3f, 4.4f)); + assertU(adoc(doc1)); + assertU(commit()); + + assertJQ( + req("q", "id:0", "fl", "vector_flat"), + 
"/response/docs/[0]=={'vector_flat':[1.1,2.2,3.3,4.4]}"); + } finally { + deleteCore(); + } + } + + /** Repeats the vectorSimilarity() function-query check against the vector_flat_byte field: two documents indexed, exact match expected first with score 1.0. */ @Test + public void flatAlgorithm_byteEncoding_shouldWork() throws Exception { + try { + initCore("solrconfig_codec.xml", "schema-densevector-flat.xml"); + + SolrInputDocument doc1 = new SolrInputDocument(); + doc1.addField("id", "0"); + doc1.addField("vector_flat_byte", Arrays.asList(1, 2, 3, 4)); + assertU(adoc(doc1)); + + SolrInputDocument doc2 = new SolrInputDocument(); + doc2.addField("id", "1"); + doc2.addField("vector_flat_byte", Arrays.asList(5, 6, 7, 8)); + assertU(adoc(doc2)); + + assertU(commit()); + + assertJQ( + req( + "q", "{!func}vectorSimilarity(vector_flat_byte,[1, 2, 3, 4])", + "fl", "id,score"), + "/response/numFound==2", + "/response/docs/[0]/id=='0'", + "/response/docs/[0]/score==1.0"); + } finally { + deleteCore(); + } + } + + /** Expects a {!vectorSimilarity} parser query against vector_flat to fail with BAD_REQUEST and a message mentioning knnAlgorithm="flat". */ @Test + public void flatAlgorithm_vectorSimilarityQParser_shouldThrowException() throws Exception { + try { + initCore("solrconfig_codec.xml", "schema-densevector-flat.xml"); + + assertQEx( + "Running {!vectorSimilarity} on a flat vector field should raise an Exception", + "knnAlgorithm=\"flat\"", + req( + "q", "{!vectorSimilarity f=vector_flat minReturn=0.99}[1, 2, 3, 4]", + "fl", "id"), + SolrException.ErrorCode.BAD_REQUEST); + } finally { + deleteCore(); + } + } + + /** Calls getKnnVectorQuery directly on the vector_flat field type and expects a SolrException whose message mentions knnAlgorithm="flat". */ @Test + public void flatAlgorithm_getKnnVectorQuery_shouldThrowException() throws Exception { + try { + initCore("solrconfig_codec.xml", "schema-densevector-flat.xml"); + IndexSchema schema = h.getCore().getLatestSchema(); + SchemaField vectorField = schema.getField("vector_flat"); + assertNotNull(vectorField); + DenseVectorField type = (DenseVectorField) vectorField.getType(); + + SolrException ex = + expectThrows( + SolrException.class, + () -> + type.getKnnVectorQuery( + "vector_flat", "[1, 2, 3, 4]", 3, 3, null, null, null, null)); + assertTrue(ex.getMessage().contains("knnAlgorithm=\"flat\"")); + } finally { + deleteCore(); + } + } } diff --git 
a/solr/core/src/test/org/apache/solr/schema/ScalarQuantizedDenseVectorFieldTest.java b/solr/core/src/test/org/apache/solr/schema/ScalarQuantizedDenseVectorFieldTest.java index 8168288c044f..94ebc63003be 100644 --- a/solr/core/src/test/org/apache/solr/schema/ScalarQuantizedDenseVectorFieldTest.java +++ b/solr/core/src/test/org/apache/solr/schema/ScalarQuantizedDenseVectorFieldTest.java @@ -20,13 +20,9 @@ import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.solr.core.AbstractBadConfigTestBase; -import org.junit.Before; import org.junit.Test; public class ScalarQuantizedDenseVectorFieldTest extends AbstractBadConfigTestBase { - @Before - public void init() {} - @Test public void fieldTypeDefinition_invalidBitSize_shouldThrowException() throws Exception { assertConfigs( @@ -132,4 +128,12 @@ public void fieldDefinition_dynamicConfidenceInterval_shouldLoadSchemaField() th deleteCore(); } } + + /** Checks via assertConfigs that bad-schema-densevector-flat-scalarQuantized.xml yields the "'flat' is not supported" error for the scalar quantized field type. */ @Test + public void fieldDefinition_flatAlgorithm_shouldThrowException() throws Exception { + assertConfigs( + "solrconfig-basic.xml", + "bad-schema-densevector-flat-scalarQuantized.xml", + "knnAlgorithm 'flat' is not supported for ScalarQuantizedDenseVectorField"); + } } diff --git a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc index 2681cc598409..a9e965f0ae61 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc @@ -126,8 +126,10 @@ Here's how `DenseVectorField` can be configured with the advanced hyperparameter + (advanced) Specifies the underlying knn algorithm to use + -Accepted values: `hnsw`, `cagra_hnsw` (requires GPU acceleration setup). - +Accepted values: `hnsw`, `flat`, `cagra_hnsw` (requires GPU acceleration setup). ++ +The `flat` algorithm stores vectors without building an HNSW graph. 
This avoids the indexing overhead of graph construction, but does not support the `{!knn}`, `{!knn_text_to_vector}`, or `{!vectorSimilarity}` query parsers. Use `vectorSimilarity()` function queries to score and rank documents by vector similarity. See <<Flat Vector Index>> for details. ++ Please note that the `knnAlgorithm` accepted values may change in future releases. `vectorEncoding`:: @@ -837,6 +839,17 @@ Details about using the ReRank Query Parser can be found in the xref:query-guide ==== +== Flat Vector Index + +Setting `knnAlgorithm="flat"` stores vectors without building an HNSW graph, avoiding the indexing cost of graph construction. + +Flat fields do not support the `{!knn}`, `{!knn_text_to_vector}`, or `{!vectorSimilarity}` query parsers. +Use `vectorSimilarity()` function queries to score and rank by similarity: + +[source] +q={!func}vectorSimilarity(vector,[1.0, 2.0, 3.0, 4.0])&fl=id,score + + == Indexing Multi-Vectors for Late Interaction For Late Interaction usecases, Solr provides a `StrFloatLateInteractionVectorField` field type, which supports indexing a variable length "Multi-Vector" of Float vectors, serialized as as a single String value.