Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions docs/docs/multimodal-table/global-index.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,38 @@ Supported vector index options:
| `<index-type>.hnsw.ef-construction` | `150` | HNSW construction search width for `ivf-hnsw-flat` and `ivf-hnsw-sq`. |
| `<index-type>.hnsw.max-level` | `7` | Maximum HNSW level for `ivf-hnsw-flat` and `ivf-hnsw-sq`. |

**Per-Field Options**

The options above can also be set at the table level (in `TBLPROPERTIES`), where they are shared
by every vector column of the same index type. When a table has multiple vector columns, you can
scope an option to a single column with `fields.<field-name>.<option>`. For that column, the
field-level form takes precedence over the shared `<index-type>.<option>` value. `<field-name>`
must match the stored table column name exactly. Field-level vector options do not include the
index-type prefix; for example, use `fields.image_embedding.nlist` to override the shared
`ivf-pq.nlist` option for `image_embedding`:

```sql
CREATE TABLE my_table (
id INT,
title_embedding ARRAY<FLOAT>,
image_embedding ARRAY<FLOAT>
) TBLPROPERTIES (
'bucket' = '-1',
'row-tracking.enabled' = 'true',
'data-evolution.enabled' = 'true',
'global-index.enabled' = 'true',
-- per-column dimensions
'fields.title_embedding.dimension' = '768',
'fields.image_embedding.dimension' = '512',
-- shared by every ivf-pq column, overridden only for 'image_embedding'
'ivf-pq.nlist' = '256',
'fields.image_embedding.nlist' = '512'
);
```

With the properties above, an `ivf-pq` index on `title_embedding` is built with the shared
`nlist=256`, while an index on `image_embedding` uses the field-level `nlist=512`.

**Vector Search**

Search-time options are passed with each vector search request:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,20 @@ public abstract class VectorGlobalIndexerFactory implements GlobalIndexerFactory
public GlobalIndexer create(DataField field, Options options) {
String identifier = identifier();
return new VectorGlobalIndexer(
field.type(), nativeOptions(field.type(), options, identifier), identifier);
field.type(),
nativeOptions(field.type(), options, identifier, field.name()),
identifier);
}

static Map<String, String> nativeOptions(
DataType fieldType, Options tableOptions, String identifier) {
DataType fieldType, Options tableOptions, String identifier, String fieldName) {
Map<String, String> nativeOptions = new LinkedHashMap<>();
String optionPrefix = identifier + ".";
for (Map.Entry<String, String> entry : tableOptions.toMap().entrySet()) {
String fieldPrefix = "fields." + fieldName + ".";
Map<String, String> tableOptionsMap = tableOptions.toMap();

// First collect index-type level options, e.g. <index-type>.xxx.
for (Map.Entry<String, String> entry : tableOptionsMap.entrySet()) {
String optionKey = entry.getKey();
if (optionKey.startsWith(optionPrefix)) {
String nativeKey = nativeOptionKey(optionKey.substring(optionPrefix.length()));
Expand All @@ -53,6 +59,19 @@ static Map<String, String> nativeOptions(
}
}
}

// Then collect field level options, e.g. fields.<field-name>.xxx, which take precedence
// over the index-type level options for this field.
for (Map.Entry<String, String> entry : tableOptionsMap.entrySet()) {
String optionKey = entry.getKey();
if (optionKey.startsWith(fieldPrefix)) {
String nativeKey = nativeOptionKey(optionKey.substring(fieldPrefix.length()));
if (nativeKey != null) {
nativeOptions.put(nativeKey, entry.getValue());
}
}
}

nativeOptions.put("index.type", identifier.replace('-', '_'));
nativeOptions.put(
"dimension", String.valueOf(dimension(fieldType, nativeOptions, identifier)));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ public void testViaIndexer() throws IOException {
new VectorGlobalIndexer(
vectorType,
VectorGlobalIndexerFactory.nativeOptions(
vectorType, options, IVF_PQ_IDENTIFIER),
vectorType, options, IVF_PQ_IDENTIFIER, fieldName),
IVF_PQ_IDENTIFIER);

GlobalIndexFileWriter fileWriter = createFileWriter(indexPath);
Expand All @@ -373,7 +373,8 @@ private VectorGlobalIndexWriter createIvfPqWriter(
return new VectorGlobalIndexWriter(
fileWriter,
fieldType,
VectorGlobalIndexerFactory.nativeOptions(fieldType, options, IVF_PQ_IDENTIFIER),
VectorGlobalIndexerFactory.nativeOptions(
fieldType, options, IVF_PQ_IDENTIFIER, fieldName),
IVF_PQ_IDENTIFIER);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ public void testNativeOptionsOnlyUsesIdentifierPrefix() {
VectorGlobalIndexerFactory.nativeOptions(
new ArrayType(new FloatType()),
options,
IvfFlatVectorGlobalIndexerFactory.IDENTIFIER);
IvfFlatVectorGlobalIndexerFactory.IDENTIFIER,
"vec");

assertThat(nativeOptions)
.containsEntry("index.type", "ivf_flat")
Expand All @@ -92,7 +93,8 @@ public void testNativeOptionsUsesVectorTypeDimension() {
VectorGlobalIndexerFactory.nativeOptions(
new VectorType(8, new FloatType()),
options,
IvfFlatVectorGlobalIndexerFactory.IDENTIFIER);
IvfFlatVectorGlobalIndexerFactory.IDENTIFIER,
"vec");

assertThat(nativeOptions).containsEntry("dimension", "8");
}
Expand All @@ -107,9 +109,112 @@ public void testInvalidDimension() {
VectorGlobalIndexerFactory.nativeOptions(
new ArrayType(new FloatType()),
options,
IvfFlatVectorGlobalIndexerFactory.IDENTIFIER))
IvfFlatVectorGlobalIndexerFactory.IDENTIFIER,
"vec"))
.isInstanceOf(IllegalArgumentException.class)
.hasMessageContaining("ivf-flat.dimension")
.hasMessageContaining("positive integer");
}

@Test
public void testFieldLevelOptionsOverrideIndexTypeOptions() {
    // Shared ivf-flat settings plus a field-scoped nlist for column "vec".
    Options tableOptions = new Options();
    tableOptions.setString("ivf-flat.dimension", "32");
    tableOptions.setString("ivf-flat.nlist", "128");
    tableOptions.setString("fields.vec.nlist", "256");

    ArrayType floatArrayType = new ArrayType(new FloatType());
    Map<String, String> resolved =
            VectorGlobalIndexerFactory.nativeOptions(
                    floatArrayType,
                    tableOptions,
                    IvfFlatVectorGlobalIndexerFactory.IDENTIFIER,
                    "vec");

    // The dimension has no field-level override, so the shared value is expected.
    assertThat(resolved).containsEntry("dimension", "32");
    // For nlist the field-level value must replace the shared one.
    assertThat(resolved).containsEntry("nlist", "256");
    assertThat(resolved).doesNotContainEntry("nlist", "128");
}

@Test
public void testFieldLevelDimensionOverridesIndexTypeDimension() {
    // Both the shared and the field-scoped dimension are present for column "vec".
    Options tableOptions = new Options();
    tableOptions.setString("ivf-flat.dimension", "32");
    tableOptions.setString("fields.vec.dimension", "64");

    ArrayType floatArrayType = new ArrayType(new FloatType());
    Map<String, String> resolved =
            VectorGlobalIndexerFactory.nativeOptions(
                    floatArrayType,
                    tableOptions,
                    IvfFlatVectorGlobalIndexerFactory.IDENTIFIER,
                    "vec");

    // The field-scoped dimension is the one expected in the native options.
    assertThat(resolved).containsEntry("dimension", "64");
}

@Test
public void testFieldLevelOptionsOnlyApplyToMatchingField() {
    // The field-scoped nlist targets "vec", but options are resolved for "other".
    Options tableOptions = new Options();
    tableOptions.setString("ivf-flat.nlist", "128");
    tableOptions.setString("fields.vec.nlist", "256");

    ArrayType floatArrayType = new ArrayType(new FloatType());
    Map<String, String> resolved =
            VectorGlobalIndexerFactory.nativeOptions(
                    floatArrayType,
                    tableOptions,
                    IvfFlatVectorGlobalIndexerFactory.IDENTIFIER,
                    "other");

    // A different field keeps the shared index-type value.
    assertThat(resolved).containsEntry("nlist", "128");
}

@Test
public void testFieldLevelOptionsRequireExactFieldName() {
    // "vec_extra" merely starts with "vec"; its options must not leak into "vec".
    Options tableOptions = new Options();
    tableOptions.setString("ivf-flat.nlist", "128");
    tableOptions.setString("fields.vec_extra.nlist", "512");

    ArrayType floatArrayType = new ArrayType(new FloatType());
    Map<String, String> resolved =
            VectorGlobalIndexerFactory.nativeOptions(
                    floatArrayType,
                    tableOptions,
                    IvfFlatVectorGlobalIndexerFactory.IDENTIFIER,
                    "vec");

    // Only the shared value is expected for "vec".
    assertThat(resolved).containsEntry("nlist", "128");
}

@Test
public void testFieldLevelOptionsWithoutIndexTypeOption() {
    // Only a field-scoped option is configured; there is no ivf-flat.* counterpart.
    Options tableOptions = new Options();
    tableOptions.setString("fields.vec.distance.metric", "cosine");

    ArrayType floatArrayType = new ArrayType(new FloatType());
    Map<String, String> resolved =
            VectorGlobalIndexerFactory.nativeOptions(
                    floatArrayType,
                    tableOptions,
                    IvfFlatVectorGlobalIndexerFactory.IDENTIFIER,
                    "vec");

    // The field-level "distance.metric" key is expected under the native key "metric".
    assertThat(resolved).containsEntry("metric", "cosine");
}

@Test
public void testFieldLevelVectorOptionsCoexistWithCoreFieldOptions() {
    // "fields.vec.aggregate-function" shares the fields.<name>. prefix with the vector option.
    Options tableOptions = new Options();
    tableOptions.setString("ivf-flat.nlist", "128");
    tableOptions.setString("fields.vec.nlist", "256");
    tableOptions.setString("fields.vec.aggregate-function", "sum");

    ArrayType floatArrayType = new ArrayType(new FloatType());
    Map<String, String> resolved =
            VectorGlobalIndexerFactory.nativeOptions(
                    floatArrayType,
                    tableOptions,
                    IvfFlatVectorGlobalIndexerFactory.IDENTIFIER,
                    "vec");

    // The vector override is applied, and the unrelated field option is not passed through.
    assertThat(resolved).containsEntry("nlist", "256");
    assertThat(resolved).doesNotContainKey("aggregate-function");
}
}
Loading