diff --git a/solr/core/src/java/org/apache/solr/schema/BoolField.java b/solr/core/src/java/org/apache/solr/schema/BoolField.java index cefd4a0bdbe7..612bc778998e 100644 --- a/solr/core/src/java/org/apache/solr/schema/BoolField.java +++ b/solr/core/src/java/org/apache/solr/schema/BoolField.java @@ -23,8 +23,6 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.document.SortedDocValuesField; -import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.LeafReaderContext; @@ -189,9 +187,13 @@ public List createFields(SchemaField field, Object value) { IndexableField docval; final BytesRef bytes = new BytesRef(toInternal(value.toString())); if (field.multiValued()) { - docval = new SortedSetDocValuesField(field.getName(), bytes); + docval = + DocValuesFieldUtil.createSortedSetDocValuesField( + field.getName(), bytes, field.hasDocValuesSkipList()); } else { - docval = new SortedDocValuesField(field.getName(), bytes); + docval = + DocValuesFieldUtil.createSortedDocValuesField( + field.getName(), bytes, field.hasDocValuesSkipList()); } // Only create a list of we have 2 values... 
diff --git a/solr/core/src/java/org/apache/solr/schema/CollationField.java b/solr/core/src/java/org/apache/solr/schema/CollationField.java index ce4a1828dc7e..92ac5179e4b3 100644 --- a/solr/core/src/java/org/apache/solr/schema/CollationField.java +++ b/solr/core/src/java/org/apache/solr/schema/CollationField.java @@ -30,7 +30,6 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.collation.CollationKeyAnalyzer; -import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.Query; @@ -253,9 +252,13 @@ public List createFields(SchemaField field, Object value) { fields.add(createField(field, value)); final BytesRef bytes = getCollationKey(field.getName(), value.toString()); if (field.multiValued()) { - fields.add(new SortedSetDocValuesField(field.getName(), bytes)); + fields.add( + DocValuesFieldUtil.createSortedSetDocValuesField( + field.getName(), bytes, field.hasDocValuesSkipList())); } else { - fields.add(new SortedDocValuesField(field.getName(), bytes)); + fields.add( + DocValuesFieldUtil.createSortedDocValuesField( + field.getName(), bytes, field.hasDocValuesSkipList())); } return fields; } else { diff --git a/solr/core/src/java/org/apache/solr/schema/DocValuesFieldUtil.java b/solr/core/src/java/org/apache/solr/schema/DocValuesFieldUtil.java new file mode 100644 index 000000000000..f583f6282602 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/schema/DocValuesFieldUtil.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.schema; + +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.util.BytesRef; + +/** + * Utility class for creating DocValues {@link IndexableField} instances with optional skip index + * support. + * + *

When {@code hasDocValuesSkipList} is true, this utility creates indexed DocValues fields that + * include a range skip index for more efficient range queries. Otherwise, it creates standard + * DocValues fields. + */ +public final class DocValuesFieldUtil { + + private DocValuesFieldUtil() { + // utility class + } + + /** + * Creates a sorted DocValues field for a single-value (non-multi-valued) string/bytes field. + * + * @param fieldName the field name + * @param value the bytes value + * @param hasDocValuesSkipList whether to include a range skip index + * @return the created IndexableField + */ + public static IndexableField createSortedDocValuesField( + String fieldName, BytesRef value, boolean hasDocValuesSkipList) { + return hasDocValuesSkipList + ? SortedDocValuesField.indexedField(fieldName, value) + : new SortedDocValuesField(fieldName, value); + } + + /** + * Creates a sorted set DocValues field for a multi-valued string/bytes field. + * + * @param fieldName the field name + * @param value the bytes value + * @param hasDocValuesSkipList whether to include a range skip index + * @return the created IndexableField + */ + public static IndexableField createSortedSetDocValuesField( + String fieldName, BytesRef value, boolean hasDocValuesSkipList) { + return hasDocValuesSkipList + ? SortedSetDocValuesField.indexedField(fieldName, value) + : new SortedSetDocValuesField(fieldName, value); + } + + /** + * Creates a numeric DocValues field for a single-value numeric field. + * + * @param fieldName the field name + * @param value the long value + * @param hasDocValuesSkipList whether to include a range skip index + * @return the created IndexableField + */ + public static IndexableField createNumericDocValuesField( + String fieldName, long value, boolean hasDocValuesSkipList) { + return hasDocValuesSkipList + ? 
NumericDocValuesField.indexedField(fieldName, value) + : new NumericDocValuesField(fieldName, value); + } + + /** + * Creates a sorted numeric DocValues field for a multi-valued numeric field. + * + * @param fieldName the field name + * @param value the long value + * @param hasDocValuesSkipList whether to include a range skip index + * @return the created IndexableField + */ + public static IndexableField createSortedNumericDocValuesField( + String fieldName, long value, boolean hasDocValuesSkipList) { + return hasDocValuesSkipList + ? SortedNumericDocValuesField.indexedField(fieldName, value) + : new SortedNumericDocValuesField(fieldName, value); + } +} diff --git a/solr/core/src/java/org/apache/solr/schema/EnumFieldType.java b/solr/core/src/java/org/apache/solr/schema/EnumFieldType.java index 47aaf472d465..1b512f501685 100644 --- a/solr/core/src/java/org/apache/solr/schema/EnumFieldType.java +++ b/solr/core/src/java/org/apache/solr/schema/EnumFieldType.java @@ -419,9 +419,13 @@ public List createFields(SchemaField sf, Object value) { fields.add(field); final long longValue = field.numericValue().longValue(); if (sf.multiValued()) { - fields.add(new SortedNumericDocValuesField(sf.getName(), longValue)); + fields.add( + DocValuesFieldUtil.createSortedNumericDocValuesField( + sf.getName(), longValue, sf.hasDocValuesSkipList())); } else { - fields.add(new NumericDocValuesField(sf.getName(), longValue)); + fields.add( + DocValuesFieldUtil.createNumericDocValuesField( + sf.getName(), longValue, sf.hasDocValuesSkipList())); } return fields; } diff --git a/solr/core/src/java/org/apache/solr/schema/FieldProperties.java b/solr/core/src/java/org/apache/solr/schema/FieldProperties.java index 91f3caa38e63..7c408fbbc926 100644 --- a/solr/core/src/java/org/apache/solr/schema/FieldProperties.java +++ b/solr/core/src/java/org/apache/solr/schema/FieldProperties.java @@ -52,6 +52,7 @@ public abstract class FieldProperties { protected static final int USE_DOCVALUES_AS_STORED = 
0b100000000000000000; protected static final int LARGE_FIELD = 0b1000000000000000000; protected static final int UNINVERTIBLE = 0b10000000000000000000; + protected static final int DOC_VALUES_SKIP_LIST = 0b100000000000000000000; static final String[] propertyNames = { "indexed", @@ -73,7 +74,8 @@ public abstract class FieldProperties { "termPayloads", "useDocValuesAsStored", "large", - "uninvertible" + "uninvertible", + "docValuesSkipList" }; static final Map propertyMap = new HashMap<>(); diff --git a/solr/core/src/java/org/apache/solr/schema/PointField.java b/solr/core/src/java/org/apache/solr/schema/PointField.java index e74e73f13207..c31257181aa0 100644 --- a/solr/core/src/java/org/apache/solr/schema/PointField.java +++ b/solr/core/src/java/org/apache/solr/schema/PointField.java @@ -23,8 +23,6 @@ import java.util.Date; import java.util.List; import java.util.Map; -import org.apache.lucene.document.NumericDocValuesField; -import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; import org.apache.lucene.index.IndexableField; import org.apache.lucene.queries.function.ValueSource; @@ -292,7 +290,9 @@ public List createFields(SchemaField sf, Object value) { assert numericValue instanceof Double; bits = Double.doubleToLongBits(numericValue.doubleValue()); } - fields.add(new NumericDocValuesField(sf.getName(), bits)); + fields.add( + DocValuesFieldUtil.createNumericDocValuesField( + sf.getName(), bits, sf.hasDocValuesSkipList())); } else { // MultiValued if (numericValue instanceof Integer || numericValue instanceof Long) { @@ -303,7 +303,9 @@ public List createFields(SchemaField sf, Object value) { assert numericValue instanceof Double; bits = NumericUtils.doubleToSortableLong(numericValue.doubleValue()); } - fields.add(new SortedNumericDocValuesField(sf.getName(), bits)); + fields.add( + DocValuesFieldUtil.createSortedNumericDocValuesField( + sf.getName(), bits, sf.hasDocValuesSkipList())); } } if (sf.stored()) { 
diff --git a/solr/core/src/java/org/apache/solr/schema/SchemaField.java b/solr/core/src/java/org/apache/solr/schema/SchemaField.java index ef3c5559affe..eefc7c29a56c 100644 --- a/solr/core/src/java/org/apache/solr/schema/SchemaField.java +++ b/solr/core/src/java/org/apache/solr/schema/SchemaField.java @@ -112,6 +112,10 @@ public boolean hasDocValues() { return (properties & DOC_VALUES) != 0; } + public boolean hasDocValuesSkipList() { + return (properties & DOC_VALUES_SKIP_LIST) != 0; + } + public boolean storeTermVector() { return (properties & STORE_TERMVECTORS) != 0; } @@ -373,6 +377,18 @@ static int calcProps(String name, FieldType ft, Map props) { p &= ~pp; } + if (on(falseProps, DOC_VALUES)) { + int pp = DOC_VALUES_SKIP_LIST; + if (on(DOC_VALUES_SKIP_LIST, trueProps)) { + throw new RuntimeException( + "SchemaField: " + + name + + " conflicting 'true' field options for non-docValues field:" + + props); + } + p &= ~pp; + } + if (on(falseProps, INDEXED)) { int pp = (OMIT_NORMS | OMIT_TF_POSITIONS | OMIT_POSITIONS); if (on(pp, falseProps)) { @@ -466,6 +482,7 @@ public SimpleOrderedMap getNamedPropertyValues(boolean showDefaults) { properties.add(getPropertyName(REQUIRED), isRequired()); properties.add(getPropertyName(TOKENIZED), isTokenized()); properties.add(getPropertyName(USE_DOCVALUES_AS_STORED), useDocValuesAsStored()); + properties.add(getPropertyName(DOC_VALUES_SKIP_LIST), hasDocValuesSkipList()); // The BINARY property is always false // properties.add(getPropertyName(BINARY), isBinary()); } else { @@ -532,6 +549,11 @@ public DocValuesType docValuesType() { return DocValuesType.NONE; } + /** + * For fields with docValues the underlying Lucene field is created without passing these values + * as is. Instead the creating class should check on {@link #hasDocValuesSkipList()} and create + * the appropriate field type. 
+ */ @Override public DocValuesSkipIndexType docValuesSkipIndexType() { return DocValuesSkipIndexType.NONE; diff --git a/solr/core/src/java/org/apache/solr/schema/SortableTextField.java b/solr/core/src/java/org/apache/solr/schema/SortableTextField.java index 02d74724f882..872150e024c7 100644 --- a/solr/core/src/java/org/apache/solr/schema/SortableTextField.java +++ b/solr/core/src/java/org/apache/solr/schema/SortableTextField.java @@ -19,8 +19,6 @@ import java.util.Arrays; import java.util.List; import java.util.Map; -import org.apache.lucene.document.SortedDocValuesField; -import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.IndexableField; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource; @@ -133,8 +131,10 @@ private static List getIndexableFields( SchemaField field, IndexableField f, BytesRef bytes) { final IndexableField docval = field.multiValued() - ? new SortedSetDocValuesField(field.getName(), bytes) - : new SortedDocValuesField(field.getName(), bytes); + ? 
DocValuesFieldUtil.createSortedSetDocValuesField( + field.getName(), bytes, field.hasDocValuesSkipList()) + : DocValuesFieldUtil.createSortedDocValuesField( + field.getName(), bytes, field.hasDocValuesSkipList()); if (null == f) { return List.of(docval); diff --git a/solr/core/src/java/org/apache/solr/schema/StrField.java b/solr/core/src/java/org/apache/solr/schema/StrField.java index bc437553fe43..8dbec0e14585 100644 --- a/solr/core/src/java/org/apache/solr/schema/StrField.java +++ b/solr/core/src/java/org/apache/solr/schema/StrField.java @@ -20,8 +20,6 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; -import org.apache.lucene.document.SortedDocValuesField; -import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.IndexableField; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource; @@ -49,9 +47,13 @@ public List createFields(SchemaField field, Object value) { IndexableField docval; final BytesRef bytes = getBytesRef(value); if (field.multiValued()) { - docval = new SortedSetDocValuesField(field.getName(), bytes); + docval = + DocValuesFieldUtil.createSortedSetDocValuesField( + field.getName(), bytes, field.hasDocValuesSkipList()); } else { - docval = new SortedDocValuesField(field.getName(), bytes); + docval = + DocValuesFieldUtil.createSortedDocValuesField( + field.getName(), bytes, field.hasDocValuesSkipList()); } // Only create a list of we have 2 values... 
diff --git a/solr/core/src/java/org/apache/solr/schema/TrieField.java b/solr/core/src/java/org/apache/solr/schema/TrieField.java index 9512176d33d1..3218c18e9abf 100644 --- a/solr/core/src/java/org/apache/solr/schema/TrieField.java +++ b/solr/core/src/java/org/apache/solr/schema/TrieField.java @@ -25,8 +25,6 @@ import java.util.Locale; import java.util.Map; import org.apache.lucene.document.Field; -import org.apache.lucene.document.NumericDocValuesField; -import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.LeafReaderContext; @@ -673,7 +671,9 @@ public List createFields(SchemaField sf, Object value) { if (sf.multiValued()) { BytesRefBuilder bytes = new BytesRefBuilder(); storedToIndexed(field, bytes); - fields.add(new SortedSetDocValuesField(sf.getName(), bytes.get())); + fields.add( + DocValuesFieldUtil.createSortedSetDocValuesField( + sf.getName(), bytes.get(), sf.hasDocValuesSkipList())); } else { final long bits; if (field.numericValue() instanceof Integer || field.numericValue() instanceof Long) { @@ -684,7 +684,9 @@ public List createFields(SchemaField sf, Object value) { assert field.numericValue() instanceof Double; bits = Double.doubleToLongBits(field.numericValue().doubleValue()); } - fields.add(new NumericDocValuesField(sf.getName(), bits)); + fields.add( + DocValuesFieldUtil.createNumericDocValuesField( + sf.getName(), bits, sf.hasDocValuesSkipList())); } return fields; diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-docValuesSkipList.xml b/solr/core/src/test-files/solr/collection1/conf/schema-docValuesSkipList.xml new file mode 100644 index 000000000000..823c98b9dda0 --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/schema-docValuesSkipList.xml @@ -0,0 +1,104 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + id + + diff --git a/solr/core/src/test/org/apache/solr/rest/schema/TestFieldResource.java b/solr/core/src/test/org/apache/solr/rest/schema/TestFieldResource.java index 5eb97247a399..012597d2f26b 100644 --- a/solr/core/src/test/org/apache/solr/rest/schema/TestFieldResource.java +++ b/solr/core/src/test/org/apache/solr/rest/schema/TestFieldResource.java @@ -25,7 +25,7 @@ public void testGetField() { assertQ( "/schema/fields/test_postv?indent=on&wt=xml&showDefaults=true", "count(/response/lst[@name='field']) = 1", - "count(/response/lst[@name='field']/*) = 19", + "count(/response/lst[@name='field']/*) = 20", "/response/lst[@name='field']/str[@name='name'] = 'test_postv'", "/response/lst[@name='field']/str[@name='type'] = 'text'", "/response/lst[@name='field']/bool[@name='indexed'] = 'true'", @@ -44,7 +44,8 @@ public void testGetField() { "/response/lst[@name='field']/bool[@name='large'] = 'false'", "/response/lst[@name='field']/bool[@name='required'] = 'false'", "/response/lst[@name='field']/bool[@name='tokenized'] = 'true'", - "/response/lst[@name='field']/bool[@name='useDocValuesAsStored'] = 'true'"); + "/response/lst[@name='field']/bool[@name='useDocValuesAsStored'] = 'true'", + "/response/lst[@name='field']/bool[@name='docValuesSkipList'] = 'false'"); } @Test diff --git a/solr/core/src/test/org/apache/solr/schema/DocValuesSkipListTest.java b/solr/core/src/test/org/apache/solr/schema/DocValuesSkipListTest.java new file mode 100644 index 000000000000..7b39f5ad984f --- /dev/null +++ b/solr/core/src/test/org/apache/solr/schema/DocValuesSkipListTest.java @@ -0,0 +1,661 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.schema; + +import java.io.IOException; +import java.lang.invoke.MethodHandles; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Date; +import java.util.List; +import java.util.Objects; +import java.util.function.Function; +import java.util.function.Supplier; +import org.apache.lucene.index.DocValuesSkipIndexType; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.queries.function.FunctionValues; +import org.apache.lucene.util.NumericUtils; +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.core.SolrCore; +import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.util.RefCounted; +import org.junit.BeforeClass; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class DocValuesSkipListTest extends SolrTestCaseJ4 { + + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + private static final String[] DOC_VALUES_FIELDS = { + "floatdv", + "intdv", + "doubledv", + "longdv", + "datedv", + "stringdv", + "booldv", + "collateddv", + "sortableTextdv", + "enumdv", + "trieIntdv", + "trieLongdv", + "trieFloatdv", + "trieDoubledv", + "trieDatedv", + "floatdvs", + "intdvs", + "doubledvs", + "longdvs", + "datedvs", + "stringdvs", + "booldvs", + "collateddvs", + "sortableTextdvs", + "enumdvs", + 
"trieIntdvs", + "trieLongdvs", + "trieFloatdvs", + "trieDoubledvs", + "trieDatedvs" + }; + + @BeforeClass + public static void beforeTests() throws Exception { + initCore("solrconfig-basic.xml", "schema-docValuesSkipList.xml"); + + // sanity check our schema meets our expectations + final IndexSchema schema = h.getCore().getLatestSchema(); + for (String fieldName : DOC_VALUES_FIELDS) { + final SchemaField sf = schema.getField(fieldName); + if (fieldName.endsWith("dvs")) { + assertTrue( + fieldName + " is not multiValued, test is useless, who changed the schema?", + sf.multiValued()); + } else { + assertFalse( + fieldName + " is multiValued, test is useless, who changed the schema?", + sf.multiValued()); + } + if (!fieldName.contains("enum")) { + assertFalse( + fieldName + " is indexed, test is useless, who changed the schema?", sf.indexed()); + } else { + assertTrue( + fieldName + " is not indexed, test is useless, who changed the schema?", sf.indexed()); + } + assertTrue( + fieldName + " has no docValues, test is useless, who changed the schema?", + sf.hasDocValues()); + assertTrue( + fieldName + " has no docValuesSkipList, test is useless, who changed the schema?", + sf.hasDocValuesSkipList()); + } + } + + @Override + public void setUp() throws Exception { + super.setUp(); + assertU(delQ("*:*")); + } + + @Test + public void testDocValues() throws IOException { + assertU(adoc("id", "1")); + assertU(commit()); + try (SolrCore core = h.getCoreInc()) { + final RefCounted searcherRef = core.openNewSearcher(true, true); + final SolrIndexSearcher searcher = searcherRef.get(); + try { + final LeafReader reader = searcher.getSlowAtomicReader(); + assertEquals(1, reader.numDocs()); + final FieldInfos infos = reader.getFieldInfos(); + + for (String f : DOC_VALUES_FIELDS) { + assertEquals(DocValuesSkipIndexType.RANGE, infos.fieldInfo(f).docValuesSkipIndexType()); + } + + NumericDocValues dvs = reader.getNumericDocValues("floatdv"); + assertEquals(0, dvs.nextDoc()); + 
assertEquals((long) Float.floatToIntBits(1), dvs.longValue()); + dvs = reader.getNumericDocValues("intdv"); + assertEquals(0, dvs.nextDoc()); + assertEquals(2L, dvs.longValue()); + dvs = reader.getNumericDocValues("doubledv"); + assertEquals(0, dvs.nextDoc()); + assertEquals(Double.doubleToLongBits(3), dvs.longValue()); + dvs = reader.getNumericDocValues("longdv"); + assertEquals(0, dvs.nextDoc()); + assertEquals(4L, dvs.longValue()); + SortedDocValues sdv = reader.getSortedDocValues("stringdv"); + assertEquals(0, sdv.nextDoc()); + assertEquals("solr", sdv.lookupOrd(sdv.ordValue()).utf8ToString()); + sdv = reader.getSortedDocValues("booldv"); + assertEquals(0, sdv.nextDoc()); + assertEquals("T", sdv.lookupOrd(sdv.ordValue()).utf8ToString()); + + final IndexSchema schema = core.getLatestSchema(); + final SchemaField floatDv = schema.getField("floatdv"); + final SchemaField intDv = schema.getField("intdv"); + final SchemaField doubleDv = schema.getField("doubledv"); + final SchemaField longDv = schema.getField("longdv"); + final SchemaField boolDv = schema.getField("booldv"); + + FunctionValues values = + floatDv + .getType() + .getValueSource(floatDv, null) + .getValues(null, searcher.getSlowAtomicReader().leaves().get(0)); + assertEquals(1f, values.floatVal(0), 0f); + assertEquals(1f, values.objectVal(0)); + values = + intDv + .getType() + .getValueSource(intDv, null) + .getValues(null, searcher.getSlowAtomicReader().leaves().get(0)); + assertEquals(2, values.intVal(0)); + assertEquals(2, values.objectVal(0)); + values = + doubleDv + .getType() + .getValueSource(doubleDv, null) + .getValues(null, searcher.getSlowAtomicReader().leaves().get(0)); + assertEquals(3d, values.doubleVal(0), 0d); + assertEquals(3d, values.objectVal(0)); + values = + longDv + .getType() + .getValueSource(longDv, null) + .getValues(null, searcher.getSlowAtomicReader().leaves().get(0)); + assertEquals(4L, values.longVal(0)); + assertEquals(4L, values.objectVal(0)); + + values = + boolDv + 
.getType() + .getValueSource(boolDv, null) + .getValues(null, searcher.getSlowAtomicReader().leaves().get(0)); + assertEquals("true", values.strVal(0)); + assertEquals(true, values.objectVal(0)); + + // check reversibility of created fields + tstToObj(schema.getField("floatdv"), -1.5f); + tstToObj(schema.getField("floatdvs"), -1.5f); + tstToObj(schema.getField("doubledv"), -1.5d); + tstToObj(schema.getField("doubledvs"), -1.5d); + tstToObj(schema.getField("intdv"), -7); + tstToObj(schema.getField("intdvs"), -7); + tstToObj(schema.getField("longdv"), -11L); + tstToObj(schema.getField("longdvs"), -11L); + tstToObj(schema.getField("datedv"), new Date(1000)); + tstToObj(schema.getField("datedvs"), new Date(1000)); + tstToObj(schema.getField("stringdv"), "foo"); + tstToObj(schema.getField("stringdvs"), "foo"); + tstToObj(schema.getField("booldv"), true); + tstToObj(schema.getField("booldvs"), true); + + } finally { + searcherRef.decref(); + } + } + } + + private void tstToObj(SchemaField sf, Object o) { + List fields = sf.createFields(o); + for (IndexableField field : fields) { + assertEquals(sf.getType().toObject(field), o); + } + } + + @Test + public void testDocValuesSorting() { + assertU( + adoc( + "id", + "1", + "floatdv", + "2", + "intdv", + "3", + "doubledv", + "4", + "longdv", + "5", + "datedv", + "1995-12-31T23:59:59.999Z", + "stringdv", + "b", + "booldv", + "true")); + assertU( + adoc( + "id", + "2", + "floatdv", + "5", + "intdv", + "4", + "doubledv", + "3", + "longdv", + "2", + "datedv", + "1997-12-31T23:59:59.999Z", + "stringdv", + "a", + "booldv", + "false")); + assertU( + adoc( + "id", + "3", + "floatdv", + "3", + "intdv", + "1", + "doubledv", + "2", + "longdv", + "1", + "datedv", + "1996-12-31T23:59:59.999Z", + "stringdv", + "c", + "booldv", + "true")); + assertU(adoc("id", "4")); + assertU(commit()); + assertQ( + req("q", "*:*", "sort", "floatdv desc", "rows", "1", "fl", "id"), + "//str[@name='id'][.='2']"); + assertQ( + req("q", "*:*", "sort", "intdv 
desc", "rows", "1", "fl", "id"), "//str[@name='id'][.='2']"); + assertQ( + req("q", "*:*", "sort", "doubledv desc", "rows", "1", "fl", "id"), + "//str[@name='id'][.='1']"); + assertQ( + req("q", "*:*", "sort", "longdv desc", "rows", "1", "fl", "id"), + "//str[@name='id'][.='1']"); + assertQ( + req("q", "*:*", "sort", "datedv desc", "rows", "1", "fl", "id,datedv"), + "//str[@name='id'][.='2']", + "//result/doc[1]/date[@name='datedv'][.='1997-12-31T23:59:59.999Z']"); + assertQ( + req("q", "*:*", "sort", "stringdv desc", "rows", "1", "fl", "id"), + "//str[@name='id'][.='4']"); + assertQ( + req("q", "*:*", "sort", "floatdv asc", "rows", "1", "fl", "id"), + "//str[@name='id'][.='4']"); + assertQ( + req("q", "*:*", "sort", "intdv asc", "rows", "1", "fl", "id"), "//str[@name='id'][.='3']"); + assertQ( + req("q", "*:*", "sort", "doubledv asc", "rows", "1", "fl", "id"), + "//str[@name='id'][.='3']"); + assertQ( + req("q", "*:*", "sort", "longdv asc", "rows", "1", "fl", "id"), "//str[@name='id'][.='3']"); + assertQ( + req("q", "*:*", "sort", "datedv asc", "rows", "1", "fl", "id"), "//str[@name='id'][.='1']"); + assertQ( + req("q", "*:*", "sort", "stringdv asc", "rows", "1", "fl", "id"), + "//str[@name='id'][.='2']"); + assertQ( + req("q", "*:*", "sort", "booldv asc", "rows", "10", "fl", "booldv,stringdv"), + "//result/doc[1]/bool[@name='booldv'][.='false']", + "//result/doc[2]/bool[@name='booldv'][.='true']", + "//result/doc[3]/bool[@name='booldv'][.='true']", + "//result/doc[4]/bool[@name='booldv'][.='true']"); + } + + @Test + public void testDocValuesSorting2() { + assertU(adoc("id", "1", "doubledv", "12")); + assertU(adoc("id", "2", "doubledv", "50.567")); + assertU(adoc("id", "3", "doubledv", "+0")); + assertU(adoc("id", "4", "doubledv", "4.9E-324")); + assertU(adoc("id", "5", "doubledv", "-0.1")); + assertU(adoc("id", "6", "doubledv", "-5.123")); + assertU(adoc("id", "7", "doubledv", "1.7976931348623157E308")); + assertU(commit()); + assertQ( + req("fl", "id", "q", 
"*:*", "sort", "doubledv asc"),
+        "//result/doc[1]/str[@name='id'][.='6']",
+        "//result/doc[2]/str[@name='id'][.='5']",
+        "//result/doc[3]/str[@name='id'][.='3']",
+        "//result/doc[4]/str[@name='id'][.='4']",
+        "//result/doc[5]/str[@name='id'][.='1']",
+        "//result/doc[6]/str[@name='id'][.='2']",
+        "//result/doc[7]/str[@name='id'][.='7']");
+  }
+
+  /**
+   * Randomized range-query test for the {@code floatdv} and {@code doubledv} fields.
+   *
+   * <p>For each field, indexes 1-10 documents holding random values (a boundary value 5% of the
+   * time), then runs 10000 range queries with random bounds and random inclusivity. The expected
+   * hits are computed independently by comparing the sortable encodings of each indexed value
+   * with those of the bounds; every hit is expected in id order with a score of 1.0.
+   */
+  @Test
+  public void testFloatAndDoubleRangeQueryRandom() {
+
+    String fieldName[] = new String[] {"floatdv", "doubledv"};
+
+    Number largestNegative[] = new Number[] {0f - Float.MIN_NORMAL, 0f - Double.MIN_NORMAL};
+    Number smallestPositive[] = new Number[] {Float.MIN_NORMAL, Double.MIN_NORMAL};
+    Number positiveInfinity[] = new Number[] {Float.POSITIVE_INFINITY, Double.POSITIVE_INFINITY};
+    Number negativeInfinity[] = new Number[] {Float.NEGATIVE_INFINITY, Double.NEGATIVE_INFINITY};
+    Number largestValue[] = new Number[] {Float.MAX_VALUE, Double.MAX_VALUE};
+    Number zero[] = new Number[] {0f, 0d};
+    // Keeps drawing from the supplier until the value does not print as "NaN".
+    Function<Supplier<Number>, Number> noNaN =
+        (next) -> {
+          Number num;
+          while (String.valueOf(num = next.get()).equals("NaN"))
+            ;
+          return num;
+        };
+    // Index 0 is the float variant, index 1 the double variant (same order as fieldName).
+    List<Supplier<Number>> nextRandNoNaN =
+        Arrays.asList(
+            () -> noNaN.apply(() -> Float.intBitsToFloat(random().nextInt())),
+            () -> noNaN.apply(() -> Double.longBitsToDouble(random().nextLong())));
+    List<Function<Number, Long>> toSortableLong =
+        Arrays.asList(
+            (num) -> (long) NumericUtils.floatToSortableInt(num.floatValue()),
+            (num) -> NumericUtils.doubleToSortableLong(num.doubleValue()));
+
+    // Number minusZero[] = new Number[] {-0f, -0d}; // -0 == 0, so we should not treat them
+    // differently (and we should not guarantee that sign is preserved... we should be able to index
+    // both as 0)
+
+    for (int i = 0; i < fieldName.length; i++) {
+      assertU(delQ("*:*"));
+      // Pass the commit command to assertU, as is done after indexing below; a bare commit() call
+      // discards its result.
+      assertU(commit());
+
+      Number specialValues[] =
+          new Number[] {
+            largestNegative[i],
+            smallestPositive[i],
+            negativeInfinity[i],
+            largestValue[i],
+            positiveInfinity[i],
+            zero[i]
+          };
+
+      List<Number> values = new ArrayList<>();
+      int numDocs = 1 + random().nextInt(10);
+      for (int j = 0; j < numDocs; j++) {
+
+        if (random().nextInt(100) < 5) { // Add a boundary value with 5% probability
+          values.add(specialValues[random().nextInt(specialValues.length)]);
+        } else {
+          if (fieldName[i].equals("floatdv")) { // Add random values with 95% probability
+            values.add(Float.intBitsToFloat(random().nextInt()));
+          } else {
+            values.add(Double.longBitsToDouble(random().nextLong()));
+          }
+        }
+      }
+      // Indexing
+      for (int j = 0; j < values.size(); j++) {
+        assertU(adoc("id", String.valueOf(j + 1), fieldName[i], String.valueOf(values.get(j))));
+      }
+      assertU(commit());
+
+      log.info("Indexed values: {}", values);
+      // Querying
+      int numQueries = 10000;
+      for (int j = 0; j < numQueries; j++) {
+        boolean minInclusive = random().nextBoolean();
+        boolean maxInclusive = random().nextBoolean();
+
+        Number minVal, maxVal;
+        String min = String.valueOf(minVal = nextRandNoNaN.get(i).get());
+        String max = String.valueOf(maxVal = nextRandNoNaN.get(i).get());
+
+        // randomly use boundary values for min, 15% of the time
+        int r = random().nextInt(100);
+        if (r < 5) {
+          minVal = negativeInfinity[i];
+          min = "*";
+        } else if (r < 10) {
+          minVal = specialValues[random().nextInt(specialValues.length)];
+          min = String.valueOf(minVal);
+        } else if (r < 15) {
+          minVal = values.get(random().nextInt(values.size()));
+          min = String.valueOf(minVal);
+        }
+
+        // randomly use boundary values for max, 15% of the time
+        r = random().nextInt(100);
+        if (r < 5) {
+          maxVal = positiveInfinity[i];
+          max = "*";
+        } else if (r < 10) {
+          maxVal = specialValues[random().nextInt(specialValues.length)];
+          max = String.valueOf(maxVal);
+        } else if (r < 15) {
+          // Don't pick a NaN for the range query
+          Number tmp = values.get(random().nextInt(values.size()));
+          if (!Double.isNaN(tmp.doubleValue()) && !Float.isNaN(tmp.floatValue())) {
+            maxVal = tmp;
+            max = String.valueOf(maxVal);
+          }
+        }
+
+        List<String> tests = new ArrayList<>();
+        int counter = 0;
+
+        // Build the expected xpath assertions: one id check and one score check per matching doc.
+        for (int k = 0; k < values.size(); k++) {
+          Number val = values.get(k);
+          long valSortable = toSortableLong.get(i).apply(val);
+          long minSortable = toSortableLong.get(i).apply(minVal);
+          long maxSortable = toSortableLong.get(i).apply(maxVal);
+
+          if (((minInclusive && minSortable <= valSortable)
+                  || (!minInclusive && minSortable < valSortable)
+                  || (min.equals("*") && Objects.equals(val, negativeInfinity[i])))
+              && ((maxInclusive && maxSortable >= valSortable)
+                  || (!maxInclusive && maxSortable > valSortable)
+                  || (max.equals("*") && Objects.equals(val, positiveInfinity[i])))) {
+            counter++;
+            tests.add("//result/doc[" + counter + "]/str[@name='id'][.=" + (k + 1) + "]");
+            tests.add("//result/doc[" + counter + "]/float[@name='score'][.=1.0]");
+          }
+        }
+
+        tests.add(0, "//*[@numFound='" + counter + "']");
+
+        String testsArr[] = new String[tests.size()];
+        for (int k = 0; k < tests.size(); k++) {
+          testsArr[k] = tests.get(k);
+        }
+        log.info("Expected: {}", tests);
+        assertQ(
+            req(
+                "q",
+                fieldName[i]
+                    + ":"
+                    + (minInclusive ? '[' : '{')
+                    + min
+                    + " TO "
+                    + max
+                    + (maxInclusive ? ']' : '}'),
+                "sort",
+                "id_i asc",
+                "fl",
+                "id," + fieldName[i] + ",score"),
+            testsArr);
+      }
+    }
+  }
+
+  /**
+   * Range-query test over a fixed data set for the {@code floatdv} and {@code doubledv} fields.
+   *
+   * <p>Seven documents are indexed per field (2, -5, 3, 3, the negated smallest normal value,
+   * negative infinity and the maximum finite value); the queries cover bounds on either side of
+   * zero, open-ended ({@code *}) bounds and an exclusive upper bound.
+   */
+  @Test
+  public void testFloatAndDoubleRangeQuery() {
+    String fieldName[] = new String[] {"floatdv", "doubledv"};
+    String largestNegative[] =
+        new String[] {
+          String.valueOf(0f - Float.MIN_NORMAL), String.valueOf(0f - Double.MIN_NORMAL)
+        };
+    String negativeInfinity[] =
+        new String[] {
+          String.valueOf(Float.NEGATIVE_INFINITY), String.valueOf(Double.NEGATIVE_INFINITY)
+        };
+    String largestValue[] =
+        new String[] {String.valueOf(Float.MAX_VALUE), String.valueOf(Double.MAX_VALUE)};
+
+    for (int i = 0; i < fieldName.length; i++) {
+      assertU(adoc("id", "1", fieldName[i], "2"));
+      assertU(adoc("id", "2", fieldName[i], "-5"));
+      assertU(adoc("id", "3", fieldName[i], "3"));
+      assertU(adoc("id", "4", fieldName[i], "3"));
+      assertU(adoc("id", "5", fieldName[i], largestNegative[i]));
+      assertU(adoc("id", "6", fieldName[i], negativeInfinity[i]));
+      assertU(adoc("id", "7", fieldName[i], largestValue[i]));
+      assertU(commit());
+
+      // Negative Zero to Positive
+      assertQ(
+          req(
+              "q",
+              fieldName[i] + ":[-0.0 TO 2.5]",
+              "sort",
+              "id_i asc",
+              "fl",
+              "id," + fieldName[i] + ",score"),
+          "//*[@numFound='1']",
+          "//result/doc[1]/str[@name='id'][.=1]");
+
+      // Negative to Positive Zero
+      assertQ(
+          req(
+              "q",
+              fieldName[i] + ":[-6 TO 0]",
+              "sort",
+              "id_i asc",
+              "fl",
+              "id," + fieldName[i] + ",score"),
+          "//*[@numFound='2']",
+          "//result/doc[1]/str[@name='id'][.=2]",
+          "//result/doc[2]/str[@name='id'][.=5]");
+
+      // Negative to Positive
+      assertQ(
+          req(
+              "q",
+              fieldName[i] + ":[-6 TO 2.5]",
+              "sort",
+              "id_i asc",
+              "fl",
+              "id," + fieldName[i] + ",score"),
+          "//*[@numFound='3']",
+          "//result/doc[1]/str[@name='id'][.=1]",
+          "//result/doc[2]/str[@name='id'][.=2]",
+          "//result/doc[3]/str[@name='id'][.=5]");
+
+      // Positive to Positive
+      assertQ(
+          req(
+              "q",
+              fieldName[i] + ":[2 TO 3]",
+              "sort",
+              "id_i asc",
+              "fl",
+              "id," + fieldName[i] + ",score"),
+          "//*[@numFound='3']",
+          "//result/doc[1]/str[@name='id'][.=1]",
+          "//result/doc[2]/str[@name='id'][.=3]",
+          "//result/doc[3]/str[@name='id'][.=4]");
+
+      // Positive to POSITIVE_INF
+      assertQ(
+          req(
+              "q",
+              fieldName[i] + ":[2 TO *]",
+              "sort",
+              "id_i asc",
+              "fl",
+              "id," + fieldName[i] + ",score"),
+          "//*[@numFound='4']",
+          "//result/doc[1]/str[@name='id'][.=1]",
+          "//result/doc[2]/str[@name='id'][.=3]",
+          "//result/doc[3]/str[@name='id'][.=4]",
+          "//result/doc[4]/str[@name='id'][.=7]");
+
+      // NEGATIVE_INF to Negative
+      assertQ(
+          req(
+              "q",
+              fieldName[i] + ":[* TO -1]",
+              "sort",
+              "id_i asc",
+              "fl",
+              "id," + fieldName[i] + ",score"),
+          "//*[@numFound='2']",
+          "//result/doc[1]/str[@name='id'][.=2]",
+          "//result/doc[2]/str[@name='id'][.=6]");
+
+      // NEGATIVE_INF to Positive
+      assertQ(
+          req("q", fieldName[i] + ":[* TO 2]", "sort", "id_i asc", "fl", "id," + fieldName[i]),
+          "//*[@numFound='4']",
+          "//result/doc[1]/str[@name='id'][.=1]",
+          "//result/doc[2]/str[@name='id'][.=2]",
+          "//result/doc[3]/str[@name='id'][.=5]",
+          "//result/doc[4]/str[@name='id'][.=6]");
+
+      // NEGATIVE_INF to Positive (non-inclusive)
+      assertQ(
+          req("q", fieldName[i] + ":[* TO 2}", "sort", "id_i asc", "fl", "id," + fieldName[i]),
+          "//*[@numFound='3']",
+          "//result/doc[1]/str[@name='id'][.=2]",
+          "//result/doc[2]/str[@name='id'][.=5]",
+          "//result/doc[3]/str[@name='id'][.=6]");
+
+      // Negative to POSITIVE_INF
+      assertQ(
+          req("q", fieldName[i] + ":[-6 TO *]", "sort", "id_i asc", "fl", "id," + fieldName[i]),
+          "//*[@numFound='6']",
+          "//result/doc[1]/str[@name='id'][.=1]",
+          "//result/doc[2]/str[@name='id'][.=2]",
+          "//result/doc[3]/str[@name='id'][.=3]",
+          "//result/doc[4]/str[@name='id'][.=4]",
+          "//result/doc[5]/str[@name='id'][.=5]",
+          "//result/doc[6]/str[@name='id'][.=7]");
+
+      // NEGATIVE_INF to POSITIVE_INF
+      assertQ(
+          req(
+              "q",
+              fieldName[i] + ":[* TO *]",
+              "sort",
+              "id_i asc",
+              "fl",
+              "id," + fieldName[i] + ",score"),
+          "//*[@numFound='7']",
+          "//result/doc[1]/str[@name='id'][.=1]",
+          "//result/doc[2]/str[@name='id'][.=2]",
+          "//result/doc[3]/str[@name='id'][.=3]",
+          "//result/doc[4]/str[@name='id'][.=4]",
+          "//result/doc[5]/str[@name='id'][.=5]",
+          "//result/doc[6]/str[@name='id'][.=6]",
+          "//result/doc[7]/str[@name='id'][.=7]",
+          "//result/doc[1]/float[@name='score'][.=1.0]",
+          "//result/doc[2]/float[@name='score'][.=1.0]",
+          "//result/doc[3]/float[@name='score'][.=1.0]",
+          "//result/doc[4]/float[@name='score'][.=1.0]",
+          "//result/doc[5]/float[@name='score'][.=1.0]",
+          "//result/doc[6]/float[@name='score'][.=1.0]",
+          "//result/doc[7]/float[@name='score'][.=1.0]");
+    }
+  }
+}
diff --git a/solr/modules/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java b/solr/modules/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java
index 277bbfc214a7..60296c540f16 100644
--- a/solr/modules/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java
+++ b/solr/modules/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java
@@ -31,7 +31,6 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.icu.ICUCollationKeyAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
-import org.apache.lucene.document.SortedDocValuesField;
 import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.search.Query;
@@ -326,9 +325,13 @@ public List<IndexableField> createFields(SchemaField field, Object value) {
       fields.add(createField(field, value));
       final BytesRef bytes = getCollationKey(field.getName(), value.toString());
       if (field.multiValued()) {
-        fields.add(new SortedSetDocValuesField(field.getName(), bytes));
+        fields.add(
+            DocValuesFieldUtil.createSortedSetDocValuesField(
+                field.getName(), bytes, field.hasDocValuesSkipList()));
       } else {
-        fields.add(new SortedDocValuesField(field.getName(), bytes));
+        fields.add(
+            DocValuesFieldUtil.createSortedDocValuesField(
+                field.getName(), bytes, field.hasDocValuesSkipList()));
       }
       return fields;
     } else {