diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java index 145d532fa7f4..7b958ac8b5d9 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java @@ -30,6 +30,10 @@ *
  • {@link #reset()} : reset the filter state before filtering a new row.
  • *
  • {@link #filterAllRemaining()}: true means row scan is over; false means keep going.
  • *
  • {@link #filterRowKey(Cell)}: true means drop this row; false means include.
  • + *
  • {@link #getHintForRejectedRow(Cell)}: if {@code filterRowKey} returned true, optionally + * provide a seek hint to skip past the rejected row efficiently.
  • + *
  • {@link #getSkipHint(Cell)}: when a cell is structurally skipped (time-range, column, or + * version gate) before {@code filterCell} is reached, optionally provide a seek hint.
  • *
  • {@link #filterCell(Cell)}: decides whether to include or exclude this Cell. See * {@link ReturnCode}.
  • *
  • {@link #transformCell(Cell)}: if the Cell is included, let the filter transform the Cell. @@ -224,6 +228,85 @@ public enum ReturnCode { */ abstract public Cell getNextCellHint(final Cell currentCell) throws IOException; + /** + * Provides a seek hint to bypass row-by-row scanning after {@link #filterRowKey(Cell)} rejects a + * row. When {@code filterRowKey} returns {@code true} the scan pipeline would normally iterate + * through every remaining cell in the rejected row one-by-one (via {@code nextRow()}) before + * moving on. If the filter can determine a better forward position — for example, the next range + * boundary in a {@code MultiRowRangeFilter} — it should return that target cell here, allowing + * the scanner to seek directly past the unwanted rows. + *

    + * Contract: + *

    + * @param firstRowCell the first cell encountered in the rejected row; contains the row key that + * was passed to {@code filterRowKey} + * @return a {@link Cell} representing the earliest position the scanner should seek to, or + * {@code null} if this filter cannot provide a better position than a sequential skip + * @throws IOException in case an I/O or filter-specific failure needs to be signaled + * @see #filterRowKey(Cell) + */ + public Cell getHintForRejectedRow(final Cell firstRowCell) throws IOException { + return null; + } + + /** + * Provides a seek hint for cells that are structurally skipped by the scan pipeline + * before {@link #filterCell(Cell)} is ever reached. The pipeline short-circuits on + * several criteria — time-range mismatch, column-set exclusion, and version-limit exhaustion — + * and in each case the filter is bypassed entirely. When an implementation can compute a + * meaningful forward position purely from the cell's coordinates (without needing the + * {@code filterCell} call sequence), it should return that position here so the scanner can seek + * ahead instead of advancing one cell at a time. + *

    + * Contract: + *

    + * @param skippedCell the cell that was rejected by the time-range, column, or version gate before + * {@code filterCell} could be consulted + * @return a {@link Cell} representing the earliest position the scanner should seek to, or + * {@code null} if this filter cannot provide a better position than the structural hint + * @throws IOException in case an I/O or filter-specific failure needs to be signaled + * @see #filterCell(Cell) + * @see #getNextCellHint(Cell) + */ + public Cell getSkipHint(final Cell skippedCell) throws IOException { + return null; + } + /** * Check that given column family is essential for filter to check row. Most filters always return * true here. But some could have more sophisticated logic which could significantly reduce diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterBase.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterBase.java index 8c330d6aa9b9..e2ca7444bd56 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterBase.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterBase.java @@ -112,6 +112,28 @@ public Cell getNextCellHint(Cell currentCell) throws IOException { return null; } + /** + * Filters that cannot provide a seek hint after row-key rejection can inherit this no-op + * implementation. Subclasses whose row-key logic (e.g. a range pointer advanced inside + * {@link #filterRowKey(Cell)}) makes a better seek target available should override this. + * {@inheritDoc} + */ + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) throws IOException { + return null; + } + + /** + * Filters that cannot provide a structural-skip seek hint can inherit this no-op implementation. + * Subclasses with purely configuration-driven, stateless hint computation (e.g. a fixed column + * range or fuzzy-row pattern) may override this to avoid cell-by-cell advancement when the + * time-range, column, or version gate fires. {@inheritDoc} + */ + @Override + public Cell getSkipHint(Cell skippedCell) throws IOException { + return null; + } + /** * By default, we require all scan's column families to be present. Our subclasses may be more * precise. {@inheritDoc} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/filter/FilterWrapper.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/filter/FilterWrapper.java index c31fc25ec824..77494b6cd2ef 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/filter/FilterWrapper.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/filter/FilterWrapper.java @@ -93,6 +93,26 @@ public Cell getNextCellHint(Cell currentCell) throws IOException { return this.filter.getNextCellHint(currentCell); } + /** + * Delegates to the wrapped filter's {@link Filter#getHintForRejectedRow(Cell)} so that the scan + * pipeline can seek directly past a rejected row rather than iterating cell-by-cell via + * {@code nextRow()}. {@inheritDoc} + */ + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) throws IOException { + return this.filter.getHintForRejectedRow(firstRowCell); + } + + /** + * Delegates to the wrapped filter's {@link Filter#getSkipHint(Cell)} so that the scan pipeline + * can seek ahead when a cell is structurally skipped before {@code filterCell} is reached. + * {@inheritDoc} + */ + @Override + public Cell getSkipHint(Cell skippedCell) throws IOException { + return this.filter.getSkipHint(skippedCell); + } + @Override public boolean filterRowKey(byte[] buffer, int offset, int length) throws IOException { // No call to this. diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java index 0705b454de1b..ac3cfbe514e9 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.ExtendedCell; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; @@ -86,6 +87,7 @@ public class RegionScannerImpl implements RegionScanner, Shipper, RpcCallback { protected final byte[] stopRow; protected final boolean includeStopRow; + protected final boolean reversed; protected final HRegion region; protected final CellComparator comparator; @@ -130,6 +132,7 @@ private static boolean hasNonce(HRegion region, long nonce) { defaultScannerContext = ScannerContext.newBuilder().setBatchLimit(scan.getBatch()).build(); this.stopRow = scan.getStopRow(); this.includeStopRow = scan.includeStopRow(); + this.reversed = scan.isReversed(); this.operationId = scan.getId(); // synchronize on scannerReadPoints so that nobody calculates @@ -503,11 +506,18 @@ private boolean nextInternal(List results, ScannerContext scannerContext) if (isFilterDoneInternal()) { return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues(); } + // HBASE-29974: ask the filter for a seek hint so we can jump directly past the rejected + // row instead of iterating through its cells one-by-one via nextRow(). + ExtendedCell rowHint = getHintForRejectedRow(current); // Typically the count of rows scanned is incremented inside #populateResult. However, // here we are filtering a row based purely on its row key, preventing us from calling - // #populateResult. Thus, perform the necessary increment here to rows scanned metric + // #populateResult. Thus, perform the necessary increment here to rows scanned metric. + // Placed after getHintForRejectedRow so that a buggy filter throwing DNRIOE doesn't + // leave the metric incremented for a row that was never actually processed. incrementCountOfRowsScannedMetric(scannerContext); - boolean moreRows = nextRow(scannerContext, current); + boolean moreRows = (rowHint != null) + ? nextRowViaHint(scannerContext, current, rowHint) + : nextRow(scannerContext, current); if (!moreRows) { return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues(); } @@ -749,6 +759,94 @@ protected boolean nextRow(ScannerContext scannerContext, Cell curRowCell) throws || this.region.getCoprocessorHost().postScannerFilterRow(this, curRowCell); } + /** + * Fast-path alternative to {@link #nextRow} used when the filter has provided a seek hint via + * {@link org.apache.hadoop.hbase.filter.Filter#getHintForRejectedRow(Cell)}. Instead of iterating + * through every cell in the rejected row one-by-one, this method issues a single seek to jump + * directly to the filter's suggested position ({@code requestSeek} for forward scans, + * {@code backwardSeek} for reversed scans). + *

    + * The skipping-row mode flag is set around the seek so that block-level size tracking continues + * to function (consistent with {@link #nextRow}), and the filter state is reset afterwards so the + * next row starts with a clean filter context. + *

    + * Stop-row invariant: This method does not validate that {@code hint} falls + * within the scan's stop row. If the hint overshoots, the next iteration's + * {@link #shouldStop(Cell)} check catches it and returns NO_MORE_VALUES. One wasted seek may + * occur, but correctness is maintained. + *

    + * Metrics note: The rows-scanned metric is incremented once by the caller for + * the rejected row. Rows physically skipped by the seek are not individually counted — this + * reflects the fact that no per-row work was done for those rows. + *

    + * Coprocessor note: {@code postScannerFilterRow} is invoked once with + * {@code curRowCell}, not once per skipped row. Coprocessors counting filtered rows should be + * aware of this semantic when the hint path is used. + * @param scannerContext scanner context used for limit tracking + * @param curRowCell the first cell of the row that was rejected by {@code filterRowKey}; + * passed to the coprocessor hook for observability + * @param hint the validated {@link ExtendedCell} returned by the filter; the scanner + * will seek to this position + * @return {@code true} if scanning should continue, {@code false} if a coprocessor requests an + * early stop (mirrors the contract of {@link #nextRow}) + * @throws IOException if the seek or the coprocessor hook signals a failure + */ + private boolean nextRowViaHint(ScannerContext scannerContext, Cell curRowCell, ExtendedCell hint) + throws IOException { + assert this.joinedContinuationRow == null : "Trying to go to next row during joinedHeap read."; + + int difference = comparator.compareRows(hint, curRowCell); + if ((!reversed && difference > 0) || (reversed && difference < 0)) { + scannerContext.setSkippingRow(true); + if (reversed) { + // ReversedKeyValueHeap does not support requestSeek; use backwardSeek + // to position at-or-before the hint within the target row. + // seekToPreviousRow would skip past the hint row entirely. + this.storeHeap.backwardSeek(hint); + } else { + this.storeHeap.requestSeek(hint, true, true); + } + scannerContext.setSkippingRow(false); + + resetFilters(); + + return this.region.getCoprocessorHost() == null + || this.region.getCoprocessorHost().postScannerFilterRow(this, curRowCell); + } + + return nextRow(scannerContext, curRowCell); + } + + /** + * Asks the current {@link org.apache.hadoop.hbase.filter.FilterWrapper} for a seek hint to use + * after a row has been rejected by {@link #filterRowKey}. If the wrapped filter overrides + * {@link org.apache.hadoop.hbase.filter.Filter#getHintForRejectedRow(Cell)}, this returns its + * answer as an {@link ExtendedCell}; otherwise returns {@code null}. + *

    + * The returned cell is validated to be an {@link ExtendedCell} because filters run on the server + * side and the scanner infrastructure requires {@code ExtendedCell} references. + * @param rowCell the first cell of the rejected row (same cell passed to {@code filterRowKey}) + * @return a validated {@link ExtendedCell} seek target, or {@code null} if the filter provides no + * hint + * @throws DoNotRetryIOException if the filter returns a non-{@link ExtendedCell} instance + * @throws IOException if the filter signals an I/O failure + */ + private ExtendedCell getHintForRejectedRow(Cell rowCell) throws IOException { + if (filter == null) { + return null; + } + Cell hint = filter.getHintForRejectedRow(rowCell); + if (hint == null) { + return null; + } + if (!(hint instanceof ExtendedCell)) { + throw new DoNotRetryIOException( + "Incorrect filter implementation: the Cell returned by getHintForRejectedRow " + + "is not an ExtendedCell. Filter class: " + filter.getClass().getName()); + } + return (ExtendedCell) hint; + } + protected boolean shouldStop(Cell currentRowCell) { if (currentRowCell == null) { return true; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java index c07b91b77e68..0c7c902afb98 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java @@ -61,6 +61,18 @@ public abstract class UserScanQueryMatcher extends ScanQueryMatcher { private ExtendedCell curColCell = null; + /** + * Holds a seek-hint produced by {@link org.apache.hadoop.hbase.filter.Filter#getSkipHint(Cell)} + * at one of the structural short-circuit points in {@link #matchColumn}. When non-null this is + * returned by {@link #getNextKeyHint} instead of delegating to + * {@link org.apache.hadoop.hbase.filter.Filter#getNextCellHint}, because the hint was computed + * for a cell that never reached {@code filterCell}. Cleared on every {@link #getNextKeyHint} call + * so it cannot leak across multiple seek-hint cycles. + */ + private ExtendedCell pendingSkipHint = null; + + private final boolean reversed; + private static ExtendedCell createStartKey(Scan scan, ScanInfo scanInfo) { if (scan.includeStartRow()) { return createStartKeyFromRow(scan.getStartRow(), scanInfo); @@ -82,6 +94,7 @@ protected UserScanQueryMatcher(Scan scan, ScanInfo scanInfo, ColumnTracker colum this.versionsAfterFilter = 0; } this.stopRow = scan.getStopRow(); + this.reversed = scan.isReversed(); TimeRange timeRange = scan.getColumnFamilyTimeRange().get(scanInfo.getFamily()); if (timeRange == null) { this.tr = scan.getTimeRange(); @@ -107,6 +120,11 @@ public Filter getFilter() { @Override public ExtendedCell getNextKeyHint(ExtendedCell cell) throws IOException { + if (pendingSkipHint != null) { + ExtendedCell hint = pendingSkipHint; + pendingSkipHint = null; + return hint; + } if (filter == null) { return null; } else { @@ -118,7 +136,6 @@ public ExtendedCell getNextKeyHint(ExtendedCell cell) throws IOException { + "the Cell returned by getNextKeyHint is not an ExtendedCell. Filter class: " + filter.getClass().getName()); } - } } @@ -128,20 +145,47 @@ public void beforeShipped() throws IOException { if (curColCell != null) { this.curColCell = KeyValueUtil.toNewKeyCell(this.curColCell); } + if (pendingSkipHint != null) { + this.pendingSkipHint = KeyValueUtil.toNewKeyCell(this.pendingSkipHint); + } } + @Override + public void setToNewRow(ExtendedCell currentRow) { + pendingSkipHint = null; + super.setToNewRow(currentRow); + } + + @Override + public void clearCurrentRow() { + pendingSkipHint = null; + super.clearCurrentRow(); + } + + // At each structural short-circuit below (time-range, column-exclusion, version-exhaustion), + // the filter is consulted via resolveSkipHint() before falling back to the default skip/seek + // code. This lets filters provide a forward seek target even when filterCell is never called. protected final MatchCode matchColumn(ExtendedCell cell, long timestamp, byte typeByte) throws IOException { int tsCmp = tr.compare(timestamp); if (tsCmp > 0) { + if (resolveSkipHint(cell)) { + return MatchCode.SEEK_NEXT_USING_HINT; + } return MatchCode.SKIP; } if (tsCmp < 0) { + if (resolveSkipHint(cell)) { + return MatchCode.SEEK_NEXT_USING_HINT; + } return columns.getNextRowOrNextColumn(cell); } // STEP 1: Check if the column is part of the requested columns MatchCode matchCode = columns.checkColumn(cell, typeByte); if (matchCode != MatchCode.INCLUDE) { + if (resolveSkipHint(cell)) { + return MatchCode.SEEK_NEXT_USING_HINT; + } return matchCode; } /* @@ -151,8 +195,14 @@ protected final MatchCode matchColumn(ExtendedCell cell, long timestamp, byte ty matchCode = columns.checkVersions(cell, timestamp, typeByte, false); switch (matchCode) { case SKIP: + if (resolveSkipHint(cell)) { + return MatchCode.SEEK_NEXT_USING_HINT; + } return MatchCode.SKIP; case SEEK_NEXT_COL: + if (resolveSkipHint(cell)) { + return MatchCode.SEEK_NEXT_USING_HINT; + } return MatchCode.SEEK_NEXT_COL; default: // It means it is INCLUDE, INCLUDE_AND_SEEK_NEXT_COL or INCLUDE_AND_SEEK_NEXT_ROW. @@ -166,6 +216,47 @@ protected final MatchCode matchColumn(ExtendedCell cell, long timestamp, byte ty : mergeFilterResponse(cell, matchCode, filter.filterCell(cell)); } + /** + * Asks the current filter for a seek hint via + * {@link org.apache.hadoop.hbase.filter.Filter#getSkipHint(Cell)}, validates the returned cell + * type, and if non-null stores it in {@link #pendingSkipHint} so that {@link #getNextKeyHint} can + * return it when the scan pipeline asks for the seek target after receiving + * {@link ScanQueryMatcher.MatchCode#SEEK_NEXT_USING_HINT}. + *

    + * This is only called from the structural short-circuit branches of {@link #matchColumn}, where + * {@code filterCell} has not been called, in accordance with the stateless contract of + * {@code Filter#getSkipHint}. The filter-null guard is included here so call-sites need no + * boilerplate. + * @param cell the cell that triggered the structural short-circuit + * @return {@code true} if the filter returned a valid hint (stored in {@link #pendingSkipHint}), + * {@code false} if no filter is set or the filter returned {@code null} + * @throws DoNotRetryIOException if the filter returns a non-{@link ExtendedCell} instance + * @throws IOException if the filter signals an I/O failure + */ + private boolean resolveSkipHint(ExtendedCell cell) throws IOException { + if (filter == null) { + return false; + } + Cell raw = filter.getSkipHint(cell); + if (raw == null) { + return false; + } + if (!(raw instanceof ExtendedCell)) { + throw new DoNotRetryIOException( + "Incorrect filter implementation: the Cell returned by getSkipHint " + + "is not an ExtendedCell. Filter class: " + filter.getClass().getName()); + } + ExtendedCell hint = (ExtendedCell) raw; + // Full-key compare is intentional: skip hints can advance within the same row + // (e.g., to a later column), not just across rows. + int cmp = rowComparator.compare(hint, cell); + if ((!reversed && cmp <= 0) || (reversed && cmp >= 0)) { + return false; + } + pendingSkipHint = hint; + return true; + } + /** * Call this when scan has filter. Decide the desired behavior by checkVersions's MatchCode and * filterCell's ReturnCode. Cell may be skipped by filter, so the column versions in result may be diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java new file mode 100644 index 000000000000..ab9c87865ff4 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java @@ -0,0 +1,646 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.filter; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.PrivateCellUtil; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; +import org.apache.hadoop.hbase.client.Durability; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.client.TableDescriptorBuilder; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.RegionScanner; +import org.apache.hadoop.hbase.testclassification.FilterTests; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInfo; + +@Tag(FilterTests.TAG) +@Tag(MediumTests.TAG) +public class TestFilterHintForRejectedRow { + + private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + + private static final byte[] FAMILY = Bytes.toBytes("f"); + private static final byte[] FAMILY2 = Bytes.toBytes("g"); + private static final int CELLS_PER_ROW = 10; + private static final byte[] VALUE = Bytes.toBytes("value"); + + private HRegion region; + + @BeforeEach + public void setUp(TestInfo testInfo) throws Exception { + TableDescriptor tableDescriptor = + TableDescriptorBuilder.newBuilder(TableName.valueOf(testInfo.getTestMethod().get().getName())) + .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY)) + .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY2)).build(); + RegionInfo info = RegionInfoBuilder.newBuilder(tableDescriptor.getTableName()).build(); + this.region = HBaseTestingUtility.createRegionAndWAL(info, TEST_UTIL.getDataTestDir(), + TEST_UTIL.getConfiguration(), tableDescriptor); + } + + @AfterEach + public void tearDown() throws Exception { + HBaseTestingUtility.closeRegionAndWAL(this.region); + } + + private void writeRows(String prefix, int count) throws IOException { + for (int i = 0; i < count; i++) { + byte[] row = Bytes.toBytes(String.format("%s-%02d", prefix, i)); + Put p = new Put(row); + p.setDurability(Durability.SKIP_WAL); + for (int j = 0; j < CELLS_PER_ROW; j++) { + p.addColumn(FAMILY, Bytes.toBytes(String.format("q-%02d", j)), VALUE); + } + this.region.put(p); + } + this.region.flush(true); + } + + private void writeRowsMultiFamily(String prefix, int count) throws IOException { + for (int i = 0; i < count; i++) { + byte[] row = Bytes.toBytes(String.format("%s-%02d", prefix, i)); + Put p = new Put(row); + p.setDurability(Durability.SKIP_WAL); + for (int j = 0; j < CELLS_PER_ROW; j++) { + byte[] qual = Bytes.toBytes(String.format("q-%02d", j)); + p.addColumn(FAMILY, qual, VALUE); + p.addColumn(FAMILY2, qual, VALUE); + } + this.region.put(p); + } + this.region.flush(true); + } + + private List scanAll(Scan scan) throws IOException { + List results = new ArrayList<>(); + try (RegionScanner scanner = region.getScanner(scan)) { + List rowCells = new ArrayList<>(); + boolean hasMore; + do { + hasMore = scanner.next(rowCells); + results.addAll(rowCells); + rowCells.clear(); + } while (hasMore); + } + return results; + } + + @Test + public void testHintAndNoHintReturnSameCellsOnSameData() throws IOException { + final String prefix = "row"; + final int rejectedCount = 5; + final int acceptedCount = 5; + writeRows(prefix, rejectedCount + acceptedCount); + + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, rejectedCount)); + final AtomicInteger hintCallCount = new AtomicInteger(0); + + FilterBase hintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + hintCallCount.incrementAndGet(); + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + FilterBase noHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + }; + + List hintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(hintFilter)); + List noHintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(noHintFilter)); + + assertEquals(noHintResults.size(), hintResults.size(), + "Both paths must return the same number of cells"); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)), + "Cell mismatch at index " + i); + } + assertEquals(acceptedCount * CELLS_PER_ROW, hintResults.size()); + assertEquals(1, hintCallCount.get(), + "getHintForRejectedRow must be called exactly once: the hint skips all rejected rows"); + } + + @Test + public void testHintBeyondLastRowTerminatesScanGracefully() throws IOException { + final String prefix = "hint-beyond"; + writeRows(prefix, 5); + + final byte[] beyondAllRows = Bytes.toBytes("zzz-beyond-table-end"); + + FilterBase beyondHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return true; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(beyondAllRows); + } + }; + + List results = scanAll(new Scan().addFamily(FAMILY).setFilter(beyondHintFilter)); + assertTrue(results.isEmpty(), + "When the hint is past the last row, no cells should be returned"); + } + + @Test + public void testPerRowHintCalledOncePerRejectedRow() throws IOException { + final String prefix = "hint-perrow"; + final int rejectedCount = 4; + final int acceptedCount = 2; + writeRows(prefix, rejectedCount + acceptedCount); + + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, rejectedCount)); + final AtomicInteger hintCallCount = new AtomicInteger(0); + + FilterBase perRowHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + hintCallCount.incrementAndGet(); + return PrivateCellUtil.createFirstOnNextRow(firstRowCell); + } + }; + + List results = scanAll(new Scan().addFamily(FAMILY).setFilter(perRowHintFilter)); + + assertEquals(acceptedCount * CELLS_PER_ROW, results.size()); + for (Cell c : results) { + assertTrue( + Bytes.compareTo(c.getRowArray(), c.getRowOffset(), c.getRowLength(), acceptedStartRow, 0, + acceptedStartRow.length) >= 0, + "Every returned cell must belong to the accepted row range"); + } + assertEquals(rejectedCount, hintCallCount.get(), + "getHintForRejectedRow must be called once per rejected row in the per-row hint strategy"); + } + + @Test + public void testReversedScanWithHint() throws IOException { + final String prefix = "row"; + final int totalRows = 10; + writeRows(prefix, totalRows); + + // Accept rows 00-04 (lower half), reject rows 05-09 (upper half). + // In a reversed scan, the scanner starts at row-09 and moves backward. + final byte[] rejectThreshold = Bytes.toBytes(String.format("%s-%02d", prefix, 5)); + final byte[] hintTarget = Bytes.toBytes(String.format("%s-%02d", prefix, 4)); + final AtomicInteger hintCallCount = new AtomicInteger(0); + + FilterBase hintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + rejectThreshold, 0, rejectThreshold.length) >= 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + hintCallCount.incrementAndGet(); + // In reversed scan, hint must point backward (to a smaller row key). + return PrivateCellUtil.createFirstOnRow(hintTarget); + } + }; + + FilterBase noHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + rejectThreshold, 0, rejectThreshold.length) >= 0; + } + }; + + Scan reversedHintScan = new Scan().addFamily(FAMILY).setReversed(true).setFilter(hintFilter); + Scan reversedNoHintScan = + new Scan().addFamily(FAMILY).setReversed(true).setFilter(noHintFilter); + + List hintResults = scanAll(reversedHintScan); + List noHintResults = scanAll(reversedNoHintScan); + + assertEquals(noHintResults.size(), hintResults.size(), + "Both paths must return the same number of cells"); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)), + "Cell mismatch at index " + i); + } + assertEquals(5 * CELLS_PER_ROW, hintResults.size()); + assertEquals(1, hintCallCount.get(), + "getHintForRejectedRow must be called exactly once for reversed scan"); + } + + @Test + public void testGetSkipHintWithTimeRangeIntegration() throws IOException { + final long insideTs = 2000; + final long outsideTs = 500; + final int rowCount = 5; + + for (int i = 0; i < rowCount; i++) { + byte[] row = Bytes.toBytes(String.format("skiprow-%02d", i)); + Put p = new Put(row); + p.setDurability(Durability.SKIP_WAL); + p.addColumn(FAMILY, Bytes.toBytes("q"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("q"), outsideTs, VALUE); + region.put(p); + } + region.flush(true); + + final AtomicInteger skipHintCalls = new AtomicInteger(0); + + FilterBase skipHintFilter = new FilterBase() { + @Override + public Cell getSkipHint(Cell skippedCell) { + skipHintCalls.incrementAndGet(); + return PrivateCellUtil.createFirstOnNextRow(skippedCell); + } + }; + + Scan hintScan = new Scan().addFamily(FAMILY).setTimeRange(1000, 3000).setFilter(skipHintFilter); + Scan noHintScan = new Scan().addFamily(FAMILY).setTimeRange(1000, 3000); + + List hintResults = scanAll(hintScan); + List noHintResults = scanAll(noHintScan); + + assertEquals(noHintResults.size(), hintResults.size(), + "Both paths must return the same number of cells"); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)), + "Cell mismatch at index " + i); + } + assertEquals(rowCount, hintResults.size()); + } + + @Test + public void testBackwardHintFallsBackToNextRow() throws IOException { + final String prefix = "row"; + writeRows(prefix, 5); + + final byte[] row00 = Bytes.toBytes(String.format("%s-%02d", prefix, 0)); + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, 2)); + + FilterBase backwardHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(row00); + } + }; + + FilterBase noHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + }; + + List hintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(backwardHintFilter)); + List noHintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(noHintFilter)); + + assertEquals(noHintResults.size(), hintResults.size(), + "Backward hint must produce same results as no-hint path"); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)), + "Cell mismatch at index " + i); + } + } + + @Test + public void testSameRowHintFallsBackToNextRow() throws IOException { + final String prefix = "row"; + writeRows(prefix, 5); + + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, 2)); + final AtomicInteger hintCallCount = new AtomicInteger(0); + + FilterBase sameRowHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + hintCallCount.incrementAndGet(); + return PrivateCellUtil.createFirstOnRow(CellUtil.cloneRow(firstRowCell)); + } + }; + + FilterBase noHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + }; + + List hintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(sameRowHintFilter)); + List noHintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(noHintFilter)); + + assertEquals(noHintResults.size(), hintResults.size(), + "Same-row hint must produce same results as no-hint path"); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)), + "Cell mismatch at index " + i); + } + assertEquals(3 * CELLS_PER_ROW, hintResults.size()); + } + + @Test + public void testCoprocessorHookCalledOncePerHintedRejection() throws IOException { + final String prefix = "row"; + final int rejectedCount = 3; + final int acceptedCount = 3; + writeRows(prefix, rejectedCount + acceptedCount); + + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, rejectedCount)); + final AtomicInteger hintCallCount = new AtomicInteger(0); + + FilterBase hintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + hintCallCount.incrementAndGet(); + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + List results = scanAll(new Scan().addFamily(FAMILY).setFilter(hintFilter)); + + assertEquals(1, hintCallCount.get()); + assertEquals(acceptedCount * CELLS_PER_ROW, results.size()); + } + + @Test + public void testMultiFamilyScanWithHint() throws IOException { + final String prefix = "row"; + final int rejectedCount = 3; + final int acceptedCount = 3; + writeRowsMultiFamily(prefix, rejectedCount + acceptedCount); + + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, rejectedCount)); + final AtomicInteger hintCallCount = new AtomicInteger(0); + + FilterBase hintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + hintCallCount.incrementAndGet(); + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + FilterBase noHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + }; + + // Scan both families. + Scan hintScan = new Scan().addFamily(FAMILY).addFamily(FAMILY2).setFilter(hintFilter); + Scan noHintScan = new Scan().addFamily(FAMILY).addFamily(FAMILY2).setFilter(noHintFilter); + + List hintResults = scanAll(hintScan); + List noHintResults = scanAll(noHintScan); + + assertEquals(noHintResults.size(), hintResults.size(), + "Both paths must return the same number of cells"); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)), + "Cell mismatch at index " + i); + } + assertEquals(2 * CELLS_PER_ROW * acceptedCount, hintResults.size()); + assertEquals(1, hintCallCount.get()); + } + + @Test + public void testDataCorrectnessVsUnfilteredScan() throws IOException { + final String prefix = "row"; + final int totalRows = 8; + writeRows(prefix, totalRows); + + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, 3)); + + FilterBase hintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + List filteredResults = scanAll(new Scan().addFamily(FAMILY).setFilter(hintFilter)); + + // Unfiltered scan starting at acceptedStartRow — should return the exact same cells. + Scan unfilteredScan = new Scan().addFamily(FAMILY).withStartRow(acceptedStartRow); + List unfilteredResults = scanAll(unfilteredScan); + + assertEquals(unfilteredResults.size(), filteredResults.size(), + "Filtered and unfiltered scans must return same cell count"); + for (int i = 0; i < filteredResults.size(); i++) { + assertTrue(CellUtil.equals(filteredResults.get(i), unfilteredResults.get(i)), + "Cell mismatch at index " + i); + } + } + + @Test + public void testHintOvershootingStopRowTerminatesGracefully() throws IOException { + final String prefix = "row"; + final int totalRows = 10; + writeRows(prefix, totalRows); + + // Scan rows 00-06 (stopRow=row-07, exclusive). Reject rows 00-02, hint to row-09. + // The hint overshoots the stop row, so the scan should terminate with no results + // from the rejected range and only include rows 03-06 from the non-rejected range. + final byte[] stopRow = Bytes.toBytes(String.format("%s-%02d", prefix, 7)); + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, 3)); + final byte[] hintTarget = Bytes.toBytes(String.format("%s-%02d", prefix, 9)); + + FilterBase hintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + // Hint past the stop row — scanner should terminate gracefully. + return PrivateCellUtil.createFirstOnRow(hintTarget); + } + }; + + Scan scan = new Scan().addFamily(FAMILY).withStopRow(stopRow).setFilter(hintFilter); + List results = scanAll(scan); + + // The hint jumps past stopRow, so no cells should be returned. + assertTrue(results.isEmpty(), "Hint past stop row must terminate scan with no results"); + } + + @Test + public void testHintWithinStopRowReturnsCorrectResults() throws IOException { + final String prefix = "row"; + final int totalRows = 10; + writeRows(prefix, totalRows); + + // Scan rows 00-06 (stopRow=row-07, exclusive). Reject rows 00-02, hint to row-03. + final byte[] stopRow = Bytes.toBytes(String.format("%s-%02d", prefix, 7)); + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, 3)); + + FilterBase hintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + Scan hintScan = new Scan().addFamily(FAMILY).withStopRow(stopRow).setFilter(hintFilter); + List hintResults = scanAll(hintScan); + + // Should get rows 03-06 (4 rows). + Scan baselineScan = + new Scan().addFamily(FAMILY).withStartRow(acceptedStartRow).withStopRow(stopRow); + List baselineResults = scanAll(baselineScan); + + assertEquals(baselineResults.size(), hintResults.size(), + "Hint within stop row must return same results as baseline"); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue(CellUtil.equals(hintResults.get(i), baselineResults.get(i)), + "Cell mismatch at index " + i); + } + assertEquals(4 * CELLS_PER_ROW, hintResults.size()); + } + + @Test + public void testGetSkipHintCalledForReversedScan() throws IOException { + final long insideTs = 1500; + final long tooNewTs = 5000; + final int rowCount = 5; + final byte[] qInside = Bytes.toBytes("q-inside"); + final byte[] qNew = Bytes.toBytes("q-new"); + + // Write two qualifiers per row: "q-inside" at ts=1500 (within range) and "q-new" at + // ts=5000 (above the range upper bound). The time-range gate fires for the too-new cell + // before version tracking can absorb it, ensuring getSkipHint is consulted. + for (int i = 0; i < rowCount; i++) { + byte[] row = Bytes.toBytes(String.format("skiprev-%02d", i)); + Put p = new Put(row); + p.setDurability(Durability.SKIP_WAL); + p.addColumn(FAMILY, qInside, insideTs, VALUE); + p.addColumn(FAMILY, qNew, tooNewTs, VALUE); + region.put(p); + } + region.flush(true); + + final AtomicInteger skipHintCalls = new AtomicInteger(0); + + // Return null to verify the code path is reached without altering scan semantics. + // The unit test TestUserScanQueryMatcher#testSkipHintConsultedForReversedScan validates + // that a non-null hint is correctly accepted by the reversed-scan guard. + FilterBase skipHintFilter = new FilterBase() { + @Override + public Cell getSkipHint(Cell skippedCell) { + skipHintCalls.incrementAndGet(); + return null; + } + }; + + // Reversed scan with time range [1000, 3000): insideTs cells pass, tooNewTs cells are + // structurally skipped (tsCmp > 0). The skip hint should be consulted. + Scan hintScan = new Scan().addFamily(FAMILY).setTimeRange(1000, 3000).setReversed(true) + .setFilter(skipHintFilter); + Scan noHintScan = new Scan().addFamily(FAMILY).setTimeRange(1000, 3000).setReversed(true); + + List hintResults = scanAll(hintScan); + List noHintResults = scanAll(noHintScan); + + assertEquals(noHintResults.size(), hintResults.size(), + "Both paths must return the same number of cells"); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)), + "Cell mismatch at index " + i); + } + assertEquals(rowCount, hintResults.size()); + assertTrue(skipHintCalls.get() > 0, + "getSkipHint must be called at least once for reversed scan"); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java index fafbfb8f66ef..0ee3a065cfa3 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java @@ -20,6 +20,7 @@ import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; import java.io.IOException; import java.util.ArrayList; @@ -291,6 +292,19 @@ public ReturnCode filterKeyValue(final Cell c) throws IOException { } } + private static class FixedSkipHintFilter extends FilterBase { + final ExtendedCell hint; + + FixedSkipHintFilter(ExtendedCell hint) { + this.hint = hint; + } + + @Override + public Cell getSkipHint(Cell skippedCell) throws IOException { + return hint; + } + } + /** * Here is the unit test for UserScanQueryMatcher#mergeFilterResponse, when the number of cells * exceed the versions requested in scan, we should return SEEK_NEXT_COL, but if current match @@ -394,6 +408,216 @@ scanWithFilter, new ScanInfo(this.conf, fam2, 0, 5, ttl, KeepDeletedCells.FALSE, assertArrayEquals(nextCell.getQualifierArray(), col4); } + /** Verify that getSkipHint is consulted when a cell is newer than the time-range upper bound. */ + @Test + public void testSkipHintConsultedForCellNewerThanTimeRange() throws IOException { + long now = EnvironmentEdgeManager.currentTime(); + long minTs = now - 2000; + long maxTs = now - 1000; + + KeyValue hintCell = new KeyValue(row2, fam2, col1, now - 1500, data); + Scan timeRangeScan = new Scan().addFamily(fam2).setTimeRange(minTs, maxTs) + .setFilter(new FixedSkipHintFilter(hintCell)); + + long now2 = EnvironmentEdgeManager.currentTime(); + UserScanQueryMatcher qm = UserScanQueryMatcher.create(timeRangeScan, + new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, + 0, rowComparator, false), + null, 0, now2, null); + + // ts=now2 >= maxTs, so tsCmp > 0: time-range gate fires before filterCell. + KeyValue tooNew = new KeyValue(row1, fam2, col1, now2, data); + qm.setToNewRow(tooNew); + + assertEquals(MatchCode.SEEK_NEXT_USING_HINT, qm.match(tooNew)); + assertEquals(hintCell, qm.getNextKeyHint(tooNew)); + assertNull(qm.getNextKeyHint(tooNew)); + } + + /** Verify that getSkipHint is consulted when a cell is older than the time-range lower bound. */ + @Test + public void testSkipHintConsultedForCellOlderThanTimeRange() throws IOException { + long now = EnvironmentEdgeManager.currentTime(); + long minTs = now - 500; + long maxTs = now; + + KeyValue hintCell = new KeyValue(row2, fam2, col1, now - 100, data); + Scan timeRangeScan = new Scan().addFamily(fam2).setTimeRange(minTs, maxTs) + .setFilter(new FixedSkipHintFilter(hintCell)); + + UserScanQueryMatcher qm = UserScanQueryMatcher.create(timeRangeScan, + new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, + 0, rowComparator, false), + null, 0, now, null); + + // ts = now-1000 < minTs (now-500), so tsCmp < 0: time-range gate fires. + KeyValue tooOld = new KeyValue(row1, fam2, col1, now - 1000, data); + qm.setToNewRow(tooOld); + + assertEquals(MatchCode.SEEK_NEXT_USING_HINT, qm.match(tooOld)); + assertEquals(hintCell, qm.getNextKeyHint(tooOld)); + assertNull(qm.getNextKeyHint(tooOld)); + } + + /** Verify that a null getSkipHint falls back to the original SKIP match code. */ + @Test + public void testNullSkipHintFallsThroughToOriginalCodeOnTimeRangeGate() throws IOException { + long now = EnvironmentEdgeManager.currentTime(); + long minTs = now - 2000; + long maxTs = now - 1000; + + Scan timeRangeScan = + new Scan().addFamily(fam2).setTimeRange(minTs, maxTs).setFilter(new AlwaysIncludeFilter()); + + long now2 = EnvironmentEdgeManager.currentTime(); + UserScanQueryMatcher qm = UserScanQueryMatcher.create(timeRangeScan, + new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, + 0, rowComparator, false), + null, 0, now2, null); + + KeyValue tooNew = new KeyValue(row1, fam2, col1, now2, data); + qm.setToNewRow(tooNew); + + assertEquals(MatchCode.SKIP, qm.match(tooNew)); + } + + /** Verify that getSkipHint is consulted when a column is excluded by the scan's column set. */ + @Test + public void testSkipHintConsultedForExcludedColumn() throws IOException { + long now = EnvironmentEdgeManager.currentTime(); + + // get.getFamilyMap().get(fam2) contains col2, col4, col5; presenting col1 triggers exclusion. + KeyValue hintCell = new KeyValue(row1, fam2, col2, 1, data); + Scan scanWithFilter = new Scan(scan).setFilter(new FixedSkipHintFilter(hintCell)); + + UserScanQueryMatcher qm = UserScanQueryMatcher.create( + scanWithFilter, new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, + HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false), + get.getFamilyMap().get(fam2), now - ttl, now, null); + + // col1 is not in {col2, col4, col5}: checkColumn returns a non-INCLUDE code. + KeyValue excludedCol = new KeyValue(row1, fam2, col1, 1, data); + qm.setToNewRow(excludedCol); + + assertEquals(MatchCode.SEEK_NEXT_USING_HINT, qm.match(excludedCol)); + assertEquals(hintCell, qm.getNextKeyHint(excludedCol)); + assertNull(qm.getNextKeyHint(excludedCol)); + } + + /** Verify that getSkipHint is consulted when the version limit is exhausted. */ + @Test + public void testSkipHintConsultedOnVersionExhaustion() throws IOException { + long now = EnvironmentEdgeManager.currentTime(); + + KeyValue hintCell = new KeyValue(row2, fam2, col1, now, data); + Scan scanWithFilter = new Scan().addFamily(fam2).setFilter(new FixedSkipHintFilter(hintCell)); + + UserScanQueryMatcher qm = UserScanQueryMatcher.create(scanWithFilter, + new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, + 0, rowComparator, false), + null, now - ttl, now, null); + + KeyValue version1 = new KeyValue(row1, fam2, col1, now - 10, data); + qm.setToNewRow(version1); + assertEquals(MatchCode.INCLUDE, qm.match(version1)); + + // Second version: version limit exceeded, checkVersions returns SEEK_NEXT_COL. + KeyValue version2 = new KeyValue(row1, fam2, col1, now - 20, data); + assertEquals(MatchCode.SEEK_NEXT_USING_HINT, qm.match(version2)); + assertEquals(hintCell, qm.getNextKeyHint(version2)); + assertNull(qm.getNextKeyHint(version2)); + } + + /** Verify that pendingSkipHint is cleared when a row transition occurs via setToNewRow. */ + @Test + public void testPendingSkipHintClearedOnRowTransition() throws IOException { + long now = EnvironmentEdgeManager.currentTime(); + + KeyValue hintCell = new KeyValue(row2, fam2, col1, now, data); + Scan scanWithFilter = new Scan().addFamily(fam2).setFilter(new FixedSkipHintFilter(hintCell)); + + UserScanQueryMatcher qm = UserScanQueryMatcher.create(scanWithFilter, + new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, + 0, rowComparator, false), + null, now - ttl, now, null); + + // Trigger a structural skip that stores pendingSkipHint. + KeyValue version1 = new KeyValue(row1, fam2, col1, now - 10, data); + qm.setToNewRow(version1); + assertEquals(MatchCode.INCLUDE, qm.match(version1)); + KeyValue version2 = new KeyValue(row1, fam2, col1, now - 20, data); + assertEquals(MatchCode.SEEK_NEXT_USING_HINT, qm.match(version2)); + + // Do NOT consume the hint via getNextKeyHint. Instead, simulate a row transition. + KeyValue newRowCell = new KeyValue(row2, fam2, col1, now - 10, data); + qm.setToNewRow(newRowCell); + + // After row transition, pendingSkipHint must be null — getNextKeyHint should delegate + // to the filter's getNextCellHint (which returns null for FixedSkipHintFilter). + assertNull(qm.getNextKeyHint(newRowCell)); + } + + /** Verify that the normal filterCell/getNextCellHint path is unaffected by pendingSkipHint. */ + @Test + public void testNormalFilterCellHintPathUnaffectedBySkipHintChange() throws IOException { + long now = EnvironmentEdgeManager.currentTime(); + + final KeyValue filterHintCell = new KeyValue(row2, fam2, col1, now, data); + + FilterBase seekFilter = new FilterBase() { + @Override + public ReturnCode filterCell(Cell c) { + return ReturnCode.SEEK_NEXT_USING_HINT; + } + + @Override + public Cell getNextCellHint(Cell currentCell) { + return filterHintCell; + } + }; + + Scan scanWithFilter = new Scan().addFamily(fam2).setFilter(seekFilter); + + UserScanQueryMatcher qm = UserScanQueryMatcher.create(scanWithFilter, + new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, + 0, rowComparator, false), + null, now - ttl, now, null); + + KeyValue cell = new KeyValue(row1, fam2, col1, now - 10, data); + qm.setToNewRow(cell); + + assertEquals(MatchCode.SEEK_NEXT_USING_HINT, qm.match(cell)); + assertEquals(filterHintCell, qm.getNextKeyHint(cell)); + } + + /** Verify that getSkipHint works correctly for reversed scans (hint must be smaller). */ + @Test + public void testSkipHintConsultedForReversedScan() throws IOException { + long now = EnvironmentEdgeManager.currentTime(); + long minTs = now - 2000; + long maxTs = now - 1000; + + // For reversed scan, the hint must point backward (smaller key). + KeyValue hintCell = new KeyValue(row1, fam2, col1, now - 1500, data); + Scan reversedScan = new Scan().addFamily(fam2).setTimeRange(minTs, maxTs).setReversed(true) + .setFilter(new FixedSkipHintFilter(hintCell)); + + long now2 = EnvironmentEdgeManager.currentTime(); + UserScanQueryMatcher qm = UserScanQueryMatcher.create(reversedScan, + new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, + 0, rowComparator, false), + null, 0, now2, null); + + // ts=now2 >= maxTs so tsCmp > 0: time-range gate fires. + // row2 > row1 in natural order, so for reversed scan hint (row1) is "forward" (smaller). + KeyValue tooNew = new KeyValue(row2, fam2, col1, now2, data); + qm.setToNewRow(tooNew); + + assertEquals(MatchCode.SEEK_NEXT_USING_HINT, qm.match(tooNew)); + assertEquals(hintCell, qm.getNextKeyHint(tooNew)); + assertNull(qm.getNextKeyHint(tooNew)); + } + /** * After enough consecutive range delete markers, the matcher should switch from SKIP to * SEEK_NEXT_COL. Point deletes and KEEP_DELETED_CELLS always SKIP.