From 17c4d1993f6ad0a036c9e54ce0e3953585786668 Mon Sep 17 00:00:00 2001 From: Shubham Roy Date: Sat, 9 May 2026 07:56:20 +0530 Subject: [PATCH] HBASE-29974 Persist filter hints across scan circuit breaks (branch-2.6 backport) Backport of apache/hbase#7882 to branch-2.6. Adds getHintForRejectedRow() and getSkipHint() to the Filter API, enabling filters to provide seek hints when rows are rejected by filterRowKey or cells are structurally skipped before filterCell is reached. This avoids unnecessary cell-by-cell iteration after scan circuit breaks. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../apache/hadoop/hbase/filter/Filter.java | 77 +++ .../hadoop/hbase/filter/FilterBase.java | 22 + .../hadoop/hbase/filter/FilterWrapper.java | 20 + .../hbase/regionserver/RegionScannerImpl.java | 56 +- .../querymatcher/UserScanQueryMatcher.java | 76 +++ .../filter/TestFilterHintForRejectedRow.java | 644 ++++++++++++++++++ .../TestUserScanQueryMatcher.java | 224 ++++++ 7 files changed, 1117 insertions(+), 2 deletions(-) create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java index 29bfce4b07d0..cdc7f855ca92 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java @@ -30,6 +30,10 @@ *
  • {@link #reset()} : reset the filter state before filtering a new row.
  • *
  • {@link #filterAllRemaining()}: true means row scan is over; false means keep going.
  • *
  • {@link #filterRowKey(Cell)}: true means drop this row; false means include.
  • + *
  • {@link #getHintForRejectedRow(Cell)}: if {@code filterRowKey} returned true, optionally + * provide a seek hint to skip past the rejected row efficiently.
  • + *
  • {@link #getSkipHint(Cell)}: when a cell is structurally skipped (time-range, column, or + * version gate) before {@code filterCell} is reached, optionally provide a seek hint.
  • *
  • {@link #filterCell(Cell)}: decides whether to include or exclude this Cell. See * {@link ReturnCode}.
  • *
  • {@link #transformCell(Cell)}: if the Cell is included, let the filter transform the Cell. @@ -219,6 +223,79 @@ public enum ReturnCode { */ abstract public Cell getNextCellHint(final Cell currentCell) throws IOException; + /** + * Provides a seek hint to bypass row-by-row scanning after {@link #filterRowKey(Cell)} rejects a + * row. When {@code filterRowKey} returns {@code true} the scan pipeline would normally iterate + * through every remaining cell in the rejected row one-by-one (via {@code nextRow()}) before + * moving on. If the filter can determine a better forward position — for example, the next range + * boundary in a {@code MultiRowRangeFilter} — it should return that target cell here, allowing + * the scanner to seek directly past the unwanted rows. + *

    + * Contract: + *

      + *
    • Only called after {@link #filterRowKey(Cell)} has returned {@code true} for the same + * {@code firstRowCell}.
    • + *
    • Implementations may use state that was set during {@link #filterRowKey(Cell)} (e.g. an + * updated range pointer), but must not invoke {@link #filterCell(Cell)} logic — + * the caller guarantees that {@code filterCell} has not been called for this row.
    • + *
    • Returning {@code null} (the default) falls through to the existing {@code nextRow()} + * behaviour, preserving full backward compatibility.
    • + *
    • For reversed scans ({@link org.apache.hadoop.hbase.client.Scan#isReversed()}), the hint + * must point to a smaller row key (earlier in reverse-scan direction). The scanner + * validates hint direction and falls back to {@code nextRow()} if the hint does not advance in + * the scan direction.
    • + *
    • Composite filter limitation: {@code FilterList}, {@code SkipFilter}, and + * {@code WhileMatchFilter} do not currently delegate this method to wrapped sub-filters. Hints + * from filters used inside these wrappers will be silently ignored.
    • + *
    + * @param firstRowCell the first cell encountered in the rejected row; contains the row key that + * was passed to {@code filterRowKey} + * @return a {@link Cell} representing the earliest position the scanner should seek to, or + * {@code null} if this filter cannot provide a better position than a sequential skip + * @throws IOException in case an I/O or filter-specific failure needs to be signaled + * @see #filterRowKey(Cell) + */ + public Cell getHintForRejectedRow(final Cell firstRowCell) throws IOException { + return null; + } + + /** + * Provides a seek hint for cells that are structurally skipped by the scan pipeline + * before {@link #filterCell(Cell)} is ever reached. The pipeline short-circuits on + * several criteria — time-range mismatch, column-set exclusion, and version-limit exhaustion — + * and in each case the filter is bypassed entirely. When an implementation can compute a + * meaningful forward position purely from the cell's coordinates (without needing the + * {@code filterCell} call sequence), it should return that position here so the scanner can seek + * ahead instead of advancing one cell at a time. + *

    + * Contract: + *

      + *
    • May be called for cells that have never been passed to + * {@link #filterCell(Cell)}.
    • + *
    • Implementations must not modify any filter state; this method is treated + * as logically stateless. Only filters whose hint computation is based solely on immutable + * configuration (e.g. a fixed column range or a fuzzy-row pattern) should override this.
    • + *
    • Returning {@code null} (the default) falls through to the existing structural skip/seek + * behaviour, preserving full backward compatibility.
    • + *
    • For reversed scans, the returned cell must have a smaller row key (i.e., earlier + * in reverse-scan direction) than the {@code skippedCell}. Hints that do not advance in the scan + * direction are silently ignored.
    • + *
    • Composite filter limitation: {@code FilterList}, {@code SkipFilter}, and + * {@code WhileMatchFilter} do not currently delegate this method to wrapped sub-filters. Hints + * from filters used inside these wrappers will be silently ignored.
    • + *
    + * @param skippedCell the cell that was rejected by the time-range, column, or version gate before + * {@code filterCell} could be consulted + * @return a {@link Cell} representing the earliest position the scanner should seek to, or + * {@code null} if this filter cannot provide a better position than the structural hint + * @throws IOException in case an I/O or filter-specific failure needs to be signaled + * @see #filterCell(Cell) + * @see #getNextCellHint(Cell) + */ + public Cell getSkipHint(final Cell skippedCell) throws IOException { + return null; + } + /** * Check that given column family is essential for filter to check row. Most filters always return * true here. But some could have more sophisticated logic which could significantly reduce diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterBase.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterBase.java index 8c330d6aa9b9..e2ca7444bd56 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterBase.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterBase.java @@ -112,6 +112,28 @@ public Cell getNextCellHint(Cell currentCell) throws IOException { return null; } + /** + * Filters that cannot provide a seek hint after row-key rejection can inherit this no-op + * implementation. Subclasses whose row-key logic (e.g. a range pointer advanced inside + * {@link #filterRowKey(Cell)}) makes a better seek target available should override this. + * {@inheritDoc} + */ + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) throws IOException { + return null; + } + + /** + * Filters that cannot provide a structural-skip seek hint can inherit this no-op implementation. + * Subclasses with purely configuration-driven, stateless hint computation (e.g. a fixed column + * range or fuzzy-row pattern) may override this to avoid cell-by-cell advancement when the + * time-range, column, or version gate fires. {@inheritDoc} + */ + @Override + public Cell getSkipHint(Cell skippedCell) throws IOException { + return null; + } + /** * By default, we require all scan's column families to be present. Our subclasses may be more * precise. {@inheritDoc} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/filter/FilterWrapper.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/filter/FilterWrapper.java index c31fc25ec824..77494b6cd2ef 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/filter/FilterWrapper.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/filter/FilterWrapper.java @@ -93,6 +93,26 @@ public Cell getNextCellHint(Cell currentCell) throws IOException { return this.filter.getNextCellHint(currentCell); } + /** + * Delegates to the wrapped filter's {@link Filter#getHintForRejectedRow(Cell)} so that the scan + * pipeline can seek directly past a rejected row rather than iterating cell-by-cell via + * {@code nextRow()}. {@inheritDoc} + */ + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) throws IOException { + return this.filter.getHintForRejectedRow(firstRowCell); + } + + /** + * Delegates to the wrapped filter's {@link Filter#getSkipHint(Cell)} so that the scan pipeline + * can seek ahead when a cell is structurally skipped before {@code filterCell} is reached. + * {@inheritDoc} + */ + @Override + public Cell getSkipHint(Cell skippedCell) throws IOException { + return this.filter.getSkipHint(skippedCell); + } + @Override public boolean filterRowKey(byte[] buffer, int offset, int length) throws IOException { // No call to this. diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java index fe7bf89c0276..51a3e548839b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java @@ -81,6 +81,7 @@ public class RegionScannerImpl implements RegionScanner, Shipper, RpcCallback { protected final byte[] stopRow; protected final boolean includeStopRow; + protected final boolean reversed; protected final HRegion region; protected final CellComparator comparator; @@ -123,6 +124,7 @@ private static boolean hasNonce(HRegion region, long nonce) { defaultScannerContext = ScannerContext.newBuilder().setBatchLimit(scan.getBatch()).build(); this.stopRow = scan.getStopRow(); this.includeStopRow = scan.includeStopRow(); + this.reversed = scan.isReversed(); this.operationId = scan.getId(); // synchronize on scannerReadPoints so that nobody calculates @@ -496,11 +498,18 @@ private boolean nextInternal(List results, ScannerContext scannerContext) if (isFilterDoneInternal()) { return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues(); } + // HBASE-29974: ask the filter for a seek hint so we can jump directly past the rejected + // row instead of iterating through its cells one-by-one via nextRow(). + Cell rowHint = getHintForRejectedRow(current); // Typically the count of rows scanned is incremented inside #populateResult. However, // here we are filtering a row based purely on its row key, preventing us from calling - // #populateResult. Thus, perform the necessary increment here to rows scanned metric + // #populateResult. Thus, perform the necessary increment here to rows scanned metric. + // Placed after getHintForRejectedRow so that a buggy filter throwing DNRIOE doesn't + // leave the metric incremented for a row that was never actually processed. incrementCountOfRowsScannedMetric(scannerContext); - boolean moreRows = nextRow(scannerContext, current); + boolean moreRows = (rowHint != null) + ? nextRowViaHint(scannerContext, current, rowHint) + : nextRow(scannerContext, current); if (!moreRows) { return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues(); } @@ -742,6 +751,49 @@ protected boolean nextRow(ScannerContext scannerContext, Cell curRowCell) throws || this.region.getCoprocessorHost().postScannerFilterRow(this, curRowCell); } + /** + * Fast-path alternative to {@link #nextRow} used when the filter has provided a seek hint via + * {@link org.apache.hadoop.hbase.filter.Filter#getHintForRejectedRow(Cell)}. Instead of iterating + * through every cell in the rejected row one-by-one, this method issues a single seek to jump + * directly to the filter's suggested position ({@code requestSeek} for forward scans, + * {@code backwardSeek} for reversed scans). + */ + private boolean nextRowViaHint(ScannerContext scannerContext, Cell curRowCell, Cell hint) + throws IOException { + assert this.joinedContinuationRow == null : "Trying to go to next row during joinedHeap read."; + + int difference = comparator.compareRows(hint, curRowCell); + if ((!reversed && difference > 0) || (reversed && difference < 0)) { + scannerContext.setSkippingRow(true); + if (reversed) { + this.storeHeap.backwardSeek(hint); + } else { + this.storeHeap.requestSeek(hint, true, true); + } + scannerContext.setSkippingRow(false); + + resetFilters(); + + return this.region.getCoprocessorHost() == null + || this.region.getCoprocessorHost().postScannerFilterRow(this, curRowCell); + } + + return nextRow(scannerContext, curRowCell); + } + + /** + * Asks the current {@link org.apache.hadoop.hbase.filter.FilterWrapper} for a seek hint to use + * after a row has been rejected by {@link #filterRowKey}. If the wrapped filter overrides + * {@link org.apache.hadoop.hbase.filter.Filter#getHintForRejectedRow(Cell)}, this returns its + * answer; otherwise returns {@code null}. + */ + private Cell getHintForRejectedRow(Cell rowCell) throws IOException { + if (filter == null) { + return null; + } + return filter.getHintForRejectedRow(rowCell); + } + protected boolean shouldStop(Cell currentRowCell) { if (currentRowCell == null) { return true; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java index 6c3d002b0929..19b90e844062 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java @@ -59,6 +59,18 @@ public abstract class UserScanQueryMatcher extends ScanQueryMatcher { private Cell curColCell = null; + /** + * Holds a seek-hint produced by {@link org.apache.hadoop.hbase.filter.Filter#getSkipHint(Cell)} + * at one of the structural short-circuit points in {@link #matchColumn}. When non-null this is + * returned by {@link #getNextKeyHint} instead of delegating to + * {@link org.apache.hadoop.hbase.filter.Filter#getNextCellHint}, because the hint was computed + * for a cell that never reached {@code filterCell}. Cleared on every {@link #getNextKeyHint} call + * so it cannot leak across multiple seek-hint cycles. + */ + private Cell pendingSkipHint = null; + + private final boolean reversed; + private static Cell createStartKey(Scan scan, ScanInfo scanInfo) { if (scan.includeStartRow()) { return createStartKeyFromRow(scan.getStartRow(), scanInfo); @@ -80,6 +92,7 @@ protected UserScanQueryMatcher(Scan scan, ScanInfo scanInfo, ColumnTracker colum this.versionsAfterFilter = 0; } this.stopRow = scan.getStopRow(); + this.reversed = scan.isReversed(); TimeRange timeRange = scan.getColumnFamilyTimeRange().get(scanInfo.getFamily()); if (timeRange == null) { this.tr = scan.getTimeRange(); @@ -105,6 +118,11 @@ public Filter getFilter() { @Override public Cell getNextKeyHint(Cell cell) throws IOException { + if (pendingSkipHint != null) { + Cell hint = pendingSkipHint; + pendingSkipHint = null; + return hint; + } if (filter == null) { return null; } else { @@ -118,20 +136,47 @@ public void beforeShipped() throws IOException { if (curColCell != null) { this.curColCell = KeyValueUtil.toNewKeyCell(this.curColCell); } + if (pendingSkipHint != null) { + this.pendingSkipHint = KeyValueUtil.toNewKeyCell(this.pendingSkipHint); + } + } + + @Override + public void setToNewRow(Cell currentRow) { + pendingSkipHint = null; + super.setToNewRow(currentRow); + } + + @Override + public void clearCurrentRow() { + pendingSkipHint = null; + super.clearCurrentRow(); } + // At each structural short-circuit below (time-range, column-exclusion, version-exhaustion), + // the filter is consulted via resolveSkipHint() before falling back to the default skip/seek + // code. This lets filters provide a forward seek target even when filterCell is never called. protected final MatchCode matchColumn(Cell cell, long timestamp, byte typeByte) throws IOException { int tsCmp = tr.compare(timestamp); if (tsCmp > 0) { + if (resolveSkipHint(cell)) { + return MatchCode.SEEK_NEXT_USING_HINT; + } return MatchCode.SKIP; } if (tsCmp < 0) { + if (resolveSkipHint(cell)) { + return MatchCode.SEEK_NEXT_USING_HINT; + } return columns.getNextRowOrNextColumn(cell); } // STEP 1: Check if the column is part of the requested columns MatchCode matchCode = columns.checkColumn(cell, typeByte); if (matchCode != MatchCode.INCLUDE) { + if (resolveSkipHint(cell)) { + return MatchCode.SEEK_NEXT_USING_HINT; + } return matchCode; } /* @@ -141,8 +186,14 @@ protected final MatchCode matchColumn(Cell cell, long timestamp, byte typeByte) matchCode = columns.checkVersions(cell, timestamp, typeByte, false); switch (matchCode) { case SKIP: + if (resolveSkipHint(cell)) { + return MatchCode.SEEK_NEXT_USING_HINT; + } return MatchCode.SKIP; case SEEK_NEXT_COL: + if (resolveSkipHint(cell)) { + return MatchCode.SEEK_NEXT_USING_HINT; + } return MatchCode.SEEK_NEXT_COL; default: // It means it is INCLUDE, INCLUDE_AND_SEEK_NEXT_COL or INCLUDE_AND_SEEK_NEXT_ROW. @@ -156,6 +207,31 @@ protected final MatchCode matchColumn(Cell cell, long timestamp, byte typeByte) : mergeFilterResponse(cell, matchCode, filter.filterCell(cell)); } + /** + * Asks the current filter for a seek hint via + * {@link org.apache.hadoop.hbase.filter.Filter#getSkipHint(Cell)}, validates the returned cell, + * and if non-null stores it in {@link #pendingSkipHint} so that {@link #getNextKeyHint} can + * return it when the scan pipeline asks for the seek target after receiving + * {@link ScanQueryMatcher.MatchCode#SEEK_NEXT_USING_HINT}. + */ + private boolean resolveSkipHint(Cell cell) throws IOException { + if (filter == null) { + return false; + } + Cell raw = filter.getSkipHint(cell); + if (raw == null) { + return false; + } + // Full-key compare is intentional: skip hints can advance within the same row + // (e.g., to a later column), not just across rows. + int cmp = rowComparator.compare(raw, cell); + if ((!reversed && cmp <= 0) || (reversed && cmp >= 0)) { + return false; + } + pendingSkipHint = raw; + return true; + } + /** * Call this when scan has filter. Decide the desired behavior by checkVersions's MatchCode and * filterCell's ReturnCode. Cell may be skipped by filter, so the column versions in result may be diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java new file mode 100644 index 000000000000..dfcf1b73809d --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java @@ -0,0 +1,644 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.filter; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.PrivateCellUtil; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; +import org.apache.hadoop.hbase.client.Durability; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.client.TableDescriptorBuilder; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.RegionScanner; +import org.apache.hadoop.hbase.testclassification.FilterTests; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInfo; + +@Tag(FilterTests.TAG) +@Tag(MediumTests.TAG) +public class TestFilterHintForRejectedRow { + + private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + + private static final byte[] FAMILY = Bytes.toBytes("f"); + private static final byte[] FAMILY2 = Bytes.toBytes("g"); + private static final int CELLS_PER_ROW = 10; + private static final byte[] VALUE = Bytes.toBytes("value"); + + private HRegion region; + + @BeforeEach + public void setUp(TestInfo testInfo) throws Exception { + TableDescriptor tableDescriptor = + TableDescriptorBuilder.newBuilder(TableName.valueOf(testInfo.getTestMethod().get().getName())) + .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY)) + .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY2)).build(); + RegionInfo info = RegionInfoBuilder.newBuilder(tableDescriptor.getTableName()).build(); + this.region = HBaseTestingUtility.createRegionAndWAL(info, TEST_UTIL.getDataTestDir(), + TEST_UTIL.getConfiguration(), tableDescriptor); + } + + @AfterEach + public void tearDown() throws Exception { + HBaseTestingUtility.closeRegionAndWAL(this.region); + } + + private void writeRows(String prefix, int count) throws IOException { + for (int i = 0; i < count; i++) { + byte[] row = Bytes.toBytes(String.format("%s-%02d", prefix, i)); + Put p = new Put(row); + p.setDurability(Durability.SKIP_WAL); + for (int j = 0; j < CELLS_PER_ROW; j++) { + p.addColumn(FAMILY, Bytes.toBytes(String.format("q-%02d", j)), VALUE); + } + this.region.put(p); + } + this.region.flush(true); + } + + private void writeRowsMultiFamily(String prefix, int count) throws IOException { + for (int i = 0; i < count; i++) { + byte[] row = Bytes.toBytes(String.format("%s-%02d", prefix, i)); + Put p = new Put(row); + p.setDurability(Durability.SKIP_WAL); + for (int j = 0; j < CELLS_PER_ROW; j++) { + byte[] qual = Bytes.toBytes(String.format("q-%02d", j)); + p.addColumn(FAMILY, qual, VALUE); + p.addColumn(FAMILY2, qual, VALUE); + } + this.region.put(p); + } + this.region.flush(true); + } + + private List scanAll(Scan scan) throws IOException { + List results = new ArrayList<>(); + try (RegionScanner scanner = region.getScanner(scan)) { + List rowCells = new ArrayList<>(); + boolean hasMore; + do { + hasMore = scanner.next(rowCells); + results.addAll(rowCells); + rowCells.clear(); + } while (hasMore); + } + return results; + } + + @Test + public void testHintAndNoHintReturnSameCellsOnSameData() throws IOException { + final String prefix = "row"; + final int rejectedCount = 5; + final int acceptedCount = 5; + writeRows(prefix, rejectedCount + acceptedCount); + + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, rejectedCount)); + final AtomicInteger hintCallCount = new AtomicInteger(0); + + FilterBase hintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + hintCallCount.incrementAndGet(); + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + FilterBase noHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + }; + + List hintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(hintFilter)); + List noHintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(noHintFilter)); + + assertEquals(noHintResults.size(), hintResults.size(), + "Both paths must return the same number of cells"); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)), + "Cell mismatch at index " + i); + } + assertEquals(acceptedCount * CELLS_PER_ROW, hintResults.size()); + assertEquals(1, hintCallCount.get(), + "getHintForRejectedRow must be called exactly once: the hint skips all rejected rows"); + } + + @Test + public void testHintBeyondLastRowTerminatesScanGracefully() throws IOException { + final String prefix = "hint-beyond"; + writeRows(prefix, 5); + + final byte[] beyondAllRows = Bytes.toBytes("zzz-beyond-table-end"); + + FilterBase beyondHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return true; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(beyondAllRows); + } + }; + + List results = scanAll(new Scan().addFamily(FAMILY).setFilter(beyondHintFilter)); + assertTrue(results.isEmpty(), + "When the hint is past the last row, no cells should be returned"); + } + + @Test + public void testPerRowHintCalledOncePerRejectedRow() throws IOException { + final String prefix = "hint-perrow"; + final int rejectedCount = 4; + final int acceptedCount = 2; + writeRows(prefix, rejectedCount + acceptedCount); + + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, rejectedCount)); + final AtomicInteger hintCallCount = new AtomicInteger(0); + + FilterBase perRowHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + hintCallCount.incrementAndGet(); + return PrivateCellUtil.createFirstOnNextRow(firstRowCell); + } + }; + + List results = scanAll(new Scan().addFamily(FAMILY).setFilter(perRowHintFilter)); + + assertEquals(acceptedCount * CELLS_PER_ROW, results.size()); + for (Cell c : results) { + assertTrue( + Bytes.compareTo(c.getRowArray(), c.getRowOffset(), c.getRowLength(), acceptedStartRow, 0, + acceptedStartRow.length) >= 0, + "Every returned cell must belong to the accepted row range"); + } + assertEquals(rejectedCount, hintCallCount.get(), + "getHintForRejectedRow must be called once per rejected row in the per-row hint strategy"); + } + + @Test + public void testReversedScanWithHint() throws IOException { + final String prefix = "row"; + final int totalRows = 10; + writeRows(prefix, totalRows); + + // Accept rows 00-04 (lower half), reject rows 05-09 (upper half). + // In a reversed scan, the scanner starts at row-09 and moves backward. + final byte[] rejectThreshold = Bytes.toBytes(String.format("%s-%02d", prefix, 5)); + final byte[] hintTarget = Bytes.toBytes(String.format("%s-%02d", prefix, 4)); + final AtomicInteger hintCallCount = new AtomicInteger(0); + + FilterBase hintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + rejectThreshold, 0, rejectThreshold.length) >= 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + hintCallCount.incrementAndGet(); + // In reversed scan, hint must point backward (to a smaller row key). + return PrivateCellUtil.createFirstOnRow(hintTarget); + } + }; + + FilterBase noHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + rejectThreshold, 0, rejectThreshold.length) >= 0; + } + }; + + Scan reversedHintScan = new Scan().addFamily(FAMILY).setReversed(true).setFilter(hintFilter); + Scan reversedNoHintScan = + new Scan().addFamily(FAMILY).setReversed(true).setFilter(noHintFilter); + + List hintResults = scanAll(reversedHintScan); + List noHintResults = scanAll(reversedNoHintScan); + + assertEquals(noHintResults.size(), hintResults.size(), + "Both paths must return the same number of cells"); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)), + "Cell mismatch at index " + i); + } + assertEquals(5 * CELLS_PER_ROW, hintResults.size()); + assertEquals(1, hintCallCount.get(), + "getHintForRejectedRow must be called exactly once for reversed scan"); + } + + @Test + public void testGetSkipHintWithTimeRangeIntegration() throws IOException { + final long insideTs = 2000; + final long outsideTs = 500; + final int rowCount = 5; + + for (int i = 0; i < rowCount; i++) { + byte[] row = Bytes.toBytes(String.format("skiprow-%02d", i)); + Put p = new Put(row); + p.setDurability(Durability.SKIP_WAL); + p.addColumn(FAMILY, Bytes.toBytes("q"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("q"), outsideTs, VALUE); + region.put(p); + } + region.flush(true); + + final AtomicInteger skipHintCalls = new AtomicInteger(0); + + FilterBase skipHintFilter = new FilterBase() { + @Override + public Cell getSkipHint(Cell skippedCell) { + skipHintCalls.incrementAndGet(); + return PrivateCellUtil.createFirstOnNextRow(skippedCell); + } + }; + + Scan hintScan = new Scan().addFamily(FAMILY).setTimeRange(1000, 3000).setFilter(skipHintFilter); + Scan noHintScan = new Scan().addFamily(FAMILY).setTimeRange(1000, 3000); + + List hintResults = scanAll(hintScan); + List noHintResults = scanAll(noHintScan); + + assertEquals(noHintResults.size(), hintResults.size(), + "Both paths must return the same number of cells"); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)), + "Cell mismatch at index " + i); + } + assertEquals(rowCount, hintResults.size()); + } + + @Test + public void testBackwardHintFallsBackToNextRow() throws IOException { + final String prefix = "row"; + writeRows(prefix, 5); + + final byte[] row00 = Bytes.toBytes(String.format("%s-%02d", prefix, 0)); + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, 2)); + + FilterBase backwardHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(row00); + } + }; + + FilterBase noHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + }; + + List hintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(backwardHintFilter)); + List noHintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(noHintFilter)); + + assertEquals(noHintResults.size(), hintResults.size(), + "Backward hint must produce same results as no-hint path"); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)), + "Cell mismatch at index " + i); + } + } + + @Test + public void testSameRowHintFallsBackToNextRow() throws IOException { + final String prefix = "row"; + writeRows(prefix, 5); + + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, 2)); + final AtomicInteger hintCallCount = new AtomicInteger(0); + + FilterBase sameRowHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + hintCallCount.incrementAndGet(); + return PrivateCellUtil.createFirstOnRow(CellUtil.cloneRow(firstRowCell)); + } + }; + + FilterBase noHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + }; + + List hintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(sameRowHintFilter)); + List noHintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(noHintFilter)); + + assertEquals(noHintResults.size(), hintResults.size(), + "Same-row hint must produce same results as no-hint path"); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)), + "Cell mismatch at index " + i); + } + assertEquals(3 * CELLS_PER_ROW, hintResults.size()); + } + + @Test + public void testCoprocessorHookCalledOncePerHintedRejection() throws IOException { + final String prefix = "row"; + final int rejectedCount = 3; + final int acceptedCount = 3; + writeRows(prefix, rejectedCount + acceptedCount); + + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, rejectedCount)); + final AtomicInteger hintCallCount = new AtomicInteger(0); + + FilterBase hintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + hintCallCount.incrementAndGet(); + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + List results = scanAll(new Scan().addFamily(FAMILY).setFilter(hintFilter)); + + assertEquals(1, hintCallCount.get()); + assertEquals(acceptedCount * CELLS_PER_ROW, results.size()); + } + + @Test + public void testMultiFamilyScanWithHint() throws IOException { + final String prefix = "row"; + final int rejectedCount = 3; + final int acceptedCount = 3; + writeRowsMultiFamily(prefix, rejectedCount + acceptedCount); + + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, rejectedCount)); + final AtomicInteger hintCallCount = new AtomicInteger(0); + + FilterBase hintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + hintCallCount.incrementAndGet(); + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + FilterBase noHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + }; + + // Scan both families. + Scan hintScan = new Scan().addFamily(FAMILY).addFamily(FAMILY2).setFilter(hintFilter); + Scan noHintScan = new Scan().addFamily(FAMILY).addFamily(FAMILY2).setFilter(noHintFilter); + + List hintResults = scanAll(hintScan); + List noHintResults = scanAll(noHintScan); + + assertEquals(noHintResults.size(), hintResults.size(), + "Both paths must return the same number of cells"); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)), + "Cell mismatch at index " + i); + } + assertEquals(2 * CELLS_PER_ROW * acceptedCount, hintResults.size()); + assertEquals(1, hintCallCount.get()); + } + + @Test + public void testDataCorrectnessVsUnfilteredScan() throws IOException { + final String prefix = "row"; + final int totalRows = 8; + writeRows(prefix, totalRows); + + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, 3)); + + FilterBase hintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + List filteredResults = scanAll(new Scan().addFamily(FAMILY).setFilter(hintFilter)); + + // Unfiltered scan starting at acceptedStartRow — should return the exact same cells. + Scan unfilteredScan = new Scan().addFamily(FAMILY).withStartRow(acceptedStartRow); + List unfilteredResults = scanAll(unfilteredScan); + + assertEquals(unfilteredResults.size(), filteredResults.size(), + "Filtered and unfiltered scans must return same cell count"); + for (int i = 0; i < filteredResults.size(); i++) { + assertTrue(CellUtil.equals(filteredResults.get(i), unfilteredResults.get(i)), + "Cell mismatch at index " + i); + } + } + + @Test + public void testHintOvershootingStopRowTerminatesGracefully() throws IOException { + final String prefix = "row"; + final int totalRows = 10; + writeRows(prefix, totalRows); + + // Scan rows 00-06 (stopRow=row-07, exclusive). Reject rows 00-02, hint to row-09. + // The hint overshoots the stop row, so the scan should terminate with no results + // from the rejected range and only include rows 03-06 from the non-rejected range. + final byte[] stopRow = Bytes.toBytes(String.format("%s-%02d", prefix, 7)); + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, 3)); + final byte[] hintTarget = Bytes.toBytes(String.format("%s-%02d", prefix, 9)); + + FilterBase hintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + // Hint past the stop row — scanner should terminate gracefully. + return PrivateCellUtil.createFirstOnRow(hintTarget); + } + }; + + Scan scan = new Scan().addFamily(FAMILY).withStopRow(stopRow).setFilter(hintFilter); + List results = scanAll(scan); + + // The hint jumps past stopRow, so no cells should be returned. + assertTrue(results.isEmpty(), "Hint past stop row must terminate scan with no results"); + } + + @Test + public void testHintWithinStopRowReturnsCorrectResults() throws IOException { + final String prefix = "row"; + final int totalRows = 10; + writeRows(prefix, totalRows); + + // Scan rows 00-06 (stopRow=row-07, exclusive). Reject rows 00-02, hint to row-03. + final byte[] stopRow = Bytes.toBytes(String.format("%s-%02d", prefix, 7)); + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, 3)); + + FilterBase hintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + Scan hintScan = new Scan().addFamily(FAMILY).withStopRow(stopRow).setFilter(hintFilter); + List hintResults = scanAll(hintScan); + + // Should get rows 03-06 (4 rows). + Scan baselineScan = + new Scan().addFamily(FAMILY).withStartRow(acceptedStartRow).withStopRow(stopRow); + List baselineResults = scanAll(baselineScan); + + assertEquals(baselineResults.size(), hintResults.size(), + "Hint within stop row must return same results as baseline"); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue(CellUtil.equals(hintResults.get(i), baselineResults.get(i)), + "Cell mismatch at index " + i); + } + assertEquals(4 * CELLS_PER_ROW, hintResults.size()); + } + + @Test + public void testGetSkipHintCalledForReversedScan() throws IOException { + final long insideTs = 1500; + final long tooNewTs = 5000; + final int rowCount = 5; + final byte[] qInside = Bytes.toBytes("q-inside"); + final byte[] qNew = Bytes.toBytes("q-new"); + + // Write two qualifiers per row: "q-inside" at ts=1500 (within range) and "q-new" at + // ts=5000 (above the range upper bound). The time-range gate fires for the too-new cell + // before version tracking can absorb it, ensuring getSkipHint is consulted. + for (int i = 0; i < rowCount; i++) { + byte[] row = Bytes.toBytes(String.format("skiprev-%02d", i)); + Put p = new Put(row); + p.setDurability(Durability.SKIP_WAL); + p.addColumn(FAMILY, qInside, insideTs, VALUE); + p.addColumn(FAMILY, qNew, tooNewTs, VALUE); + region.put(p); + } + region.flush(true); + + final AtomicInteger skipHintCalls = new AtomicInteger(0); + + // Return null to verify the code path is reached without altering scan semantics. + FilterBase skipHintFilter = new FilterBase() { + @Override + public Cell getSkipHint(Cell skippedCell) { + skipHintCalls.incrementAndGet(); + return null; + } + }; + + // Reversed scan with time range [1000, 3000): insideTs cells pass, tooNewTs cells are + // structurally skipped (tsCmp > 0). The skip hint should be consulted. + Scan hintScan = new Scan().addFamily(FAMILY).setTimeRange(1000, 3000).setReversed(true) + .setFilter(skipHintFilter); + Scan noHintScan = new Scan().addFamily(FAMILY).setTimeRange(1000, 3000).setReversed(true); + + List hintResults = scanAll(hintScan); + List noHintResults = scanAll(noHintScan); + + assertEquals(noHintResults.size(), hintResults.size(), + "Both paths must return the same number of cells"); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)), + "Cell mismatch at index " + i); + } + assertEquals(rowCount, hintResults.size()); + assertTrue(skipHintCalls.get() > 0, + "getSkipHint must be called at least once for reversed scan"); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java index 54a135b2ff0f..087366162e3a 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java @@ -20,6 +20,7 @@ import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; import java.io.IOException; import java.util.ArrayList; @@ -290,6 +291,19 @@ public ReturnCode filterKeyValue(final Cell c) throws IOException { } } + private static class FixedSkipHintFilter extends FilterBase { + final Cell hint; + + FixedSkipHintFilter(Cell hint) { + this.hint = hint; + } + + @Override + public Cell getSkipHint(Cell skippedCell) throws IOException { + return hint; + } + } + /** * Here is the unit test for UserScanQueryMatcher#mergeFilterResponse, when the number of cells * exceed the versions requested in scan, we should return SEEK_NEXT_COL, but if current match @@ -393,6 +407,216 @@ scanWithFilter, new ScanInfo(this.conf, fam2, 0, 5, ttl, KeepDeletedCells.FALSE, assertArrayEquals(nextCell.getQualifierArray(), col4); } + /** Verify that getSkipHint is consulted when a cell is newer than the time-range upper bound. */ + @Test + public void testSkipHintConsultedForCellNewerThanTimeRange() throws IOException { + long now = EnvironmentEdgeManager.currentTime(); + long minTs = now - 2000; + long maxTs = now - 1000; + + KeyValue hintCell = new KeyValue(row2, fam2, col1, now - 1500, data); + Scan timeRangeScan = new Scan().addFamily(fam2).setTimeRange(minTs, maxTs) + .setFilter(new FixedSkipHintFilter(hintCell)); + + long now2 = EnvironmentEdgeManager.currentTime(); + UserScanQueryMatcher qm = UserScanQueryMatcher.create(timeRangeScan, + new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, + 0, rowComparator, false), + null, 0, now2, null); + + // ts=now2 >= maxTs, so tsCmp > 0: time-range gate fires before filterCell. + KeyValue tooNew = new KeyValue(row1, fam2, col1, now2, data); + qm.setToNewRow(tooNew); + + assertEquals(MatchCode.SEEK_NEXT_USING_HINT, qm.match(tooNew)); + assertEquals(hintCell, qm.getNextKeyHint(tooNew)); + assertNull(qm.getNextKeyHint(tooNew)); + } + + /** Verify that getSkipHint is consulted when a cell is older than the time-range lower bound. */ + @Test + public void testSkipHintConsultedForCellOlderThanTimeRange() throws IOException { + long now = EnvironmentEdgeManager.currentTime(); + long minTs = now - 500; + long maxTs = now; + + KeyValue hintCell = new KeyValue(row2, fam2, col1, now - 100, data); + Scan timeRangeScan = new Scan().addFamily(fam2).setTimeRange(minTs, maxTs) + .setFilter(new FixedSkipHintFilter(hintCell)); + + UserScanQueryMatcher qm = UserScanQueryMatcher.create(timeRangeScan, + new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, + 0, rowComparator, false), + null, 0, now, null); + + // ts = now-1000 < minTs (now-500), so tsCmp < 0: time-range gate fires. + KeyValue tooOld = new KeyValue(row1, fam2, col1, now - 1000, data); + qm.setToNewRow(tooOld); + + assertEquals(MatchCode.SEEK_NEXT_USING_HINT, qm.match(tooOld)); + assertEquals(hintCell, qm.getNextKeyHint(tooOld)); + assertNull(qm.getNextKeyHint(tooOld)); + } + + /** Verify that a null getSkipHint falls back to the original SKIP match code. */ + @Test + public void testNullSkipHintFallsThroughToOriginalCodeOnTimeRangeGate() throws IOException { + long now = EnvironmentEdgeManager.currentTime(); + long minTs = now - 2000; + long maxTs = now - 1000; + + Scan timeRangeScan = + new Scan().addFamily(fam2).setTimeRange(minTs, maxTs).setFilter(new AlwaysIncludeFilter()); + + long now2 = EnvironmentEdgeManager.currentTime(); + UserScanQueryMatcher qm = UserScanQueryMatcher.create(timeRangeScan, + new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, + 0, rowComparator, false), + null, 0, now2, null); + + KeyValue tooNew = new KeyValue(row1, fam2, col1, now2, data); + qm.setToNewRow(tooNew); + + assertEquals(MatchCode.SKIP, qm.match(tooNew)); + } + + /** Verify that getSkipHint is consulted when a column is excluded by the scan's column set. */ + @Test + public void testSkipHintConsultedForExcludedColumn() throws IOException { + long now = EnvironmentEdgeManager.currentTime(); + + // get.getFamilyMap().get(fam2) contains col2, col4, col5; presenting col1 triggers exclusion. + KeyValue hintCell = new KeyValue(row1, fam2, col2, 1, data); + Scan scanWithFilter = new Scan(scan).setFilter(new FixedSkipHintFilter(hintCell)); + + UserScanQueryMatcher qm = UserScanQueryMatcher.create( + scanWithFilter, new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, + HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false), + get.getFamilyMap().get(fam2), now - ttl, now, null); + + // col1 is not in {col2, col4, col5}: checkColumn returns a non-INCLUDE code. + KeyValue excludedCol = new KeyValue(row1, fam2, col1, 1, data); + qm.setToNewRow(excludedCol); + + assertEquals(MatchCode.SEEK_NEXT_USING_HINT, qm.match(excludedCol)); + assertEquals(hintCell, qm.getNextKeyHint(excludedCol)); + assertNull(qm.getNextKeyHint(excludedCol)); + } + + /** Verify that getSkipHint is consulted when the version limit is exhausted. */ + @Test + public void testSkipHintConsultedOnVersionExhaustion() throws IOException { + long now = EnvironmentEdgeManager.currentTime(); + + KeyValue hintCell = new KeyValue(row2, fam2, col1, now, data); + Scan scanWithFilter = new Scan().addFamily(fam2).setFilter(new FixedSkipHintFilter(hintCell)); + + UserScanQueryMatcher qm = UserScanQueryMatcher.create(scanWithFilter, + new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, + 0, rowComparator, false), + null, now - ttl, now, null); + + KeyValue version1 = new KeyValue(row1, fam2, col1, now - 10, data); + qm.setToNewRow(version1); + assertEquals(MatchCode.INCLUDE, qm.match(version1)); + + // Second version: version limit exceeded, checkVersions returns SEEK_NEXT_COL. + KeyValue version2 = new KeyValue(row1, fam2, col1, now - 20, data); + assertEquals(MatchCode.SEEK_NEXT_USING_HINT, qm.match(version2)); + assertEquals(hintCell, qm.getNextKeyHint(version2)); + assertNull(qm.getNextKeyHint(version2)); + } + + /** Verify that pendingSkipHint is cleared when a row transition occurs via setToNewRow. */ + @Test + public void testPendingSkipHintClearedOnRowTransition() throws IOException { + long now = EnvironmentEdgeManager.currentTime(); + + KeyValue hintCell = new KeyValue(row2, fam2, col1, now, data); + Scan scanWithFilter = new Scan().addFamily(fam2).setFilter(new FixedSkipHintFilter(hintCell)); + + UserScanQueryMatcher qm = UserScanQueryMatcher.create(scanWithFilter, + new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, + 0, rowComparator, false), + null, now - ttl, now, null); + + // Trigger a structural skip that stores pendingSkipHint. + KeyValue version1 = new KeyValue(row1, fam2, col1, now - 10, data); + qm.setToNewRow(version1); + assertEquals(MatchCode.INCLUDE, qm.match(version1)); + KeyValue version2 = new KeyValue(row1, fam2, col1, now - 20, data); + assertEquals(MatchCode.SEEK_NEXT_USING_HINT, qm.match(version2)); + + // Do NOT consume the hint via getNextKeyHint. Instead, simulate a row transition. + KeyValue newRowCell = new KeyValue(row2, fam2, col1, now - 10, data); + qm.setToNewRow(newRowCell); + + // After row transition, pendingSkipHint must be null — getNextKeyHint should delegate + // to the filter's getNextCellHint (which returns null for FixedSkipHintFilter). + assertNull(qm.getNextKeyHint(newRowCell)); + } + + /** Verify that the normal filterCell/getNextCellHint path is unaffected by pendingSkipHint. */ + @Test + public void testNormalFilterCellHintPathUnaffectedBySkipHintChange() throws IOException { + long now = EnvironmentEdgeManager.currentTime(); + + final KeyValue filterHintCell = new KeyValue(row2, fam2, col1, now, data); + + FilterBase seekFilter = new FilterBase() { + @Override + public ReturnCode filterCell(Cell c) { + return ReturnCode.SEEK_NEXT_USING_HINT; + } + + @Override + public Cell getNextCellHint(Cell currentCell) { + return filterHintCell; + } + }; + + Scan scanWithFilter = new Scan().addFamily(fam2).setFilter(seekFilter); + + UserScanQueryMatcher qm = UserScanQueryMatcher.create(scanWithFilter, + new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, + 0, rowComparator, false), + null, now - ttl, now, null); + + KeyValue cell = new KeyValue(row1, fam2, col1, now - 10, data); + qm.setToNewRow(cell); + + assertEquals(MatchCode.SEEK_NEXT_USING_HINT, qm.match(cell)); + assertEquals(filterHintCell, qm.getNextKeyHint(cell)); + } + + /** Verify that getSkipHint works correctly for reversed scans (hint must be smaller). */ + @Test + public void testSkipHintConsultedForReversedScan() throws IOException { + long now = EnvironmentEdgeManager.currentTime(); + long minTs = now - 2000; + long maxTs = now - 1000; + + // For reversed scan, the hint must point backward (smaller key). + KeyValue hintCell = new KeyValue(row1, fam2, col1, now - 1500, data); + Scan reversedScan = new Scan().addFamily(fam2).setTimeRange(minTs, maxTs).setReversed(true) + .setFilter(new FixedSkipHintFilter(hintCell)); + + long now2 = EnvironmentEdgeManager.currentTime(); + UserScanQueryMatcher qm = UserScanQueryMatcher.create(reversedScan, + new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, + 0, rowComparator, false), + null, 0, now2, null); + + // ts=now2 >= maxTs so tsCmp > 0: time-range gate fires. + // row2 > row1 in natural order, so for reversed scan hint (row1) is "forward" (smaller). + KeyValue tooNew = new KeyValue(row2, fam2, col1, now2, data); + qm.setToNewRow(tooNew); + + assertEquals(MatchCode.SEEK_NEXT_USING_HINT, qm.match(tooNew)); + assertEquals(hintCell, qm.getNextKeyHint(tooNew)); + assertNull(qm.getNextKeyHint(tooNew)); + } + /** * After enough consecutive range delete markers, the matcher should switch from SKIP to * SEEK_NEXT_COL. Point deletes and KEEP_DELETED_CELLS always SKIP.