From 815d209c9c20c96394da6c5720710400d367466c Mon Sep 17 00:00:00 2001 From: Shubham Roy Date: Sat, 7 Mar 2026 18:47:21 +0530 Subject: [PATCH 1/9] HBASE-29974: Fixing under-utilisation of filter hints due to early circuit breaks in scan pipeline, causing unnecessary cell-level iteration. --- .../apache/hadoop/hbase/filter/Filter.java | 69 ++++ .../hadoop/hbase/filter/FilterBase.java | 22 ++ .../hadoop/hbase/filter/FilterWrapper.java | 21 + .../hbase/regionserver/RegionScannerImpl.java | 74 +++- .../querymatcher/UserScanQueryMatcher.java | 107 ++++- .../filter/TestFilterHintForRejectedRow.java | 367 ++++++++++++++++++ .../TestUserScanQueryMatcher.java | 249 ++++++++++++ 7 files changed, 899 insertions(+), 10 deletions(-) create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java index 6e2249e83e3c..2401e5ff8893 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java @@ -188,6 +188,75 @@ public enum ReturnCode { */ abstract public Cell getNextCellHint(final Cell currentCell) throws IOException; + /** + * Provides a seek hint to bypass row-by-row scanning after {@link #filterRowKey(Cell)} rejects a + * row. When {@code filterRowKey} returns {@code true} the scan pipeline would normally iterate + * through every remaining cell in the rejected row one-by-one (via {@code nextRow()}) before + * moving on. If the filter can determine a better forward position — for example, the next range + * boundary in a {@code MultiRowRangeFilter} — it should return that target cell here, allowing + * the scanner to seek directly past the unwanted rows. + * + *

Contract: + *

+ * + * @param firstRowCell the first cell encountered in the rejected row; contains the row key that + * was passed to {@code filterRowKey} + * @return a {@link Cell} representing the earliest position the scanner should seek to, or + * {@code null} if this filter cannot provide a better position than a sequential skip + * @throws IOException in case an I/O or filter-specific failure needs to be signaled + * @see #filterRowKey(Cell) + */ + public Cell getHintForRejectedRow(final Cell firstRowCell) throws IOException { + return null; + } + + /** + * Provides a seek hint for cells that are structurally skipped by the scan pipeline + * before {@link #filterCell(Cell)} is ever reached. The pipeline short-circuits on + * several criteria — time-range mismatch, column-set exclusion, and version-limit exhaustion — + * and in each case the filter is bypassed entirely. When an implementation can compute a + * meaningful forward position purely from the cell's coordinates (without needing the + * {@code filterCell} call sequence), it should return that position here so the scanner can + * seek ahead instead of advancing one cell at a time. + * + *

Contract: + *

+ * + * @param skippedCell the cell that was rejected by the time-range, column, or version gate + * before {@code filterCell} could be consulted + * @return a {@link Cell} representing the earliest position the scanner should seek to, or + * {@code null} if this filter cannot provide a better position than the structural hint + * @throws IOException in case an I/O or filter-specific failure needs to be signaled + * @see #filterCell(Cell) + * @see #getNextCellHint(Cell) + */ + public Cell getSkipHint(final Cell skippedCell) throws IOException { + return null; + } + /** * Check that given column family is essential for filter to check row. Most filters always return * true here. But some could have more sophisticated logic which could significantly reduce diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterBase.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterBase.java index c80da159b7ec..43b0c52f4dfc 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterBase.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterBase.java @@ -98,6 +98,28 @@ public Cell getNextCellHint(Cell currentCell) throws IOException { return null; } + /** + * Filters that cannot provide a seek hint after row-key rejection can inherit this no-op + * implementation. Subclasses whose row-key logic (e.g. a range pointer advanced inside + * {@link #filterRowKey(Cell)}) makes a better seek target available should override this. + * {@inheritDoc} + */ + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) throws IOException { + return null; + } + + /** + * Filters that cannot provide a structural-skip seek hint can inherit this no-op implementation. + * Subclasses with purely configuration-driven, stateless hint computation (e.g. a fixed column + * range or fuzzy-row pattern) may override this to avoid cell-by-cell advancement when the + * time-range, column, or version gate fires. {@inheritDoc} + */ + @Override + public Cell getSkipHint(Cell skippedCell) throws IOException { + return null; + } + /** * By default, we require all scan's column families to be present. Our subclasses may be more * precise. {@inheritDoc} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/filter/FilterWrapper.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/filter/FilterWrapper.java index 4fc2257140e3..007517ed34a6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/filter/FilterWrapper.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/filter/FilterWrapper.java @@ -95,6 +95,27 @@ public Cell getNextCellHint(Cell currentCell) throws IOException { return this.filter.getNextCellHint(currentCell); } + /** + * Delegates to the wrapped filter's {@link Filter#getHintForRejectedRow(Cell)} so that the scan + * pipeline can seek directly past a rejected row rather than iterating cell-by-cell via + * {@code nextRow()}. + * {@inheritDoc} + */ + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) throws IOException { + return this.filter.getHintForRejectedRow(firstRowCell); + } + + /** + * Delegates to the wrapped filter's {@link Filter#getSkipHint(Cell)} so that the scan pipeline + * can seek ahead when a cell is structurally skipped before {@code filterCell} is reached. + * {@inheritDoc} + */ + @Override + public Cell getSkipHint(Cell skippedCell) throws IOException { + return this.filter.getSkipHint(skippedCell); + } + @Override public boolean filterRowKey(Cell cell) throws IOException { if (filterAllRemaining()) return true; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java index c69dc6e2df6a..40c817e9b492 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.ExtendedCell; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; @@ -502,7 +503,12 @@ private boolean nextInternal(List results, ScannerContext // here we are filtering a row based purely on its row key, preventing us from calling // #populateResult. Thus, perform the necessary increment here to rows scanned metric incrementCountOfRowsScannedMetric(scannerContext); - boolean moreRows = nextRow(scannerContext, current); + // HBASE-29974: ask the filter for a seek hint so we can jump directly past the rejected + // row instead of iterating through its cells one-by-one via nextRow(). + ExtendedCell rowHint = getHintForRejectedRow(current); + boolean moreRows = (rowHint != null) + ? nextRowViaHint(scannerContext, current, rowHint) + : nextRow(scannerContext, current); if (!moreRows) { return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues(); } @@ -744,6 +750,72 @@ protected boolean nextRow(ScannerContext scannerContext, Cell curRowCell) throws || this.region.getCoprocessorHost().postScannerFilterRow(this, curRowCell); } + /** + * Fast-path alternative to {@link #nextRow} used when the filter has provided a seek hint via + * {@link org.apache.hadoop.hbase.filter.Filter#getHintForRejectedRow(Cell)}. Instead of + * iterating through every cell in the rejected row one-by-one, this method issues a single + * {@code requestSeek} to jump directly to the filter's suggested position. + * + *

The skipping-row mode flag is set around the seek so that block-level size tracking + * continues to function (consistent with {@link #nextRow}), and the filter state is reset + * afterwards so the next row starts with a clean filter context. + * + * @param scannerContext scanner context used for limit tracking + * @param curRowCell the first cell of the row that was rejected by {@code filterRowKey}; + * passed to the coprocessor hook for observability + * @param hint the validated {@link ExtendedCell} returned by the filter; the + * scanner will seek to this position + * @return {@code true} if scanning should continue, {@code false} if a coprocessor requests + * an early stop (mirrors the contract of {@link #nextRow}) + * @throws IOException if the seek or the coprocessor hook signals a failure + */ + private boolean nextRowViaHint(ScannerContext scannerContext, Cell curRowCell, ExtendedCell hint) + throws IOException { + assert this.joinedContinuationRow == null : "Trying to go to next row during joinedHeap read."; + + // Enable skipping-row mode so block-size accounting is consistent with nextRow(). + scannerContext.setSkippingRow(true); + this.storeHeap.requestSeek(hint, true, true); + scannerContext.setSkippingRow(false); + + resetFilters(); + + // Notify coprocessors, identical to the epilogue in nextRow(). + return this.region.getCoprocessorHost() == null + || this.region.getCoprocessorHost().postScannerFilterRow(this, curRowCell); + } + + /** + * Asks the current {@link org.apache.hadoop.hbase.filter.FilterWrapper} for a seek hint to use + * after a row has been rejected by {@link #filterRowKey}. If the wrapped filter overrides + * {@link org.apache.hadoop.hbase.filter.Filter#getHintForRejectedRow(Cell)}, this returns its + * answer as an {@link ExtendedCell}; otherwise returns {@code null}. + * + *

The returned cell is validated to be an {@link ExtendedCell} because filters run on the + * server side and the scanner infrastructure requires {@code ExtendedCell} references. + * + * @param rowCell the first cell of the rejected row (same cell passed to {@code filterRowKey}) + * @return a validated {@link ExtendedCell} seek target, or {@code null} if the filter provides + * no hint + * @throws DoNotRetryIOException if the filter returns a non-{@link ExtendedCell} instance + * @throws IOException if the filter signals an I/O failure + */ + private ExtendedCell getHintForRejectedRow(Cell rowCell) throws IOException { + if (filter == null) { + return null; + } + Cell hint = filter.getHintForRejectedRow(rowCell); + if (hint == null) { + return null; + } + if (!(hint instanceof ExtendedCell)) { + throw new DoNotRetryIOException( + "Incorrect filter implementation: the Cell returned by getHintForRejectedRow " + + "is not an ExtendedCell. Filter class: " + filter.getClass().getName()); + } + return (ExtendedCell) hint; + } + protected boolean shouldStop(Cell currentRowCell) { if (currentRowCell == null) { return true; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java index c07b91b77e68..f3a99ca0892d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java @@ -61,6 +61,16 @@ public abstract class UserScanQueryMatcher extends ScanQueryMatcher { private ExtendedCell curColCell = null; + /** + * Holds a seek-hint produced by {@link org.apache.hadoop.hbase.filter.Filter#getSkipHint(Cell)} + * at one of the structural short-circuit points in {@link #matchColumn}. When non-null this is + * returned by {@link #getNextKeyHint} instead of delegating to + * {@link org.apache.hadoop.hbase.filter.Filter#getNextCellHint}, because the hint was computed + * for a cell that never reached {@code filterCell}. Cleared on every {@link #getNextKeyHint} + * call so it cannot leak across multiple seek-hint cycles. + */ + private ExtendedCell pendingSkipHint = null; + private static ExtendedCell createStartKey(Scan scan, ScanInfo scanInfo) { if (scan.includeStartRow()) { return createStartKeyFromRow(scan.getStartRow(), scanInfo); @@ -107,18 +117,26 @@ public Filter getFilter() { @Override public ExtendedCell getNextKeyHint(ExtendedCell cell) throws IOException { + // A structural short-circuit in matchColumn (time-range, column, or version gate) may have + // stored a hint via resolveSkipHint() before returning SEEK_NEXT_USING_HINT. Drain and return + // it first; it takes priority because it was produced for the exact cell that triggered the + // seek code, without ever calling filterCell. + if (pendingSkipHint != null) { + ExtendedCell hint = pendingSkipHint; + pendingSkipHint = null; + return hint; + } + // Normal path: filterCell returned SEEK_NEXT_USING_HINT — delegate to the filter. if (filter == null) { return null; + } + Cell hint = filter.getNextCellHint(cell); + if (hint == null || hint instanceof ExtendedCell) { + return (ExtendedCell) hint; } else { - Cell hint = filter.getNextCellHint(cell); - if (hint == null || hint instanceof ExtendedCell) { - return (ExtendedCell) hint; - } else { - throw new DoNotRetryIOException("Incorrect filter implementation, " - + "the Cell returned by getNextKeyHint is not an ExtendedCell. Filter class: " - + filter.getClass().getName()); - } - + throw new DoNotRetryIOException("Incorrect filter implementation, " + + "the Cell returned by getNextKeyHint is not an ExtendedCell. Filter class: " + + filter.getClass().getName()); } } @@ -134,14 +152,40 @@ protected final MatchCode matchColumn(ExtendedCell cell, long timestamp, byte ty throws IOException { int tsCmp = tr.compare(timestamp); if (tsCmp > 0) { + // Cell is newer than the scan's time-range upper bound. Give the filter one last chance to + // provide a seek hint before we fall back to a plain cell-level SKIP. This addresses + // HBASE-29974 Path 2: time-range gate fires before filterCell is reached. + if (filter != null) { + ExtendedCell hint = resolveSkipHint(cell); + if (hint != null) { + return MatchCode.SEEK_NEXT_USING_HINT; + } + } return MatchCode.SKIP; } if (tsCmp < 0) { + // Cell is older than the scan's time-range lower bound. Give the filter a chance to provide + // a seek hint before we defer to the column tracker's next-row/next-column suggestion. + // Addresses HBASE-29974 Path 2: time-range gate fires before filterCell is reached. + if (filter != null) { + ExtendedCell hint = resolveSkipHint(cell); + if (hint != null) { + return MatchCode.SEEK_NEXT_USING_HINT; + } + } return columns.getNextRowOrNextColumn(cell); } // STEP 1: Check if the column is part of the requested columns MatchCode matchCode = columns.checkColumn(cell, typeByte); if (matchCode != MatchCode.INCLUDE) { + // Column is excluded by the scan's column set. Give the filter a chance to provide a + // seek hint before the column-tracker's suggestion is used. Addresses HBASE-29974 Path 3. + if (filter != null) { + ExtendedCell hint = resolveSkipHint(cell); + if (hint != null) { + return MatchCode.SEEK_NEXT_USING_HINT; + } + } return matchCode; } /* @@ -151,8 +195,24 @@ protected final MatchCode matchColumn(ExtendedCell cell, long timestamp, byte ty matchCode = columns.checkVersions(cell, timestamp, typeByte, false); switch (matchCode) { case SKIP: + // Version limit reached; skip this cell. Give the filter a hint opportunity before + // falling back to SKIP. Addresses HBASE-29974 Path 3: version gate fires before filterCell. + if (filter != null) { + ExtendedCell hint = resolveSkipHint(cell); + if (hint != null) { + return MatchCode.SEEK_NEXT_USING_HINT; + } + } return MatchCode.SKIP; case SEEK_NEXT_COL: + // Version limit reached; advance to the next column. Give the filter a hint opportunity + // before falling back to SEEK_NEXT_COL. Addresses HBASE-29974 Path 3. + if (filter != null) { + ExtendedCell hint = resolveSkipHint(cell); + if (hint != null) { + return MatchCode.SEEK_NEXT_USING_HINT; + } + } return MatchCode.SEEK_NEXT_COL; default: // It means it is INCLUDE, INCLUDE_AND_SEEK_NEXT_COL or INCLUDE_AND_SEEK_NEXT_ROW. @@ -166,6 +226,35 @@ protected final MatchCode matchColumn(ExtendedCell cell, long timestamp, byte ty : mergeFilterResponse(cell, matchCode, filter.filterCell(cell)); } + /** + * Calls {@link org.apache.hadoop.hbase.filter.Filter#getSkipHint(Cell)} on the current filter, + * validates the returned cell type, and stores it as {@link #pendingSkipHint} so that + * {@link #getNextKeyHint} can return it when the scan pipeline asks for the seek target after + * receiving {@link ScanQueryMatcher.MatchCode#SEEK_NEXT_USING_HINT}. + * + *

This is only called from the structural short-circuit branches of {@link #matchColumn}, + * where {@code filterCell} has not been called, in accordance with the stateless + * contract of {@code Filter#getSkipHint}. + * + * @param cell the cell that triggered the structural short-circuit + * @return the validated {@link ExtendedCell} hint, or {@code null} if the filter returns no hint + * @throws DoNotRetryIOException if the filter returns a non-{@link ExtendedCell} instance + * @throws IOException if the filter signals an I/O failure + */ + private ExtendedCell resolveSkipHint(ExtendedCell cell) throws IOException { + Cell raw = filter.getSkipHint(cell); + if (raw == null) { + return null; + } + if (!(raw instanceof ExtendedCell)) { + throw new DoNotRetryIOException( + "Incorrect filter implementation: the Cell returned by getSkipHint " + + "is not an ExtendedCell. Filter class: " + filter.getClass().getName()); + } + pendingSkipHint = (ExtendedCell) raw; + return pendingSkipHint; + } + /** * Call this when scan has filter. Decide the desired behavior by checkVersions's MatchCode and * filterCell's ReturnCode. Cell may be skipped by filter, so the column versions in result may be diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java new file mode 100644 index 000000000000..7207fa245d83 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java @@ -0,0 +1,367 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.filter; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtil; +import org.apache.hadoop.hbase.PrivateCellUtil; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; +import org.apache.hadoop.hbase.client.Durability; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.client.TableDescriptorBuilder; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.RegionScanner; +import org.apache.hadoop.hbase.testclassification.FilterTests; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.After; +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.rules.TestName; + +/** + * Integration tests for HBASE-29974 Path 1: + * {@link Filter#getHintForRejectedRow(Cell)} allows the scan pipeline to seek directly past + * rejected rows instead of iterating through every cell in each rejected row one-by-one via + * {@code nextRow()}. + * + *

Each test verifies two properties simultaneously: + *

    + *
  1. Correctness — the scan returns exactly the rows that are not rejected by + * {@link Filter#filterRowKey(Cell)}, regardless of whether the hint path or the + * legacy cell-iteration path is used.
  2. + *
  3. Efficiency — when a filter provides a hint that jumps over all N rejected rows + * in one seek, {@code getHintForRejectedRow} is called exactly once (not N times), proving + * that the scanner did not fall back to cell-by-cell iteration for the skipped rows.
  4. + *
+ */ +@Category({ FilterTests.class, MediumTests.class }) +public class TestFilterHintForRejectedRow { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestFilterHintForRejectedRow.class); + + private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); + + private static final byte[] FAMILY = Bytes.toBytes("f"); + private static final int CELLS_PER_ROW = 10; + private static final byte[] VALUE = Bytes.toBytes("value"); + + private HRegion region; + + @Rule + public TestName name = new TestName(); + + @Before + public void setUp() throws Exception { + TableDescriptor tableDescriptor = + TableDescriptorBuilder.newBuilder(TableName.valueOf(name.getMethodName())) + .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY)).build(); + RegionInfo info = + RegionInfoBuilder.newBuilder(tableDescriptor.getTableName()).build(); + this.region = HBaseTestingUtil.createRegionAndWAL(info, TEST_UTIL.getDataTestDir(), + TEST_UTIL.getConfiguration(), tableDescriptor); + } + + @After + public void tearDown() throws Exception { + HBaseTestingUtil.closeRegionAndWAL(this.region); + } + + // ------------------------------------------------------------------------- + // Helper + // ------------------------------------------------------------------------- + + /** + * Writes {@code count} rows into the test region. Row keys are formatted as + * {@code "row-XX"} (zero-padded to two digits) and each row contains + * {@link #CELLS_PER_ROW} cells with qualifier {@code "q-NN"}. + * + * @param prefix string prefix used for both row keys and qualifier names + * @param count number of rows to write + * @throws IOException if any put fails + */ + private void writeRows(String prefix, int count) throws IOException { + for (int i = 0; i < count; i++) { + byte[] row = Bytes.toBytes(String.format("%s-%02d", prefix, i)); + Put p = new Put(row); + p.setDurability(Durability.SKIP_WAL); + for (int j = 0; j < CELLS_PER_ROW; j++) { + p.addColumn(FAMILY, Bytes.toBytes(String.format("q-%02d", j)), VALUE); + } + this.region.put(p); + } + this.region.flush(true); + } + + // ------------------------------------------------------------------------- + // Tests + // ------------------------------------------------------------------------- + + /** + * HBASE-29974 Path 1 — single-batch seek hint. + * + *

The filter rejects the first 5 rows ({@code "row-00"} through {@code "row-04"}) via + * {@link Filter#filterRowKey(Cell)} and, for every rejected row, returns a hint pointing + * directly to {@code "row-05"} via {@link Filter#getHintForRejectedRow(Cell)}. + * + *

Expected behaviour: + *

+ */ + @Test + public void testHintJumpsOverAllRejectedRowsInOneSingleSeek() throws IOException { + final String prefix = "hint-single"; + final int rejectedCount = 5; + final int acceptedCount = 5; + writeRows(prefix, rejectedCount + acceptedCount); + + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, rejectedCount)); + final AtomicInteger hintCallCount = new AtomicInteger(0); + + FilterBase hintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + // Reject any row whose key is lexicographically less than acceptedStartRow. + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + /** + * Returns a hint pointing directly to the first accepted row, regardless of which + * rejected row triggered this call. Because the hint bypasses all remaining rejected + * rows in one seek, this method should be invoked exactly once for the whole scan. + */ + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + hintCallCount.incrementAndGet(); + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + List results = new ArrayList<>(); + try (RegionScanner scanner = region.getScanner(new Scan().addFamily(FAMILY) + .setFilter(hintFilter))) { + List rowCells = new ArrayList<>(); + boolean hasMore; + do { + hasMore = scanner.next(rowCells); + results.addAll(rowCells); + rowCells.clear(); + } while (hasMore); + } + + // ----- Correctness assertions ----- + assertEquals( + "Scan must return exactly the cells from the " + acceptedCount + " accepted rows", + acceptedCount * CELLS_PER_ROW, results.size()); + for (Cell c : results) { + assertTrue("Every returned cell must belong to the accepted row range", + Bytes.compareTo(c.getRowArray(), c.getRowOffset(), c.getRowLength(), acceptedStartRow, 0, + acceptedStartRow.length) >= 0); + } + + // ----- Efficiency assertion ----- + // The hint jumps over all 5 rejected rows in one seek, so the filter should be asked for + // a hint exactly once — not once per rejected row. + assertEquals( + "getHintForRejectedRow must be called exactly once: the hint skips all rejected rows", + 1, hintCallCount.get()); + } + + /** + * HBASE-29974 Path 1 — backward compatibility: null hint falls through to {@code nextRow()}. + * + *

When {@link Filter#getHintForRejectedRow(Cell)} returns {@code null} (the default from + * {@link FilterBase}), the scanner must fall back to the existing cell-by-cell {@code nextRow()} + * behaviour and still return the correct results. This test acts as a regression guard to + * confirm that the null-hint path does not alter observable scan results. + */ + @Test + public void testNullHintFallsThroughToLegacyNextRowBehaviour() throws IOException { + final String prefix = "hint-null"; + final int rejectedCount = 3; + final int acceptedCount = 3; + writeRows(prefix, rejectedCount + acceptedCount); + + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, rejectedCount)); + + // This filter rejects rows but does NOT override getHintForRejectedRow (returns null). + FilterBase noHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + }; + + List results = new ArrayList<>(); + try (RegionScanner scanner = region.getScanner(new Scan().addFamily(FAMILY) + .setFilter(noHintFilter))) { + List rowCells = new ArrayList<>(); + boolean hasMore; + do { + hasMore = scanner.next(rowCells); + results.addAll(rowCells); + rowCells.clear(); + } while (hasMore); + } + + assertEquals( + "Null-hint path must still return the correct cells from the accepted rows", + acceptedCount * CELLS_PER_ROW, results.size()); + for (Cell c : results) { + assertTrue("Every returned cell must belong to the accepted row range", + Bytes.compareTo(c.getRowArray(), c.getRowOffset(), c.getRowLength(), acceptedStartRow, 0, + acceptedStartRow.length) >= 0); + } + } + + /** + * HBASE-29974 Path 1 — hint pointing beyond the last row terminates the scan cleanly. + * + *

If the filter's {@link Filter#getHintForRejectedRow(Cell)} returns a position that is + * past the end of the table (beyond all written rows), the scan must complete without + * returning any results and without throwing an exception. + */ + @Test + public void testHintBeyondLastRowTerminatesScanGracefully() throws IOException { + final String prefix = "hint-beyond"; + writeRows(prefix, 5); + + // The hint points past "zzz", which is lexicographically after all "hint-beyond-XX" rows. + final byte[] beyondAllRows = Bytes.toBytes("zzz-beyond-table-end"); + + FilterBase beyondHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return true; // reject every row + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(beyondAllRows); + } + }; + + List results = new ArrayList<>(); + try (RegionScanner scanner = region.getScanner(new Scan().addFamily(FAMILY) + .setFilter(beyondHintFilter))) { + List rowCells = new ArrayList<>(); + boolean hasMore; + do { + hasMore = scanner.next(rowCells); + results.addAll(rowCells); + rowCells.clear(); + } while (hasMore); + } + + assertTrue( + "When the hint is past the last row, no cells should be returned", results.isEmpty()); + } + + /** + * HBASE-29974 Path 1 — hint for every rejected row (per-row hint stepping). + * + *

When the filter provides a hint that advances only one row at a time (i.e. it always + * points to the immediate next row key), {@code getHintForRejectedRow} is called once per + * rejected row. This verifies that the hint mechanism works correctly in the incremental-step + * case, not just the bulk-jump case. + */ + @Test + public void testPerRowHintCalledOncePerRejectedRow() throws IOException { + final String prefix = "hint-perrow"; + final int rejectedCount = 4; + final int acceptedCount = 2; + writeRows(prefix, rejectedCount + acceptedCount); + + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, rejectedCount)); + final AtomicInteger hintCallCount = new AtomicInteger(0); + + FilterBase perRowHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + /** + * Returns a hint pointing to the cell immediately after the current one (one row at a + * time). This causes the scanner to seek per-row, so this method is called once for + * each rejected row. + */ + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + hintCallCount.incrementAndGet(); + // Create a key that is one step past the current row key. + return PrivateCellUtil.createFirstOnNextRow(firstRowCell); + } + }; + + List results = new ArrayList<>(); + try (RegionScanner scanner = region.getScanner(new Scan().addFamily(FAMILY) + .setFilter(perRowHintFilter))) { + List rowCells = new ArrayList<>(); + boolean hasMore; + do { + hasMore = scanner.next(rowCells); + results.addAll(rowCells); + rowCells.clear(); + } while (hasMore); + } + + // ----- Correctness ----- + assertEquals( + "Scan must return exactly the cells from the " + acceptedCount + " accepted rows", + acceptedCount * CELLS_PER_ROW, results.size()); + for (Cell c : results) { + assertTrue("Every returned cell must belong to the accepted row range", + Bytes.compareTo(c.getRowArray(), c.getRowOffset(), c.getRowLength(), acceptedStartRow, 0, + acceptedStartRow.length) >= 0); + } + + // ----- Hint call count ----- + // One call per rejected row: each per-row hint advances the scanner by exactly one row. + assertEquals( + "getHintForRejectedRow must be called once per rejected row in the per-row hint strategy", + rejectedCount, hintCallCount.get()); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java index efbb4c77d475..66c9437fa6c7 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java @@ -20,6 +20,7 @@ import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; import java.io.IOException; import java.util.ArrayList; @@ -294,6 +295,26 @@ public ReturnCode filterCell(final Cell c) { } } + /** + * A filter whose {@link #getSkipHint(Cell)} always returns a fixed {@link ExtendedCell} target, + * simulating a range-driven filter (e.g. {@code MultiRowRangeFilter}) that knows the next + * interesting position without inspecting the skipped cell. The filter's + * {@link #filterCell(Cell)} is intentionally left as the default include-all so that it does + * not interfere with tests that need cells to reach that method. + */ + private static class FixedSkipHintFilter extends FilterBase { + final ExtendedCell hint; + + FixedSkipHintFilter(ExtendedCell hint) { + this.hint = hint; + } + + @Override + public Cell getSkipHint(Cell skippedCell) throws IOException { + return hint; + } + } + /** * Here is the unit test for UserScanQueryMatcher#mergeFilterResponse, when the number of cells * exceed the versions requested in scan, we should return SEEK_NEXT_COL, but if current match @@ -396,4 +417,232 @@ scanWithFilter, new ScanInfo(this.conf, fam2, 0, 5, ttl, KeepDeletedCells.FALSE, Cell nextCell = qm.getKeyForNextColumn(lastCell); assertArrayEquals(nextCell.getQualifierArray(), col4); } + + // ----------------------------------------------------------------------- + // Tests for HBASE-29974: Filter#getSkipHint consulted at matchColumn + // structural short-circuits (time-range, column, and version gates). + // ----------------------------------------------------------------------- + + /** + * HBASE-29974 Path 2 — time-range upper-bound gate (tsCmp > 0). + * + *

When a cell's timestamp is newer than the scan's time-range maximum ({@code ts >= maxTs}), + * the original code returns {@link MatchCode#SKIP} without ever calling {@code filterCell}. + * With the fix, the matcher must first ask the filter for a skip hint. If the filter returns a + * non-null hint, {@link MatchCode#SEEK_NEXT_USING_HINT} must be returned and + * {@link UserScanQueryMatcher#getNextKeyHint} must return that hint. + */ + @Test + public void testSkipHintConsultedForCellNewerThanTimeRange() throws IOException { + long now = EnvironmentEdgeManager.currentTime(); + long minTs = now - 2000; + long maxTs = now - 1000; + + // The hint points to a cell in the middle of the valid time range so the scanner can resume. + KeyValue hintCell = new KeyValue(row2, fam2, col1, now - 1500, data); + Scan timeRangeScan = + new Scan().addFamily(fam2).setTimeRange(minTs, maxTs).setFilter(new FixedSkipHintFilter(hintCell)); + + long now2 = EnvironmentEdgeManager.currentTime(); + UserScanQueryMatcher qm = UserScanQueryMatcher.create(timeRangeScan, + new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, + HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false), + null, 0, now2, null); + + // ts=now2 >= maxTs, so tsCmp > 0: time-range gate fires before filterCell (Path 2). + KeyValue tooNew = new KeyValue(row1, fam2, col1, now2, data); + qm.setToNewRow(tooNew); + + assertEquals("Filter hint must promote SKIP to SEEK_NEXT_USING_HINT", + MatchCode.SEEK_NEXT_USING_HINT, qm.match(tooNew)); + assertEquals("getNextKeyHint must return the pending skip hint", hintCell, + qm.getNextKeyHint(tooNew)); + // pendingSkipHint is consumed on the first getNextKeyHint call; subsequent calls fall through + // to FilterBase#getNextCellHint which returns null. + assertNull("pendingSkipHint must be cleared after being drained", qm.getNextKeyHint(tooNew)); + } + + /** + * HBASE-29974 Path 2 — time-range lower-bound gate (tsCmp < 0). + * + *

When a cell's timestamp is older than the scan's time-range minimum ({@code ts < minTs}), + * the original code returns the column-tracker's next-row-or-next-column suggestion without + * consulting the filter. With the fix, the matcher must first ask the filter for a skip hint, + * and prefer it over the column-tracker's answer when non-null. + */ + @Test + public void testSkipHintConsultedForCellOlderThanTimeRange() throws IOException { + long now = EnvironmentEdgeManager.currentTime(); + long minTs = now - 500; + long maxTs = now; + + KeyValue hintCell = new KeyValue(row2, fam2, col1, now - 100, data); + Scan timeRangeScan = + new Scan().addFamily(fam2).setTimeRange(minTs, maxTs).setFilter(new FixedSkipHintFilter(hintCell)); + + UserScanQueryMatcher qm = UserScanQueryMatcher.create(timeRangeScan, + new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, + HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false), + null, 0, now, null); + + // ts = now-1000 < minTs (now-500), so tsCmp < 0: time-range gate fires (Path 2). + KeyValue tooOld = new KeyValue(row1, fam2, col1, now - 1000, data); + qm.setToNewRow(tooOld); + + assertEquals("Filter hint must promote column-tracker result to SEEK_NEXT_USING_HINT", + MatchCode.SEEK_NEXT_USING_HINT, qm.match(tooOld)); + assertEquals("getNextKeyHint must return the pending skip hint", hintCell, + qm.getNextKeyHint(tooOld)); + assertNull("pendingSkipHint must be cleared after being drained", qm.getNextKeyHint(tooOld)); + } + + /** + * HBASE-29974 Path 2 — time-range gate, null hint falls through. + * + *

When the filter's {@link org.apache.hadoop.hbase.filter.Filter#getSkipHint(Cell)} returns + * {@code null}, the matcher must fall back to the original structural skip code (SKIP or the + * column-tracker result) with no regression. + */ + @Test + public void testNullSkipHintFallsThroughToOriginalCodeOnTimeRangeGate() throws IOException { + long now = EnvironmentEdgeManager.currentTime(); + long minTs = now - 2000; + long maxTs = now - 1000; + + // AlwaysIncludeFilter does not override getSkipHint(), so it returns null. + Scan timeRangeScan = + new Scan().addFamily(fam2).setTimeRange(minTs, maxTs).setFilter(new AlwaysIncludeFilter()); + + long now2 = EnvironmentEdgeManager.currentTime(); + UserScanQueryMatcher qm = UserScanQueryMatcher.create(timeRangeScan, + new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, + HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false), + null, 0, now2, null); + + KeyValue tooNew = new KeyValue(row1, fam2, col1, now2, data); + qm.setToNewRow(tooNew); + + // With a null hint the gate must still return the original SKIP code. + assertEquals("Null getSkipHint must not change the original SKIP match code", + MatchCode.SKIP, qm.match(tooNew)); + } + + /** + * HBASE-29974 Path 3 — column-exclusion gate ({@code checkColumn() != INCLUDE}). + * + *

When an explicit-column scan encounters a qualifier not in its requested set, the column + * tracker returns a skip/seek code before {@code filterCell} is ever reached. With the fix, + * the matcher must consult {@code getSkipHint} first, and return + * {@link MatchCode#SEEK_NEXT_USING_HINT} when a non-null hint is available. + */ + @Test + public void testSkipHintConsultedForExcludedColumn() throws IOException { + long now = EnvironmentEdgeManager.currentTime(); + + // get.getFamilyMap().get(fam2) contains col2, col4, col5; presenting col1 triggers exclusion. + KeyValue hintCell = new KeyValue(row1, fam2, col2, 1, data); + Scan scanWithFilter = new Scan(scan).setFilter(new FixedSkipHintFilter(hintCell)); + + UserScanQueryMatcher qm = UserScanQueryMatcher.create(scanWithFilter, + new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, + HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false), + get.getFamilyMap().get(fam2), now - ttl, now, null); + + // col1 is not in {col2, col4, col5}: checkColumn returns a non-INCLUDE code. + KeyValue excludedCol = new KeyValue(row1, fam2, col1, 1, data); + qm.setToNewRow(excludedCol); + + assertEquals("Filter hint must promote column-exclusion result to SEEK_NEXT_USING_HINT", + MatchCode.SEEK_NEXT_USING_HINT, qm.match(excludedCol)); + assertEquals("getNextKeyHint must return the pending skip hint", hintCell, + qm.getNextKeyHint(excludedCol)); + assertNull("pendingSkipHint must be cleared after being drained", + qm.getNextKeyHint(excludedCol)); + } + + /** + * HBASE-29974 Path 3 — version-exhaustion gate ({@code checkVersions()} returns + * {@link MatchCode#SEEK_NEXT_COL}). + * + *

When the column tracker has already seen the maximum number of versions for a qualifier, + * it returns SKIP or SEEK_NEXT_COL for subsequent versions, again bypassing {@code filterCell}. + * With the fix, the matcher must consult {@code getSkipHint} at that point. + */ + @Test + public void testSkipHintConsultedOnVersionExhaustion() throws IOException { + long now = EnvironmentEdgeManager.currentTime(); + + KeyValue hintCell = new KeyValue(row2, fam2, col1, now, data); + // ScanInfo maxVersions=1: column tracker allows exactly 1 version per qualifier. + Scan scanWithFilter = new Scan().addFamily(fam2).setFilter(new FixedSkipHintFilter(hintCell)); + + UserScanQueryMatcher qm = UserScanQueryMatcher.create(scanWithFilter, + new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, + HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false), + null, now - ttl, now, null); + + // First version of col1: passes all gates and reaches filterCell. + KeyValue version1 = new KeyValue(row1, fam2, col1, now - 10, data); + qm.setToNewRow(version1); + MatchCode firstCode = qm.match(version1); + assertEquals("First version must be included", MatchCode.INCLUDE, firstCode); + + // Second version of col1: checkVersions returns SEEK_NEXT_COL (version limit exceeded). + // The filter hint must be consulted and SEEK_NEXT_USING_HINT returned. + KeyValue version2 = new KeyValue(row1, fam2, col1, now - 20, data); + assertEquals("Filter hint must promote version-exhaustion result to SEEK_NEXT_USING_HINT", + MatchCode.SEEK_NEXT_USING_HINT, qm.match(version2)); + assertEquals("getNextKeyHint must return the pending skip hint from version gate", hintCell, + qm.getNextKeyHint(version2)); + assertNull("pendingSkipHint must be cleared after being drained", + qm.getNextKeyHint(version2)); + } + + /** + * HBASE-29974 — {@code pendingSkipHint} must not interfere with the normal + * {@code filterCell → SEEK_NEXT_USING_HINT → getNextCellHint} path. + * + *

If a cell passes all structural gates and reaches {@code filterCell}, which then returns + * {@link ReturnCode#SEEK_NEXT_USING_HINT}, the existing {@link Filter#getNextCellHint(Cell)} + * mechanism must still be used (not {@code getSkipHint}), and {@code pendingSkipHint} must + * remain null so it does not contaminate the result. + */ + @Test + public void testNormalFilterCellHintPathUnaffectedBySkipHintChange() throws IOException { + long now = EnvironmentEdgeManager.currentTime(); + + final KeyValue filterHintCell = new KeyValue(row2, fam2, col1, now, data); + + // A filter that returns SEEK_NEXT_USING_HINT from filterCell and provides a hint through + // getNextCellHint, while NOT overriding getSkipHint (returns null by default). + FilterBase seekFilter = new FilterBase() { + @Override + public ReturnCode filterCell(Cell c) { + return ReturnCode.SEEK_NEXT_USING_HINT; + } + + @Override + public Cell getNextCellHint(Cell currentCell) { + return filterHintCell; + } + }; + + Scan scanWithFilter = + new Scan().addFamily(fam2).setFilter(seekFilter); + + UserScanQueryMatcher qm = UserScanQueryMatcher.create(scanWithFilter, + new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, + HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false), + null, now - ttl, now, null); + + KeyValue cell = new KeyValue(row1, fam2, col1, now - 10, data); + qm.setToNewRow(cell); + + assertEquals("filterCell path must still return SEEK_NEXT_USING_HINT", + MatchCode.SEEK_NEXT_USING_HINT, qm.match(cell)); + // pendingSkipHint was never set (getSkipHint returned null), so getNextKeyHint must + // delegate to filter.getNextCellHint(). + assertEquals("getNextKeyHint must delegate to getNextCellHint when no skip hint is pending", + filterHintCell, qm.getNextKeyHint(cell)); + } } From bc242485fee427ae9c3bb54f1e06a5c8132c92a5 Mon Sep 17 00:00:00 2001 From: Shubham Roy Date: Thu, 12 Mar 2026 09:12:27 +0530 Subject: [PATCH 2/9] Reduced code duplication. --- .../querymatcher/UserScanQueryMatcher.java | 59 ++++++++----------- 1 file changed, 25 insertions(+), 34 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java index f3a99ca0892d..715fe77ae576 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java @@ -155,11 +155,8 @@ protected final MatchCode matchColumn(ExtendedCell cell, long timestamp, byte ty // Cell is newer than the scan's time-range upper bound. Give the filter one last chance to // provide a seek hint before we fall back to a plain cell-level SKIP. This addresses // HBASE-29974 Path 2: time-range gate fires before filterCell is reached. - if (filter != null) { - ExtendedCell hint = resolveSkipHint(cell); - if (hint != null) { - return MatchCode.SEEK_NEXT_USING_HINT; - } + if (resolveSkipHint(cell)) { + return MatchCode.SEEK_NEXT_USING_HINT; } return MatchCode.SKIP; } @@ -167,11 +164,8 @@ protected final MatchCode matchColumn(ExtendedCell cell, long timestamp, byte ty // Cell is older than the scan's time-range lower bound. Give the filter a chance to provide // a seek hint before we defer to the column tracker's next-row/next-column suggestion. // Addresses HBASE-29974 Path 2: time-range gate fires before filterCell is reached. - if (filter != null) { - ExtendedCell hint = resolveSkipHint(cell); - if (hint != null) { - return MatchCode.SEEK_NEXT_USING_HINT; - } + if (resolveSkipHint(cell)) { + return MatchCode.SEEK_NEXT_USING_HINT; } return columns.getNextRowOrNextColumn(cell); } @@ -180,11 +174,8 @@ protected final MatchCode matchColumn(ExtendedCell cell, long timestamp, byte ty if (matchCode != MatchCode.INCLUDE) { // Column is excluded by the scan's column set. Give the filter a chance to provide a // seek hint before the column-tracker's suggestion is used. Addresses HBASE-29974 Path 3. - if (filter != null) { - ExtendedCell hint = resolveSkipHint(cell); - if (hint != null) { - return MatchCode.SEEK_NEXT_USING_HINT; - } + if (resolveSkipHint(cell)) { + return MatchCode.SEEK_NEXT_USING_HINT; } return matchCode; } @@ -197,21 +188,15 @@ protected final MatchCode matchColumn(ExtendedCell cell, long timestamp, byte ty case SKIP: // Version limit reached; skip this cell. Give the filter a hint opportunity before // falling back to SKIP. Addresses HBASE-29974 Path 3: version gate fires before filterCell. - if (filter != null) { - ExtendedCell hint = resolveSkipHint(cell); - if (hint != null) { - return MatchCode.SEEK_NEXT_USING_HINT; - } + if (resolveSkipHint(cell)) { + return MatchCode.SEEK_NEXT_USING_HINT; } return MatchCode.SKIP; case SEEK_NEXT_COL: // Version limit reached; advance to the next column. Give the filter a hint opportunity // before falling back to SEEK_NEXT_COL. Addresses HBASE-29974 Path 3. - if (filter != null) { - ExtendedCell hint = resolveSkipHint(cell); - if (hint != null) { - return MatchCode.SEEK_NEXT_USING_HINT; - } + if (resolveSkipHint(cell)) { + return MatchCode.SEEK_NEXT_USING_HINT; } return MatchCode.SEEK_NEXT_COL; default: @@ -227,24 +212,30 @@ protected final MatchCode matchColumn(ExtendedCell cell, long timestamp, byte ty } /** - * Calls {@link org.apache.hadoop.hbase.filter.Filter#getSkipHint(Cell)} on the current filter, - * validates the returned cell type, and stores it as {@link #pendingSkipHint} so that - * {@link #getNextKeyHint} can return it when the scan pipeline asks for the seek target after - * receiving {@link ScanQueryMatcher.MatchCode#SEEK_NEXT_USING_HINT}. + * Asks the current filter for a seek hint via + * {@link org.apache.hadoop.hbase.filter.Filter#getSkipHint(Cell)}, validates the returned cell + * type, and if non-null stores it in {@link #pendingSkipHint} so that {@link #getNextKeyHint} + * can return it when the scan pipeline asks for the seek target after receiving + * {@link ScanQueryMatcher.MatchCode#SEEK_NEXT_USING_HINT}. * *

This is only called from the structural short-circuit branches of {@link #matchColumn}, * where {@code filterCell} has not been called, in accordance with the stateless - * contract of {@code Filter#getSkipHint}. + * contract of {@code Filter#getSkipHint}. The filter-null guard is included here so call-sites + * need no boilerplate. * * @param cell the cell that triggered the structural short-circuit - * @return the validated {@link ExtendedCell} hint, or {@code null} if the filter returns no hint + * @return {@code true} if the filter returned a valid hint (stored in {@link #pendingSkipHint}), + * {@code false} if no filter is set or the filter returned {@code null} * @throws DoNotRetryIOException if the filter returns a non-{@link ExtendedCell} instance * @throws IOException if the filter signals an I/O failure */ - private ExtendedCell resolveSkipHint(ExtendedCell cell) throws IOException { + private boolean resolveSkipHint(ExtendedCell cell) throws IOException { + if (filter == null) { + return false; + } Cell raw = filter.getSkipHint(cell); if (raw == null) { - return null; + return false; } if (!(raw instanceof ExtendedCell)) { throw new DoNotRetryIOException( @@ -252,7 +243,7 @@ private ExtendedCell resolveSkipHint(ExtendedCell cell) throws IOException { + "is not an ExtendedCell. Filter class: " + filter.getClass().getName()); } pendingSkipHint = (ExtendedCell) raw; - return pendingSkipHint; + return true; } /** From 5b8b8d9c6379afc4b8e7406111d002135bf8ad14 Mon Sep 17 00:00:00 2001 From: Shubham Roy Date: Fri, 13 Mar 2026 10:25:58 +0530 Subject: [PATCH 3/9] Added safe guard. --- .../hbase/regionserver/RegionScannerImpl.java | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java index 40c817e9b492..8a7cd3fd3368 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java @@ -83,6 +83,7 @@ public class RegionScannerImpl implements RegionScanner, Shipper, RpcCallback { protected final byte[] stopRow; protected final boolean includeStopRow; + protected final boolean reversed; protected final HRegion region; protected final CellComparator comparator; @@ -125,6 +126,7 @@ private static boolean hasNonce(HRegion region, long nonce) { defaultScannerContext = ScannerContext.newBuilder().setBatchLimit(scan.getBatch()).build(); this.stopRow = scan.getStopRow(); this.includeStopRow = scan.includeStopRow(); + this.reversed = scan.isReversed(); this.operationId = scan.getId(); // synchronize on scannerReadPoints so that nobody calculates @@ -773,16 +775,21 @@ private boolean nextRowViaHint(ScannerContext scannerContext, Cell curRowCell, E throws IOException { assert this.joinedContinuationRow == null : "Trying to go to next row during joinedHeap read."; - // Enable skipping-row mode so block-size accounting is consistent with nextRow(). - scannerContext.setSkippingRow(true); - this.storeHeap.requestSeek(hint, true, true); - scannerContext.setSkippingRow(false); + int difference = comparator.compare(hint, curRowCell); + if ((!reversed && difference > 0) || (reversed && difference < 0)) { + // Enable skipping-row mode so block-size accounting is consistent with nextRow(). + scannerContext.setSkippingRow(true); + this.storeHeap.requestSeek(hint, true, true); + scannerContext.setSkippingRow(false); - resetFilters(); + resetFilters(); - // Notify coprocessors, identical to the epilogue in nextRow(). - return this.region.getCoprocessorHost() == null - || this.region.getCoprocessorHost().postScannerFilterRow(this, curRowCell); + // Notify coprocessors, identical to the epilogue in nextRow(). + return this.region.getCoprocessorHost() == null + || this.region.getCoprocessorHost().postScannerFilterRow(this, curRowCell); + } + + return nextRow(scannerContext, curRowCell); } /** From 9da30dcdd7d9c13d77aa35547091e491eaa389c4 Mon Sep 17 00:00:00 2001 From: Shubham Roy Date: Fri, 10 Apr 2026 20:38:06 +0530 Subject: [PATCH 4/9] Applied spotless command. --- .../apache/hadoop/hbase/filter/Filter.java | 33 ++--- .../hadoop/hbase/filter/FilterWrapper.java | 3 +- .../hbase/regionserver/RegionScannerImpl.java | 32 ++--- .../querymatcher/UserScanQueryMatcher.java | 19 ++- .../filter/TestFilterHintForRejectedRow.java | 132 +++++++++--------- .../TestUserScanQueryMatcher.java | 96 ++++++------- 6 files changed, 152 insertions(+), 163 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java index 2401e5ff8893..b3faa15e88c3 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java @@ -195,21 +195,20 @@ public enum ReturnCode { * moving on. If the filter can determine a better forward position — for example, the next range * boundary in a {@code MultiRowRangeFilter} — it should return that target cell here, allowing * the scanner to seek directly past the unwanted rows. - * - *

Contract: + *

+ * Contract: *

    *
  • Only called after {@link #filterRowKey(Cell)} has returned {@code true} for the same * {@code firstRowCell}.
  • *
  • Implementations may use state that was set during {@link #filterRowKey(Cell)} (e.g. an - * updated range pointer), but must not invoke {@link #filterCell(Cell)} - * logic — the caller guarantees that {@code filterCell} has not been called for this row.
  • + * updated range pointer), but must not invoke {@link #filterCell(Cell)} logic — + * the caller guarantees that {@code filterCell} has not been called for this row. *
  • The returned {@link Cell}, if non-null, must be an * {@link org.apache.hadoop.hbase.ExtendedCell} because filters are evaluated on the server * side.
  • *
  • Returning {@code null} (the default) falls through to the existing {@code nextRow()} * behaviour, preserving full backward compatibility.
  • *
- * * @param firstRowCell the first cell encountered in the rejected row; contains the row key that * was passed to {@code filterRowKey} * @return a {@link Cell} representing the earliest position the scanner should seek to, or @@ -227,26 +226,24 @@ public Cell getHintForRejectedRow(final Cell firstRowCell) throws IOException { * several criteria — time-range mismatch, column-set exclusion, and version-limit exhaustion — * and in each case the filter is bypassed entirely. When an implementation can compute a * meaningful forward position purely from the cell's coordinates (without needing the - * {@code filterCell} call sequence), it should return that position here so the scanner can - * seek ahead instead of advancing one cell at a time. - * - *

Contract: + * {@code filterCell} call sequence), it should return that position here so the scanner can seek + * ahead instead of advancing one cell at a time. + *

+ * Contract: *

    *
  • May be called for cells that have never been passed to * {@link #filterCell(Cell)}.
  • - *
  • Implementations must not modify any filter state; this method is - * treated as logically stateless. Only filters whose hint computation is based solely on - * immutable configuration (e.g. a fixed column range or a fuzzy-row pattern) should - * override this.
  • + *
  • Implementations must not modify any filter state; this method is treated + * as logically stateless. Only filters whose hint computation is based solely on immutable + * configuration (e.g. a fixed column range or a fuzzy-row pattern) should override this.
  • *
  • The returned {@link Cell}, if non-null, must be an * {@link org.apache.hadoop.hbase.ExtendedCell} because filters are evaluated on the server * side.
  • - *
  • Returning {@code null} (the default) falls through to the existing structural - * skip/seek behaviour, preserving full backward compatibility.
  • + *
  • Returning {@code null} (the default) falls through to the existing structural skip/seek + * behaviour, preserving full backward compatibility.
  • *
- * - * @param skippedCell the cell that was rejected by the time-range, column, or version gate - * before {@code filterCell} could be consulted + * @param skippedCell the cell that was rejected by the time-range, column, or version gate before + * {@code filterCell} could be consulted * @return a {@link Cell} representing the earliest position the scanner should seek to, or * {@code null} if this filter cannot provide a better position than the structural hint * @throws IOException in case an I/O or filter-specific failure needs to be signaled diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/filter/FilterWrapper.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/filter/FilterWrapper.java index 007517ed34a6..b1039cb25679 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/filter/FilterWrapper.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/filter/FilterWrapper.java @@ -98,8 +98,7 @@ public Cell getNextCellHint(Cell currentCell) throws IOException { /** * Delegates to the wrapped filter's {@link Filter#getHintForRejectedRow(Cell)} so that the scan * pipeline can seek directly past a rejected row rather than iterating cell-by-cell via - * {@code nextRow()}. - * {@inheritDoc} + * {@code nextRow()}. {@inheritDoc} */ @Override public Cell getHintForRejectedRow(Cell firstRowCell) throws IOException { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java index 8a0eb8542c6f..625064146eb5 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java @@ -760,21 +760,20 @@ protected boolean nextRow(ScannerContext scannerContext, Cell curRowCell) throws /** * Fast-path alternative to {@link #nextRow} used when the filter has provided a seek hint via - * {@link org.apache.hadoop.hbase.filter.Filter#getHintForRejectedRow(Cell)}. Instead of - * iterating through every cell in the rejected row one-by-one, this method issues a single + * {@link org.apache.hadoop.hbase.filter.Filter#getHintForRejectedRow(Cell)}. Instead of iterating + * through every cell in the rejected row one-by-one, this method issues a single * {@code requestSeek} to jump directly to the filter's suggested position. - * - *

The skipping-row mode flag is set around the seek so that block-level size tracking - * continues to function (consistent with {@link #nextRow}), and the filter state is reset - * afterwards so the next row starts with a clean filter context. - * + *

+ * The skipping-row mode flag is set around the seek so that block-level size tracking continues + * to function (consistent with {@link #nextRow}), and the filter state is reset afterwards so the + * next row starts with a clean filter context. * @param scannerContext scanner context used for limit tracking * @param curRowCell the first cell of the row that was rejected by {@code filterRowKey}; * passed to the coprocessor hook for observability - * @param hint the validated {@link ExtendedCell} returned by the filter; the - * scanner will seek to this position - * @return {@code true} if scanning should continue, {@code false} if a coprocessor requests - * an early stop (mirrors the contract of {@link #nextRow}) + * @param hint the validated {@link ExtendedCell} returned by the filter; the scanner + * will seek to this position + * @return {@code true} if scanning should continue, {@code false} if a coprocessor requests an + * early stop (mirrors the contract of {@link #nextRow}) * @throws IOException if the seek or the coprocessor hook signals a failure */ private boolean nextRowViaHint(ScannerContext scannerContext, Cell curRowCell, ExtendedCell hint) @@ -803,13 +802,12 @@ private boolean nextRowViaHint(ScannerContext scannerContext, Cell curRowCell, E * after a row has been rejected by {@link #filterRowKey}. If the wrapped filter overrides * {@link org.apache.hadoop.hbase.filter.Filter#getHintForRejectedRow(Cell)}, this returns its * answer as an {@link ExtendedCell}; otherwise returns {@code null}. - * - *

The returned cell is validated to be an {@link ExtendedCell} because filters run on the - * server side and the scanner infrastructure requires {@code ExtendedCell} references. - * + *

+ * The returned cell is validated to be an {@link ExtendedCell} because filters run on the server + * side and the scanner infrastructure requires {@code ExtendedCell} references. * @param rowCell the first cell of the rejected row (same cell passed to {@code filterRowKey}) - * @return a validated {@link ExtendedCell} seek target, or {@code null} if the filter provides - * no hint + * @return a validated {@link ExtendedCell} seek target, or {@code null} if the filter provides no + * hint * @throws DoNotRetryIOException if the filter returns a non-{@link ExtendedCell} instance * @throws IOException if the filter signals an I/O failure */ diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java index 715fe77ae576..bcdea72368bf 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java @@ -66,8 +66,8 @@ public abstract class UserScanQueryMatcher extends ScanQueryMatcher { * at one of the structural short-circuit points in {@link #matchColumn}. When non-null this is * returned by {@link #getNextKeyHint} instead of delegating to * {@link org.apache.hadoop.hbase.filter.Filter#getNextCellHint}, because the hint was computed - * for a cell that never reached {@code filterCell}. Cleared on every {@link #getNextKeyHint} - * call so it cannot leak across multiple seek-hint cycles. + * for a cell that never reached {@code filterCell}. Cleared on every {@link #getNextKeyHint} call + * so it cannot leak across multiple seek-hint cycles. */ private ExtendedCell pendingSkipHint = null; @@ -214,15 +214,14 @@ protected final MatchCode matchColumn(ExtendedCell cell, long timestamp, byte ty /** * Asks the current filter for a seek hint via * {@link org.apache.hadoop.hbase.filter.Filter#getSkipHint(Cell)}, validates the returned cell - * type, and if non-null stores it in {@link #pendingSkipHint} so that {@link #getNextKeyHint} - * can return it when the scan pipeline asks for the seek target after receiving + * type, and if non-null stores it in {@link #pendingSkipHint} so that {@link #getNextKeyHint} can + * return it when the scan pipeline asks for the seek target after receiving * {@link ScanQueryMatcher.MatchCode#SEEK_NEXT_USING_HINT}. - * - *

This is only called from the structural short-circuit branches of {@link #matchColumn}, - * where {@code filterCell} has not been called, in accordance with the stateless - * contract of {@code Filter#getSkipHint}. The filter-null guard is included here so call-sites - * need no boilerplate. - * + *

+ * This is only called from the structural short-circuit branches of {@link #matchColumn}, where + * {@code filterCell} has not been called, in accordance with the stateless contract of + * {@code Filter#getSkipHint}. The filter-null guard is included here so call-sites need no + * boilerplate. * @param cell the cell that triggered the structural short-circuit * @return {@code true} if the filter returned a valid hint (stored in {@link #pendingSkipHint}), * {@code false} if no filter is set or the filter returned {@code null} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java index 7207fa245d83..a381f950ed20 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java @@ -51,19 +51,18 @@ import org.junit.rules.TestName; /** - * Integration tests for HBASE-29974 Path 1: - * {@link Filter#getHintForRejectedRow(Cell)} allows the scan pipeline to seek directly past - * rejected rows instead of iterating through every cell in each rejected row one-by-one via - * {@code nextRow()}. - * - *

Each test verifies two properties simultaneously: + * Integration tests for HBASE-29974 Path 1: {@link Filter#getHintForRejectedRow(Cell)} allows the + * scan pipeline to seek directly past rejected rows instead of iterating through every cell in each + * rejected row one-by-one via {@code nextRow()}. + *

+ * Each test verifies two properties simultaneously: *

    - *
  1. Correctness — the scan returns exactly the rows that are not rejected by - * {@link Filter#filterRowKey(Cell)}, regardless of whether the hint path or the - * legacy cell-iteration path is used.
  2. - *
  3. Efficiency — when a filter provides a hint that jumps over all N rejected rows - * in one seek, {@code getHintForRejectedRow} is called exactly once (not N times), proving - * that the scanner did not fall back to cell-by-cell iteration for the skipped rows.
  4. + *
  5. Correctness — the scan returns exactly the rows that are not rejected by + * {@link Filter#filterRowKey(Cell)}, regardless of whether the hint path or the legacy + * cell-iteration path is used.
  6. + *
  7. Efficiency — when a filter provides a hint that jumps over all N rejected rows in one + * seek, {@code getHintForRejectedRow} is called exactly once (not N times), proving that the + * scanner did not fall back to cell-by-cell iteration for the skipped rows.
  8. *
*/ @Category({ FilterTests.class, MediumTests.class }) @@ -89,8 +88,7 @@ public void setUp() throws Exception { TableDescriptor tableDescriptor = TableDescriptorBuilder.newBuilder(TableName.valueOf(name.getMethodName())) .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY)).build(); - RegionInfo info = - RegionInfoBuilder.newBuilder(tableDescriptor.getTableName()).build(); + RegionInfo info = RegionInfoBuilder.newBuilder(tableDescriptor.getTableName()).build(); this.region = HBaseTestingUtil.createRegionAndWAL(info, TEST_UTIL.getDataTestDir(), TEST_UTIL.getConfiguration(), tableDescriptor); } @@ -105,12 +103,11 @@ public void tearDown() throws Exception { // ------------------------------------------------------------------------- /** - * Writes {@code count} rows into the test region. Row keys are formatted as - * {@code "row-XX"} (zero-padded to two digits) and each row contains - * {@link #CELLS_PER_ROW} cells with qualifier {@code "q-NN"}. - * - * @param prefix string prefix used for both row keys and qualifier names - * @param count number of rows to write + * Writes {@code count} rows into the test region. Row keys are formatted as {@code "row-XX"} + * (zero-padded to two digits) and each row contains {@link #CELLS_PER_ROW} cells with qualifier + * {@code "q-NN"}. + * @param prefix string prefix used for both row keys and qualifier names + * @param count number of rows to write * @throws IOException if any put fails */ private void writeRows(String prefix, int count) throws IOException { @@ -132,19 +129,19 @@ private void writeRows(String prefix, int count) throws IOException { /** * HBASE-29974 Path 1 — single-batch seek hint. - * - *

The filter rejects the first 5 rows ({@code "row-00"} through {@code "row-04"}) via - * {@link Filter#filterRowKey(Cell)} and, for every rejected row, returns a hint pointing - * directly to {@code "row-05"} via {@link Filter#getHintForRejectedRow(Cell)}. - * - *

Expected behaviour: + *

+ * The filter rejects the first 5 rows ({@code "row-00"} through {@code "row-04"}) via + * {@link Filter#filterRowKey(Cell)} and, for every rejected row, returns a hint pointing directly + * to {@code "row-05"} via {@link Filter#getHintForRejectedRow(Cell)}. + *

+ * Expected behaviour: *

    - *
  • The scanner seeks directly to {@code "row-05"} after the very first rejection, - * bypassing cells in rows {@code "row-01"} through {@code "row-04"} entirely.
  • - *
  • {@code getHintForRejectedRow} is called exactly once (not five times), confirming - * no cell-by-cell iteration occurred for the skipped rows.
  • - *
  • Rows {@code "row-05"} through {@code "row-09"} are returned with all their cells - * intact.
  • + *
  • The scanner seeks directly to {@code "row-05"} after the very first rejection, bypassing + * cells in rows {@code "row-01"} through {@code "row-04"} entirely.
  • + *
  • {@code getHintForRejectedRow} is called exactly once (not five times), confirming no + * cell-by-cell iteration occurred for the skipped rows.
  • + *
  • Rows {@code "row-05"} through {@code "row-09"} are returned with all their cells + * intact.
  • *
*/ @Test @@ -166,9 +163,9 @@ public boolean filterRowKey(Cell cell) { } /** - * Returns a hint pointing directly to the first accepted row, regardless of which - * rejected row triggered this call. Because the hint bypasses all remaining rejected - * rows in one seek, this method should be invoked exactly once for the whole scan. + * Returns a hint pointing directly to the first accepted row, regardless of which rejected + * row triggered this call. Because the hint bypasses all remaining rejected rows in one seek, + * this method should be invoked exactly once for the whole scan. */ @Override public Cell getHintForRejectedRow(Cell firstRowCell) { @@ -178,8 +175,8 @@ public Cell getHintForRejectedRow(Cell firstRowCell) { }; List results = new ArrayList<>(); - try (RegionScanner scanner = region.getScanner(new Scan().addFamily(FAMILY) - .setFilter(hintFilter))) { + try (RegionScanner scanner = + region.getScanner(new Scan().addFamily(FAMILY).setFilter(hintFilter))) { List rowCells = new ArrayList<>(); boolean hasMore; do { @@ -190,8 +187,7 @@ public Cell getHintForRejectedRow(Cell firstRowCell) { } // ----- Correctness assertions ----- - assertEquals( - "Scan must return exactly the cells from the " + acceptedCount + " accepted rows", + assertEquals("Scan must return exactly the cells from the " + acceptedCount + " accepted rows", acceptedCount * CELLS_PER_ROW, results.size()); for (Cell c : results) { assertTrue("Every returned cell must belong to the accepted row range", @@ -203,17 +199,17 @@ public Cell getHintForRejectedRow(Cell firstRowCell) { // The hint jumps over all 5 rejected rows in one seek, so the filter should be asked for // a hint exactly once — not once per rejected row. assertEquals( - "getHintForRejectedRow must be called exactly once: the hint skips all rejected rows", - 1, hintCallCount.get()); + "getHintForRejectedRow must be called exactly once: the hint skips all rejected rows", 1, + hintCallCount.get()); } /** * HBASE-29974 Path 1 — backward compatibility: null hint falls through to {@code nextRow()}. - * - *

When {@link Filter#getHintForRejectedRow(Cell)} returns {@code null} (the default from + *

+ * When {@link Filter#getHintForRejectedRow(Cell)} returns {@code null} (the default from * {@link FilterBase}), the scanner must fall back to the existing cell-by-cell {@code nextRow()} - * behaviour and still return the correct results. This test acts as a regression guard to - * confirm that the null-hint path does not alter observable scan results. + * behaviour and still return the correct results. This test acts as a regression guard to confirm + * that the null-hint path does not alter observable scan results. */ @Test public void testNullHintFallsThroughToLegacyNextRowBehaviour() throws IOException { @@ -234,8 +230,8 @@ public boolean filterRowKey(Cell cell) { }; List results = new ArrayList<>(); - try (RegionScanner scanner = region.getScanner(new Scan().addFamily(FAMILY) - .setFilter(noHintFilter))) { + try (RegionScanner scanner = + region.getScanner(new Scan().addFamily(FAMILY).setFilter(noHintFilter))) { List rowCells = new ArrayList<>(); boolean hasMore; do { @@ -245,8 +241,7 @@ public boolean filterRowKey(Cell cell) { } while (hasMore); } - assertEquals( - "Null-hint path must still return the correct cells from the accepted rows", + assertEquals("Null-hint path must still return the correct cells from the accepted rows", acceptedCount * CELLS_PER_ROW, results.size()); for (Cell c : results) { assertTrue("Every returned cell must belong to the accepted row range", @@ -257,10 +252,10 @@ public boolean filterRowKey(Cell cell) { /** * HBASE-29974 Path 1 — hint pointing beyond the last row terminates the scan cleanly. - * - *

If the filter's {@link Filter#getHintForRejectedRow(Cell)} returns a position that is - * past the end of the table (beyond all written rows), the scan must complete without - * returning any results and without throwing an exception. + *

+ * If the filter's {@link Filter#getHintForRejectedRow(Cell)} returns a position that is past the + * end of the table (beyond all written rows), the scan must complete without returning any + * results and without throwing an exception. */ @Test public void testHintBeyondLastRowTerminatesScanGracefully() throws IOException { @@ -283,8 +278,8 @@ public Cell getHintForRejectedRow(Cell firstRowCell) { }; List results = new ArrayList<>(); - try (RegionScanner scanner = region.getScanner(new Scan().addFamily(FAMILY) - .setFilter(beyondHintFilter))) { + try (RegionScanner scanner = + region.getScanner(new Scan().addFamily(FAMILY).setFilter(beyondHintFilter))) { List rowCells = new ArrayList<>(); boolean hasMore; do { @@ -294,17 +289,17 @@ public Cell getHintForRejectedRow(Cell firstRowCell) { } while (hasMore); } - assertTrue( - "When the hint is past the last row, no cells should be returned", results.isEmpty()); + assertTrue("When the hint is past the last row, no cells should be returned", + results.isEmpty()); } /** * HBASE-29974 Path 1 — hint for every rejected row (per-row hint stepping). - * - *

When the filter provides a hint that advances only one row at a time (i.e. it always - * points to the immediate next row key), {@code getHintForRejectedRow} is called once per - * rejected row. This verifies that the hint mechanism works correctly in the incremental-step - * case, not just the bulk-jump case. + *

+ * When the filter provides a hint that advances only one row at a time (i.e. it always points to + * the immediate next row key), {@code getHintForRejectedRow} is called once per rejected row. + * This verifies that the hint mechanism works correctly in the incremental-step case, not just + * the bulk-jump case. */ @Test public void testPerRowHintCalledOncePerRejectedRow() throws IOException { @@ -324,9 +319,9 @@ public boolean filterRowKey(Cell cell) { } /** - * Returns a hint pointing to the cell immediately after the current one (one row at a - * time). This causes the scanner to seek per-row, so this method is called once for - * each rejected row. + * Returns a hint pointing to the cell immediately after the current one (one row at a time). + * This causes the scanner to seek per-row, so this method is called once for each rejected + * row. */ @Override public Cell getHintForRejectedRow(Cell firstRowCell) { @@ -337,8 +332,8 @@ public Cell getHintForRejectedRow(Cell firstRowCell) { }; List results = new ArrayList<>(); - try (RegionScanner scanner = region.getScanner(new Scan().addFamily(FAMILY) - .setFilter(perRowHintFilter))) { + try (RegionScanner scanner = + region.getScanner(new Scan().addFamily(FAMILY).setFilter(perRowHintFilter))) { List rowCells = new ArrayList<>(); boolean hasMore; do { @@ -349,8 +344,7 @@ public Cell getHintForRejectedRow(Cell firstRowCell) { } // ----- Correctness ----- - assertEquals( - "Scan must return exactly the cells from the " + acceptedCount + " accepted rows", + assertEquals("Scan must return exactly the cells from the " + acceptedCount + " accepted rows", acceptedCount * CELLS_PER_ROW, results.size()); for (Cell c : results) { assertTrue("Every returned cell must belong to the accepted row range", diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java index 0887ebc83479..719c57e32927 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java @@ -300,8 +300,8 @@ public ReturnCode filterCell(final Cell c) { * A filter whose {@link #getSkipHint(Cell)} always returns a fixed {@link ExtendedCell} target, * simulating a range-driven filter (e.g. {@code MultiRowRangeFilter}) that knows the next * interesting position without inspecting the skipped cell. The filter's - * {@link #filterCell(Cell)} is intentionally left as the default include-all so that it does - * not interfere with tests that need cells to reach that method. + * {@link #filterCell(Cell)} is intentionally left as the default include-all so that it does not + * interfere with tests that need cells to reach that method. */ private static class FixedSkipHintFilter extends FilterBase { final ExtendedCell hint; @@ -426,11 +426,11 @@ scanWithFilter, new ScanInfo(this.conf, fam2, 0, 5, ttl, KeepDeletedCells.FALSE, /** * HBASE-29974 Path 2 — time-range upper-bound gate (tsCmp > 0). - * - *

When a cell's timestamp is newer than the scan's time-range maximum ({@code ts >= maxTs}), - * the original code returns {@link MatchCode#SKIP} without ever calling {@code filterCell}. - * With the fix, the matcher must first ask the filter for a skip hint. If the filter returns a - * non-null hint, {@link MatchCode#SEEK_NEXT_USING_HINT} must be returned and + *

+ * When a cell's timestamp is newer than the scan's time-range maximum ({@code ts >= maxTs}), the + * original code returns {@link MatchCode#SKIP} without ever calling {@code filterCell}. With the + * fix, the matcher must first ask the filter for a skip hint. If the filter returns a non-null + * hint, {@link MatchCode#SEEK_NEXT_USING_HINT} must be returned and * {@link UserScanQueryMatcher#getNextKeyHint} must return that hint. */ @Test @@ -441,13 +441,13 @@ public void testSkipHintConsultedForCellNewerThanTimeRange() throws IOException // The hint points to a cell in the middle of the valid time range so the scanner can resume. KeyValue hintCell = new KeyValue(row2, fam2, col1, now - 1500, data); - Scan timeRangeScan = - new Scan().addFamily(fam2).setTimeRange(minTs, maxTs).setFilter(new FixedSkipHintFilter(hintCell)); + Scan timeRangeScan = new Scan().addFamily(fam2).setTimeRange(minTs, maxTs) + .setFilter(new FixedSkipHintFilter(hintCell)); long now2 = EnvironmentEdgeManager.currentTime(); UserScanQueryMatcher qm = UserScanQueryMatcher.create(timeRangeScan, - new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, - HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false), + new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, + 0, rowComparator, false), null, 0, now2, null); // ts=now2 >= maxTs, so tsCmp > 0: time-range gate fires before filterCell (Path 2). @@ -465,11 +465,11 @@ public void testSkipHintConsultedForCellNewerThanTimeRange() throws IOException /** * HBASE-29974 Path 2 — time-range lower-bound gate (tsCmp < 0). - * - *

When a cell's timestamp is older than the scan's time-range minimum ({@code ts < minTs}), - * the original code returns the column-tracker's next-row-or-next-column suggestion without - * consulting the filter. With the fix, the matcher must first ask the filter for a skip hint, - * and prefer it over the column-tracker's answer when non-null. + *

+ * When a cell's timestamp is older than the scan's time-range minimum ({@code ts < minTs}), the + * original code returns the column-tracker's next-row-or-next-column suggestion without + * consulting the filter. With the fix, the matcher must first ask the filter for a skip hint, and + * prefer it over the column-tracker's answer when non-null. */ @Test public void testSkipHintConsultedForCellOlderThanTimeRange() throws IOException { @@ -478,12 +478,12 @@ public void testSkipHintConsultedForCellOlderThanTimeRange() throws IOException long maxTs = now; KeyValue hintCell = new KeyValue(row2, fam2, col1, now - 100, data); - Scan timeRangeScan = - new Scan().addFamily(fam2).setTimeRange(minTs, maxTs).setFilter(new FixedSkipHintFilter(hintCell)); + Scan timeRangeScan = new Scan().addFamily(fam2).setTimeRange(minTs, maxTs) + .setFilter(new FixedSkipHintFilter(hintCell)); UserScanQueryMatcher qm = UserScanQueryMatcher.create(timeRangeScan, - new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, - HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false), + new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, + 0, rowComparator, false), null, 0, now, null); // ts = now-1000 < minTs (now-500), so tsCmp < 0: time-range gate fires (Path 2). @@ -499,8 +499,8 @@ public void testSkipHintConsultedForCellOlderThanTimeRange() throws IOException /** * HBASE-29974 Path 2 — time-range gate, null hint falls through. - * - *

When the filter's {@link org.apache.hadoop.hbase.filter.Filter#getSkipHint(Cell)} returns + *

+ * When the filter's {@link org.apache.hadoop.hbase.filter.Filter#getSkipHint(Cell)} returns * {@code null}, the matcher must fall back to the original structural skip code (SKIP or the * column-tracker result) with no regression. */ @@ -516,24 +516,24 @@ public void testNullSkipHintFallsThroughToOriginalCodeOnTimeRangeGate() throws I long now2 = EnvironmentEdgeManager.currentTime(); UserScanQueryMatcher qm = UserScanQueryMatcher.create(timeRangeScan, - new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, - HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false), + new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, + 0, rowComparator, false), null, 0, now2, null); KeyValue tooNew = new KeyValue(row1, fam2, col1, now2, data); qm.setToNewRow(tooNew); // With a null hint the gate must still return the original SKIP code. - assertEquals("Null getSkipHint must not change the original SKIP match code", - MatchCode.SKIP, qm.match(tooNew)); + assertEquals("Null getSkipHint must not change the original SKIP match code", MatchCode.SKIP, + qm.match(tooNew)); } /** * HBASE-29974 Path 3 — column-exclusion gate ({@code checkColumn() != INCLUDE}). - * - *

When an explicit-column scan encounters a qualifier not in its requested set, the column - * tracker returns a skip/seek code before {@code filterCell} is ever reached. With the fix, - * the matcher must consult {@code getSkipHint} first, and return + *

+ * When an explicit-column scan encounters a qualifier not in its requested set, the column + * tracker returns a skip/seek code before {@code filterCell} is ever reached. With the fix, the + * matcher must consult {@code getSkipHint} first, and return * {@link MatchCode#SEEK_NEXT_USING_HINT} when a non-null hint is available. */ @Test @@ -544,8 +544,8 @@ public void testSkipHintConsultedForExcludedColumn() throws IOException { KeyValue hintCell = new KeyValue(row1, fam2, col2, 1, data); Scan scanWithFilter = new Scan(scan).setFilter(new FixedSkipHintFilter(hintCell)); - UserScanQueryMatcher qm = UserScanQueryMatcher.create(scanWithFilter, - new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, + UserScanQueryMatcher qm = UserScanQueryMatcher.create( + scanWithFilter, new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false), get.getFamilyMap().get(fam2), now - ttl, now, null); @@ -564,10 +564,10 @@ public void testSkipHintConsultedForExcludedColumn() throws IOException { /** * HBASE-29974 Path 3 — version-exhaustion gate ({@code checkVersions()} returns * {@link MatchCode#SEEK_NEXT_COL}). - * - *

When the column tracker has already seen the maximum number of versions for a qualifier, - * it returns SKIP or SEEK_NEXT_COL for subsequent versions, again bypassing {@code filterCell}. - * With the fix, the matcher must consult {@code getSkipHint} at that point. + *

+ * When the column tracker has already seen the maximum number of versions for a qualifier, it + * returns SKIP or SEEK_NEXT_COL for subsequent versions, again bypassing {@code filterCell}. With + * the fix, the matcher must consult {@code getSkipHint} at that point. */ @Test public void testSkipHintConsultedOnVersionExhaustion() throws IOException { @@ -578,8 +578,8 @@ public void testSkipHintConsultedOnVersionExhaustion() throws IOException { Scan scanWithFilter = new Scan().addFamily(fam2).setFilter(new FixedSkipHintFilter(hintCell)); UserScanQueryMatcher qm = UserScanQueryMatcher.create(scanWithFilter, - new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, - HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false), + new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, + 0, rowComparator, false), null, now - ttl, now, null); // First version of col1: passes all gates and reaches filterCell. @@ -595,18 +595,17 @@ public void testSkipHintConsultedOnVersionExhaustion() throws IOException { MatchCode.SEEK_NEXT_USING_HINT, qm.match(version2)); assertEquals("getNextKeyHint must return the pending skip hint from version gate", hintCell, qm.getNextKeyHint(version2)); - assertNull("pendingSkipHint must be cleared after being drained", - qm.getNextKeyHint(version2)); + assertNull("pendingSkipHint must be cleared after being drained", qm.getNextKeyHint(version2)); } /** * HBASE-29974 — {@code pendingSkipHint} must not interfere with the normal * {@code filterCell → SEEK_NEXT_USING_HINT → getNextCellHint} path. - * - *

If a cell passes all structural gates and reaches {@code filterCell}, which then returns + *

+ * If a cell passes all structural gates and reaches {@code filterCell}, which then returns * {@link ReturnCode#SEEK_NEXT_USING_HINT}, the existing {@link Filter#getNextCellHint(Cell)} - * mechanism must still be used (not {@code getSkipHint}), and {@code pendingSkipHint} must - * remain null so it does not contaminate the result. + * mechanism must still be used (not {@code getSkipHint}), and {@code pendingSkipHint} must remain + * null so it does not contaminate the result. */ @Test public void testNormalFilterCellHintPathUnaffectedBySkipHintChange() throws IOException { @@ -617,11 +616,13 @@ public void testNormalFilterCellHintPathUnaffectedBySkipHintChange() throws IOEx // A filter that returns SEEK_NEXT_USING_HINT from filterCell and provides a hint through // getNextCellHint, while NOT overriding getSkipHint (returns null by default). FilterBase seekFilter = new FilterBase() { - @Override public ReturnCode filterCell(Cell c) { + @Override + public ReturnCode filterCell(Cell c) { return ReturnCode.SEEK_NEXT_USING_HINT; } - @Override public Cell getNextCellHint(Cell currentCell) { + @Override + public Cell getNextCellHint(Cell currentCell) { return filterHintCell; } }; @@ -630,7 +631,8 @@ public void testNormalFilterCellHintPathUnaffectedBySkipHintChange() throws IOEx UserScanQueryMatcher qm = UserScanQueryMatcher.create(scanWithFilter, new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, - 0, rowComparator, false), null, now - ttl, now, null); + 0, rowComparator, false), + null, now - ttl, now, null); KeyValue cell = new KeyValue(row1, fam2, col1, now - 10, data); qm.setToNewRow(cell); From 8a23326304e1a71ade6c5b949d616b481dc4c11d Mon Sep 17 00:00:00 2001 From: Shubham Roy Date: Mon, 4 May 2026 14:21:47 +0530 Subject: [PATCH 5/9] Addressed RCs. --- .../apache/hadoop/hbase/filter/Filter.java | 4 + .../hbase/regionserver/RegionScannerImpl.java | 16 +- .../querymatcher/UserScanQueryMatcher.java | 48 +- .../filter/TestFilterHintForRejectedRow.java | 513 ++++++++++++------ .../TestUserScanQueryMatcher.java | 130 +---- 5 files changed, 403 insertions(+), 308 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java index b3faa15e88c3..f29f99fb3cf2 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java @@ -30,6 +30,10 @@ *

  • {@link #reset()} : reset the filter state before filtering a new row.
  • *
  • {@link #filterAllRemaining()}: true means row scan is over; false means keep going.
  • *
  • {@link #filterRowKey(Cell)}: true means drop this row; false means include.
  • + *
  • {@link #getHintForRejectedRow(Cell)}: if {@code filterRowKey} returned true, optionally + * provide a seek hint to skip past the rejected row efficiently.
  • + *
  • {@link #getSkipHint(Cell)}: when a cell is structurally skipped (time-range, column, or + * version gate) before {@code filterCell} is reached, optionally provide a seek hint.
  • *
  • {@link #filterCell(Cell)}: decides whether to include or exclude this Cell. See * {@link ReturnCode}.
  • *
  • {@link #transformCell(Cell)}: if the Cell is included, let the filter transform the Cell. diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java index 625064146eb5..94ddb5b75098 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java @@ -761,8 +761,9 @@ protected boolean nextRow(ScannerContext scannerContext, Cell curRowCell) throws /** * Fast-path alternative to {@link #nextRow} used when the filter has provided a seek hint via * {@link org.apache.hadoop.hbase.filter.Filter#getHintForRejectedRow(Cell)}. Instead of iterating - * through every cell in the rejected row one-by-one, this method issues a single - * {@code requestSeek} to jump directly to the filter's suggested position. + * through every cell in the rejected row one-by-one, this method issues a single seek to jump + * directly to the filter's suggested position ({@code requestSeek} for forward scans, + * {@code backwardSeek} for reversed scans). *

    * The skipping-row mode flag is set around the seek so that block-level size tracking continues * to function (consistent with {@link #nextRow}), and the filter state is reset afterwards so the @@ -782,14 +783,19 @@ private boolean nextRowViaHint(ScannerContext scannerContext, Cell curRowCell, E int difference = comparator.compare(hint, curRowCell); if ((!reversed && difference > 0) || (reversed && difference < 0)) { - // Enable skipping-row mode so block-size accounting is consistent with nextRow(). scannerContext.setSkippingRow(true); - this.storeHeap.requestSeek(hint, true, true); + if (reversed) { + // ReversedKeyValueHeap does not support requestSeek; use backwardSeek + // to position at-or-before the hint within the target row. + // seekToPreviousRow would skip past the hint row entirely. + this.storeHeap.backwardSeek(hint); + } else { + this.storeHeap.requestSeek(hint, true, true); + } scannerContext.setSkippingRow(false); resetFilters(); - // Notify coprocessors, identical to the epilogue in nextRow(). return this.region.getCoprocessorHost() == null || this.region.getCoprocessorHost().postScannerFilterRow(this, curRowCell); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java index bcdea72368bf..1c7a8f475cbf 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java @@ -117,26 +117,22 @@ public Filter getFilter() { @Override public ExtendedCell getNextKeyHint(ExtendedCell cell) throws IOException { - // A structural short-circuit in matchColumn (time-range, column, or version gate) may have - // stored a hint via resolveSkipHint() before returning SEEK_NEXT_USING_HINT. Drain and return - // it first; it takes priority because it was produced for the exact cell that triggered the - // seek code, without ever calling filterCell. if (pendingSkipHint != null) { ExtendedCell hint = pendingSkipHint; pendingSkipHint = null; return hint; } - // Normal path: filterCell returned SEEK_NEXT_USING_HINT — delegate to the filter. if (filter == null) { return null; - } - Cell hint = filter.getNextCellHint(cell); - if (hint == null || hint instanceof ExtendedCell) { - return (ExtendedCell) hint; } else { - throw new DoNotRetryIOException("Incorrect filter implementation, " - + "the Cell returned by getNextKeyHint is not an ExtendedCell. Filter class: " - + filter.getClass().getName()); + Cell hint = filter.getNextCellHint(cell); + if (hint == null || hint instanceof ExtendedCell) { + return (ExtendedCell) hint; + } else { + throw new DoNotRetryIOException("Incorrect filter implementation, " + + "the Cell returned by getNextKeyHint is not an ExtendedCell. Filter class: " + + filter.getClass().getName()); + } } } @@ -146,24 +142,24 @@ public void beforeShipped() throws IOException { if (curColCell != null) { this.curColCell = KeyValueUtil.toNewKeyCell(this.curColCell); } + if (pendingSkipHint != null) { + this.pendingSkipHint = KeyValueUtil.toNewKeyCell(this.pendingSkipHint); + } } + // At each structural short-circuit below (time-range, column-exclusion, version-exhaustion), + // the filter is consulted via resolveSkipHint() before falling back to the default skip/seek + // code. This lets filters provide a forward seek target even when filterCell is never called. protected final MatchCode matchColumn(ExtendedCell cell, long timestamp, byte typeByte) throws IOException { int tsCmp = tr.compare(timestamp); if (tsCmp > 0) { - // Cell is newer than the scan's time-range upper bound. Give the filter one last chance to - // provide a seek hint before we fall back to a plain cell-level SKIP. This addresses - // HBASE-29974 Path 2: time-range gate fires before filterCell is reached. if (resolveSkipHint(cell)) { return MatchCode.SEEK_NEXT_USING_HINT; } return MatchCode.SKIP; } if (tsCmp < 0) { - // Cell is older than the scan's time-range lower bound. Give the filter a chance to provide - // a seek hint before we defer to the column tracker's next-row/next-column suggestion. - // Addresses HBASE-29974 Path 2: time-range gate fires before filterCell is reached. if (resolveSkipHint(cell)) { return MatchCode.SEEK_NEXT_USING_HINT; } @@ -172,8 +168,6 @@ protected final MatchCode matchColumn(ExtendedCell cell, long timestamp, byte ty // STEP 1: Check if the column is part of the requested columns MatchCode matchCode = columns.checkColumn(cell, typeByte); if (matchCode != MatchCode.INCLUDE) { - // Column is excluded by the scan's column set. Give the filter a chance to provide a - // seek hint before the column-tracker's suggestion is used. Addresses HBASE-29974 Path 3. if (resolveSkipHint(cell)) { return MatchCode.SEEK_NEXT_USING_HINT; } @@ -186,15 +180,11 @@ protected final MatchCode matchColumn(ExtendedCell cell, long timestamp, byte ty matchCode = columns.checkVersions(cell, timestamp, typeByte, false); switch (matchCode) { case SKIP: - // Version limit reached; skip this cell. Give the filter a hint opportunity before - // falling back to SKIP. Addresses HBASE-29974 Path 3: version gate fires before filterCell. if (resolveSkipHint(cell)) { return MatchCode.SEEK_NEXT_USING_HINT; } return MatchCode.SKIP; case SEEK_NEXT_COL: - // Version limit reached; advance to the next column. Give the filter a hint opportunity - // before falling back to SEEK_NEXT_COL. Addresses HBASE-29974 Path 3. if (resolveSkipHint(cell)) { return MatchCode.SEEK_NEXT_USING_HINT; } @@ -241,7 +231,15 @@ private boolean resolveSkipHint(ExtendedCell cell) throws IOException { "Incorrect filter implementation: the Cell returned by getSkipHint " + "is not an ExtendedCell. Filter class: " + filter.getClass().getName()); } - pendingSkipHint = (ExtendedCell) raw; + ExtendedCell hint = (ExtendedCell) raw; + // Only accept the hint if it advances past the current cell. A backward or + // no-op hint would cause the scanner to re-visit the same cell, degrading + // to a tight re-match loop. This mirrors the guard in StoreScanner's + // SEEK_NEXT_USING_HINT handler (see StoreScanner line ~790). + if (rowComparator.compare(hint, cell) <= 0) { + return false; + } + pendingSkipHint = hint; return true; } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java index a381f950ed20..321e6ef6ac43 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java @@ -25,6 +25,7 @@ import java.util.List; import java.util.concurrent.atomic.AtomicInteger; import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtil; import org.apache.hadoop.hbase.PrivateCellUtil; @@ -50,21 +51,6 @@ import org.junit.experimental.categories.Category; import org.junit.rules.TestName; -/** - * Integration tests for HBASE-29974 Path 1: {@link Filter#getHintForRejectedRow(Cell)} allows the - * scan pipeline to seek directly past rejected rows instead of iterating through every cell in each - * rejected row one-by-one via {@code nextRow()}. - *

    - * Each test verifies two properties simultaneously: - *

      - *
    1. Correctness — the scan returns exactly the rows that are not rejected by - * {@link Filter#filterRowKey(Cell)}, regardless of whether the hint path or the legacy - * cell-iteration path is used.
    2. - *
    3. Efficiency — when a filter provides a hint that jumps over all N rejected rows in one - * seek, {@code getHintForRejectedRow} is called exactly once (not N times), proving that the - * scanner did not fall back to cell-by-cell iteration for the skipped rows.
    4. - *
    - */ @Category({ FilterTests.class, MediumTests.class }) public class TestFilterHintForRejectedRow { @@ -75,6 +61,7 @@ public class TestFilterHintForRejectedRow { private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); private static final byte[] FAMILY = Bytes.toBytes("f"); + private static final byte[] FAMILY2 = Bytes.toBytes("g"); private static final int CELLS_PER_ROW = 10; private static final byte[] VALUE = Bytes.toBytes("value"); @@ -87,7 +74,8 @@ public class TestFilterHintForRejectedRow { public void setUp() throws Exception { TableDescriptor tableDescriptor = TableDescriptorBuilder.newBuilder(TableName.valueOf(name.getMethodName())) - .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY)).build(); + .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY)) + .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY2)).build(); RegionInfo info = RegionInfoBuilder.newBuilder(tableDescriptor.getTableName()).build(); this.region = HBaseTestingUtil.createRegionAndWAL(info, TEST_UTIL.getDataTestDir(), TEST_UTIL.getConfiguration(), tableDescriptor); @@ -98,18 +86,6 @@ public void tearDown() throws Exception { HBaseTestingUtil.closeRegionAndWAL(this.region); } - // ------------------------------------------------------------------------- - // Helper - // ------------------------------------------------------------------------- - - /** - * Writes {@code count} rows into the test region. Row keys are formatted as {@code "row-XX"} - * (zero-padded to two digits) and each row contains {@link #CELLS_PER_ROW} cells with qualifier - * {@code "q-NN"}. - * @param prefix string prefix used for both row keys and qualifier names - * @param count number of rows to write - * @throws IOException if any put fails - */ private void writeRows(String prefix, int count) throws IOException { for (int i = 0; i < count; i++) { byte[] row = Bytes.toBytes(String.format("%s-%02d", prefix, i)); @@ -123,30 +99,38 @@ private void writeRows(String prefix, int count) throws IOException { this.region.flush(true); } - // ------------------------------------------------------------------------- - // Tests - // ------------------------------------------------------------------------- - - /** - * HBASE-29974 Path 1 — single-batch seek hint. - *

    - * The filter rejects the first 5 rows ({@code "row-00"} through {@code "row-04"}) via - * {@link Filter#filterRowKey(Cell)} and, for every rejected row, returns a hint pointing directly - * to {@code "row-05"} via {@link Filter#getHintForRejectedRow(Cell)}. - *

    - * Expected behaviour: - *

      - *
    • The scanner seeks directly to {@code "row-05"} after the very first rejection, bypassing - * cells in rows {@code "row-01"} through {@code "row-04"} entirely.
    • - *
    • {@code getHintForRejectedRow} is called exactly once (not five times), confirming no - * cell-by-cell iteration occurred for the skipped rows.
    • - *
    • Rows {@code "row-05"} through {@code "row-09"} are returned with all their cells - * intact.
    • - *
    - */ + private void writeRowsMultiFamily(String prefix, int count) throws IOException { + for (int i = 0; i < count; i++) { + byte[] row = Bytes.toBytes(String.format("%s-%02d", prefix, i)); + Put p = new Put(row); + p.setDurability(Durability.SKIP_WAL); + for (int j = 0; j < CELLS_PER_ROW; j++) { + byte[] qual = Bytes.toBytes(String.format("q-%02d", j)); + p.addColumn(FAMILY, qual, VALUE); + p.addColumn(FAMILY2, qual, VALUE); + } + this.region.put(p); + } + this.region.flush(true); + } + + private List scanAll(Scan scan) throws IOException { + List results = new ArrayList<>(); + try (RegionScanner scanner = region.getScanner(scan)) { + List rowCells = new ArrayList<>(); + boolean hasMore; + do { + hasMore = scanner.next(rowCells); + results.addAll(rowCells); + rowCells.clear(); + } while (hasMore); + } + return results; + } + @Test - public void testHintJumpsOverAllRejectedRowsInOneSingleSeek() throws IOException { - final String prefix = "hint-single"; + public void testHintAndNoHintReturnSameCellsOnSameData() throws IOException { + final String prefix = "row"; final int rejectedCount = 5; final int acceptedCount = 5; writeRows(prefix, rejectedCount + acceptedCount); @@ -157,16 +141,10 @@ public void testHintJumpsOverAllRejectedRowsInOneSingleSeek() throws IOException FilterBase hintFilter = new FilterBase() { @Override public boolean filterRowKey(Cell cell) { - // Reject any row whose key is lexicographically less than acceptedStartRow. return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), acceptedStartRow, 0, acceptedStartRow.length) < 0; } - /** - * Returns a hint pointing directly to the first accepted row, regardless of which rejected - * row triggered this call. Because the hint bypasses all remaining rejected rows in one seek, - * this method should be invoked exactly once for the whole scan. - */ @Override public Cell getHintForRejectedRow(Cell firstRowCell) { hintCallCount.incrementAndGet(); @@ -174,53 +152,6 @@ public Cell getHintForRejectedRow(Cell firstRowCell) { } }; - List results = new ArrayList<>(); - try (RegionScanner scanner = - region.getScanner(new Scan().addFamily(FAMILY).setFilter(hintFilter))) { - List rowCells = new ArrayList<>(); - boolean hasMore; - do { - hasMore = scanner.next(rowCells); - results.addAll(rowCells); - rowCells.clear(); - } while (hasMore); - } - - // ----- Correctness assertions ----- - assertEquals("Scan must return exactly the cells from the " + acceptedCount + " accepted rows", - acceptedCount * CELLS_PER_ROW, results.size()); - for (Cell c : results) { - assertTrue("Every returned cell must belong to the accepted row range", - Bytes.compareTo(c.getRowArray(), c.getRowOffset(), c.getRowLength(), acceptedStartRow, 0, - acceptedStartRow.length) >= 0); - } - - // ----- Efficiency assertion ----- - // The hint jumps over all 5 rejected rows in one seek, so the filter should be asked for - // a hint exactly once — not once per rejected row. - assertEquals( - "getHintForRejectedRow must be called exactly once: the hint skips all rejected rows", 1, - hintCallCount.get()); - } - - /** - * HBASE-29974 Path 1 — backward compatibility: null hint falls through to {@code nextRow()}. - *

    - * When {@link Filter#getHintForRejectedRow(Cell)} returns {@code null} (the default from - * {@link FilterBase}), the scanner must fall back to the existing cell-by-cell {@code nextRow()} - * behaviour and still return the correct results. This test acts as a regression guard to confirm - * that the null-hint path does not alter observable scan results. - */ - @Test - public void testNullHintFallsThroughToLegacyNextRowBehaviour() throws IOException { - final String prefix = "hint-null"; - final int rejectedCount = 3; - final int acceptedCount = 3; - writeRows(prefix, rejectedCount + acceptedCount); - - final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, rejectedCount)); - - // This filter rejects rows but does NOT override getHintForRejectedRow (returns null). FilterBase noHintFilter = new FilterBase() { @Override public boolean filterRowKey(Cell cell) { @@ -229,46 +160,32 @@ public boolean filterRowKey(Cell cell) { } }; - List results = new ArrayList<>(); - try (RegionScanner scanner = - region.getScanner(new Scan().addFamily(FAMILY).setFilter(noHintFilter))) { - List rowCells = new ArrayList<>(); - boolean hasMore; - do { - hasMore = scanner.next(rowCells); - results.addAll(rowCells); - rowCells.clear(); - } while (hasMore); - } + List hintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(hintFilter)); + List noHintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(noHintFilter)); - assertEquals("Null-hint path must still return the correct cells from the accepted rows", - acceptedCount * CELLS_PER_ROW, results.size()); - for (Cell c : results) { - assertTrue("Every returned cell must belong to the accepted row range", - Bytes.compareTo(c.getRowArray(), c.getRowOffset(), c.getRowLength(), acceptedStartRow, 0, - acceptedStartRow.length) >= 0); + assertEquals("Both paths must return the same number of cells", noHintResults.size(), + hintResults.size()); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue("Cell mismatch at index " + i, + CellUtil.equals(hintResults.get(i), noHintResults.get(i))); } + assertEquals(acceptedCount * CELLS_PER_ROW, hintResults.size()); + assertEquals( + "getHintForRejectedRow must be called exactly once: the hint skips all rejected rows", 1, + hintCallCount.get()); } - /** - * HBASE-29974 Path 1 — hint pointing beyond the last row terminates the scan cleanly. - *

    - * If the filter's {@link Filter#getHintForRejectedRow(Cell)} returns a position that is past the - * end of the table (beyond all written rows), the scan must complete without returning any - * results and without throwing an exception. - */ @Test public void testHintBeyondLastRowTerminatesScanGracefully() throws IOException { final String prefix = "hint-beyond"; writeRows(prefix, 5); - // The hint points past "zzz", which is lexicographically after all "hint-beyond-XX" rows. final byte[] beyondAllRows = Bytes.toBytes("zzz-beyond-table-end"); FilterBase beyondHintFilter = new FilterBase() { @Override public boolean filterRowKey(Cell cell) { - return true; // reject every row + return true; } @Override @@ -277,30 +194,11 @@ public Cell getHintForRejectedRow(Cell firstRowCell) { } }; - List results = new ArrayList<>(); - try (RegionScanner scanner = - region.getScanner(new Scan().addFamily(FAMILY).setFilter(beyondHintFilter))) { - List rowCells = new ArrayList<>(); - boolean hasMore; - do { - hasMore = scanner.next(rowCells); - results.addAll(rowCells); - rowCells.clear(); - } while (hasMore); - } - + List results = scanAll(new Scan().addFamily(FAMILY).setFilter(beyondHintFilter)); assertTrue("When the hint is past the last row, no cells should be returned", results.isEmpty()); } - /** - * HBASE-29974 Path 1 — hint for every rejected row (per-row hint stepping). - *

    - * When the filter provides a hint that advances only one row at a time (i.e. it always points to - * the immediate next row key), {@code getHintForRejectedRow} is called once per rejected row. - * This verifies that the hint mechanism works correctly in the incremental-step case, not just - * the bulk-jump case. - */ @Test public void testPerRowHintCalledOncePerRejectedRow() throws IOException { final String prefix = "hint-perrow"; @@ -318,44 +216,313 @@ public boolean filterRowKey(Cell cell) { acceptedStartRow, 0, acceptedStartRow.length) < 0; } - /** - * Returns a hint pointing to the cell immediately after the current one (one row at a time). - * This causes the scanner to seek per-row, so this method is called once for each rejected - * row. - */ @Override public Cell getHintForRejectedRow(Cell firstRowCell) { hintCallCount.incrementAndGet(); - // Create a key that is one step past the current row key. return PrivateCellUtil.createFirstOnNextRow(firstRowCell); } }; - List results = new ArrayList<>(); - try (RegionScanner scanner = - region.getScanner(new Scan().addFamily(FAMILY).setFilter(perRowHintFilter))) { - List rowCells = new ArrayList<>(); - boolean hasMore; - do { - hasMore = scanner.next(rowCells); - results.addAll(rowCells); - rowCells.clear(); - } while (hasMore); - } + List results = scanAll(new Scan().addFamily(FAMILY).setFilter(perRowHintFilter)); - // ----- Correctness ----- - assertEquals("Scan must return exactly the cells from the " + acceptedCount + " accepted rows", - acceptedCount * CELLS_PER_ROW, results.size()); + assertEquals(acceptedCount * CELLS_PER_ROW, results.size()); for (Cell c : results) { assertTrue("Every returned cell must belong to the accepted row range", Bytes.compareTo(c.getRowArray(), c.getRowOffset(), c.getRowLength(), acceptedStartRow, 0, acceptedStartRow.length) >= 0); } - - // ----- Hint call count ----- - // One call per rejected row: each per-row hint advances the scanner by exactly one row. assertEquals( "getHintForRejectedRow must be called once per rejected row in the per-row hint strategy", rejectedCount, hintCallCount.get()); } + + @Test + public void testReversedScanWithHint() throws IOException { + final String prefix = "row"; + final int totalRows = 10; + writeRows(prefix, totalRows); + + // Accept rows 00-04 (lower half), reject rows 05-09 (upper half). + // In a reversed scan, the scanner starts at row-09 and moves backward. + final byte[] rejectThreshold = Bytes.toBytes(String.format("%s-%02d", prefix, 5)); + final byte[] hintTarget = Bytes.toBytes(String.format("%s-%02d", prefix, 4)); + final AtomicInteger hintCallCount = new AtomicInteger(0); + + FilterBase hintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + rejectThreshold, 0, rejectThreshold.length) >= 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + hintCallCount.incrementAndGet(); + // In reversed scan, hint must point backward (to a smaller row key). + return PrivateCellUtil.createFirstOnRow(hintTarget); + } + }; + + FilterBase noHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + rejectThreshold, 0, rejectThreshold.length) >= 0; + } + }; + + Scan reversedHintScan = new Scan().addFamily(FAMILY).setReversed(true).setFilter(hintFilter); + Scan reversedNoHintScan = + new Scan().addFamily(FAMILY).setReversed(true).setFilter(noHintFilter); + + List hintResults = scanAll(reversedHintScan); + List noHintResults = scanAll(reversedNoHintScan); + + assertEquals("Both paths must return the same number of cells", noHintResults.size(), + hintResults.size()); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue("Cell mismatch at index " + i, + CellUtil.equals(hintResults.get(i), noHintResults.get(i))); + } + assertEquals(5 * CELLS_PER_ROW, hintResults.size()); + assertEquals("getHintForRejectedRow must be called exactly once for reversed scan", 1, + hintCallCount.get()); + } + + @Test + public void testGetSkipHintWithTimeRangeIntegration() throws IOException { + final long insideTs = 2000; + final long outsideTs = 500; + final int rowCount = 5; + + for (int i = 0; i < rowCount; i++) { + byte[] row = Bytes.toBytes(String.format("skiprow-%02d", i)); + Put p = new Put(row); + p.setDurability(Durability.SKIP_WAL); + p.addColumn(FAMILY, Bytes.toBytes("q"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("q"), outsideTs, VALUE); + region.put(p); + } + region.flush(true); + + final AtomicInteger skipHintCalls = new AtomicInteger(0); + + FilterBase skipHintFilter = new FilterBase() { + @Override + public Cell getSkipHint(Cell skippedCell) { + skipHintCalls.incrementAndGet(); + return PrivateCellUtil.createFirstOnNextRow(skippedCell); + } + }; + + Scan hintScan = new Scan().addFamily(FAMILY).setTimeRange(1000, 3000).setFilter(skipHintFilter); + Scan noHintScan = new Scan().addFamily(FAMILY).setTimeRange(1000, 3000); + + List hintResults = scanAll(hintScan); + List noHintResults = scanAll(noHintScan); + + assertEquals("Both paths must return the same number of cells", noHintResults.size(), + hintResults.size()); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue("Cell mismatch at index " + i, + CellUtil.equals(hintResults.get(i), noHintResults.get(i))); + } + assertEquals(rowCount, hintResults.size()); + } + + @Test + public void testBackwardHintFallsBackToNextRow() throws IOException { + final String prefix = "row"; + writeRows(prefix, 5); + + final byte[] row00 = Bytes.toBytes(String.format("%s-%02d", prefix, 0)); + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, 2)); + + FilterBase backwardHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(row00); + } + }; + + FilterBase noHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + }; + + List hintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(backwardHintFilter)); + List noHintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(noHintFilter)); + + assertEquals("Backward hint must produce same results as no-hint path", noHintResults.size(), + hintResults.size()); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue("Cell mismatch at index " + i, + CellUtil.equals(hintResults.get(i), noHintResults.get(i))); + } + } + + @Test + public void testSameRowHintFallsBackToNextRow() throws IOException { + final String prefix = "row"; + writeRows(prefix, 5); + + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, 2)); + final AtomicInteger hintCallCount = new AtomicInteger(0); + + FilterBase sameRowHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + hintCallCount.incrementAndGet(); + return PrivateCellUtil.createFirstOnRow(CellUtil.cloneRow(firstRowCell)); + } + }; + + FilterBase noHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + }; + + List hintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(sameRowHintFilter)); + List noHintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(noHintFilter)); + + assertEquals("Same-row hint must produce same results as no-hint path", noHintResults.size(), + hintResults.size()); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue("Cell mismatch at index " + i, + CellUtil.equals(hintResults.get(i), noHintResults.get(i))); + } + assertEquals(3 * CELLS_PER_ROW, hintResults.size()); + } + + @Test + public void testCoprocessorHookCalledOncePerHintedRejection() throws IOException { + final String prefix = "row"; + final int rejectedCount = 3; + final int acceptedCount = 3; + writeRows(prefix, rejectedCount + acceptedCount); + + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, rejectedCount)); + final AtomicInteger hintCallCount = new AtomicInteger(0); + + FilterBase hintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + hintCallCount.incrementAndGet(); + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + List results = scanAll(new Scan().addFamily(FAMILY).setFilter(hintFilter)); + + assertEquals(1, hintCallCount.get()); + assertEquals(acceptedCount * CELLS_PER_ROW, results.size()); + } + + @Test + public void testMultiFamilyScanWithHint() throws IOException { + final String prefix = "row"; + final int rejectedCount = 3; + final int acceptedCount = 3; + writeRowsMultiFamily(prefix, rejectedCount + acceptedCount); + + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, rejectedCount)); + final AtomicInteger hintCallCount = new AtomicInteger(0); + + FilterBase hintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + hintCallCount.incrementAndGet(); + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + FilterBase noHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + }; + + // Scan both families. + Scan hintScan = new Scan().addFamily(FAMILY).addFamily(FAMILY2).setFilter(hintFilter); + Scan noHintScan = new Scan().addFamily(FAMILY).addFamily(FAMILY2).setFilter(noHintFilter); + + List hintResults = scanAll(hintScan); + List noHintResults = scanAll(noHintScan); + + assertEquals("Both paths must return the same number of cells", noHintResults.size(), + hintResults.size()); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue("Cell mismatch at index " + i, + CellUtil.equals(hintResults.get(i), noHintResults.get(i))); + } + assertEquals(2 * CELLS_PER_ROW * acceptedCount, hintResults.size()); + assertEquals(1, hintCallCount.get()); + } + + @Test + public void testDataCorrectnessVsUnfilteredScan() throws IOException { + final String prefix = "row"; + final int totalRows = 8; + writeRows(prefix, totalRows); + + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, 3)); + + FilterBase hintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + List filteredResults = scanAll(new Scan().addFamily(FAMILY).setFilter(hintFilter)); + + // Unfiltered scan starting at acceptedStartRow — should return the exact same cells. + Scan unfilteredScan = new Scan().addFamily(FAMILY).withStartRow(acceptedStartRow); + List unfilteredResults = scanAll(unfilteredScan); + + assertEquals("Filtered and unfiltered scans must return same cell count", + unfilteredResults.size(), filteredResults.size()); + for (int i = 0; i < filteredResults.size(); i++) { + assertTrue("Cell mismatch at index " + i, + CellUtil.equals(filteredResults.get(i), unfilteredResults.get(i))); + } + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java index 719c57e32927..9a9bc320bb91 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java @@ -296,13 +296,6 @@ public ReturnCode filterCell(final Cell c) { } } - /** - * A filter whose {@link #getSkipHint(Cell)} always returns a fixed {@link ExtendedCell} target, - * simulating a range-driven filter (e.g. {@code MultiRowRangeFilter}) that knows the next - * interesting position without inspecting the skipped cell. The filter's - * {@link #filterCell(Cell)} is intentionally left as the default include-all so that it does not - * interfere with tests that need cells to reach that method. - */ private static class FixedSkipHintFilter extends FilterBase { final ExtendedCell hint; @@ -419,27 +412,13 @@ scanWithFilter, new ScanInfo(this.conf, fam2, 0, 5, ttl, KeepDeletedCells.FALSE, assertArrayEquals(nextCell.getQualifierArray(), col4); } - // ----------------------------------------------------------------------- - // Tests for HBASE-29974: Filter#getSkipHint consulted at matchColumn - // structural short-circuits (time-range, column, and version gates). - // ----------------------------------------------------------------------- - - /** - * HBASE-29974 Path 2 — time-range upper-bound gate (tsCmp > 0). - *

    - * When a cell's timestamp is newer than the scan's time-range maximum ({@code ts >= maxTs}), the - * original code returns {@link MatchCode#SKIP} without ever calling {@code filterCell}. With the - * fix, the matcher must first ask the filter for a skip hint. If the filter returns a non-null - * hint, {@link MatchCode#SEEK_NEXT_USING_HINT} must be returned and - * {@link UserScanQueryMatcher#getNextKeyHint} must return that hint. - */ + /** Verify that getSkipHint is consulted when a cell is newer than the time-range upper bound. */ @Test public void testSkipHintConsultedForCellNewerThanTimeRange() throws IOException { long now = EnvironmentEdgeManager.currentTime(); long minTs = now - 2000; long maxTs = now - 1000; - // The hint points to a cell in the middle of the valid time range so the scanner can resume. KeyValue hintCell = new KeyValue(row2, fam2, col1, now - 1500, data); Scan timeRangeScan = new Scan().addFamily(fam2).setTimeRange(minTs, maxTs) .setFilter(new FixedSkipHintFilter(hintCell)); @@ -450,27 +429,16 @@ public void testSkipHintConsultedForCellNewerThanTimeRange() throws IOException 0, rowComparator, false), null, 0, now2, null); - // ts=now2 >= maxTs, so tsCmp > 0: time-range gate fires before filterCell (Path 2). + // ts=now2 >= maxTs, so tsCmp > 0: time-range gate fires before filterCell. KeyValue tooNew = new KeyValue(row1, fam2, col1, now2, data); qm.setToNewRow(tooNew); - assertEquals("Filter hint must promote SKIP to SEEK_NEXT_USING_HINT", - MatchCode.SEEK_NEXT_USING_HINT, qm.match(tooNew)); - assertEquals("getNextKeyHint must return the pending skip hint", hintCell, - qm.getNextKeyHint(tooNew)); - // pendingSkipHint is consumed on the first getNextKeyHint call; subsequent calls fall through - // to FilterBase#getNextCellHint which returns null. - assertNull("pendingSkipHint must be cleared after being drained", qm.getNextKeyHint(tooNew)); + assertEquals(MatchCode.SEEK_NEXT_USING_HINT, qm.match(tooNew)); + assertEquals(hintCell, qm.getNextKeyHint(tooNew)); + assertNull(qm.getNextKeyHint(tooNew)); } - /** - * HBASE-29974 Path 2 — time-range lower-bound gate (tsCmp < 0). - *

    - * When a cell's timestamp is older than the scan's time-range minimum ({@code ts < minTs}), the - * original code returns the column-tracker's next-row-or-next-column suggestion without - * consulting the filter. With the fix, the matcher must first ask the filter for a skip hint, and - * prefer it over the column-tracker's answer when non-null. - */ + /** Verify that getSkipHint is consulted when a cell is older than the time-range lower bound. */ @Test public void testSkipHintConsultedForCellOlderThanTimeRange() throws IOException { long now = EnvironmentEdgeManager.currentTime(); @@ -486,31 +454,22 @@ public void testSkipHintConsultedForCellOlderThanTimeRange() throws IOException 0, rowComparator, false), null, 0, now, null); - // ts = now-1000 < minTs (now-500), so tsCmp < 0: time-range gate fires (Path 2). + // ts = now-1000 < minTs (now-500), so tsCmp < 0: time-range gate fires. KeyValue tooOld = new KeyValue(row1, fam2, col1, now - 1000, data); qm.setToNewRow(tooOld); - assertEquals("Filter hint must promote column-tracker result to SEEK_NEXT_USING_HINT", - MatchCode.SEEK_NEXT_USING_HINT, qm.match(tooOld)); - assertEquals("getNextKeyHint must return the pending skip hint", hintCell, - qm.getNextKeyHint(tooOld)); - assertNull("pendingSkipHint must be cleared after being drained", qm.getNextKeyHint(tooOld)); + assertEquals(MatchCode.SEEK_NEXT_USING_HINT, qm.match(tooOld)); + assertEquals(hintCell, qm.getNextKeyHint(tooOld)); + assertNull(qm.getNextKeyHint(tooOld)); } - /** - * HBASE-29974 Path 2 — time-range gate, null hint falls through. - *

    - * When the filter's {@link org.apache.hadoop.hbase.filter.Filter#getSkipHint(Cell)} returns - * {@code null}, the matcher must fall back to the original structural skip code (SKIP or the - * column-tracker result) with no regression. - */ + /** Verify that a null getSkipHint falls back to the original SKIP match code. */ @Test public void testNullSkipHintFallsThroughToOriginalCodeOnTimeRangeGate() throws IOException { long now = EnvironmentEdgeManager.currentTime(); long minTs = now - 2000; long maxTs = now - 1000; - // AlwaysIncludeFilter does not override getSkipHint(), so it returns null. Scan timeRangeScan = new Scan().addFamily(fam2).setTimeRange(minTs, maxTs).setFilter(new AlwaysIncludeFilter()); @@ -523,19 +482,10 @@ public void testNullSkipHintFallsThroughToOriginalCodeOnTimeRangeGate() throws I KeyValue tooNew = new KeyValue(row1, fam2, col1, now2, data); qm.setToNewRow(tooNew); - // With a null hint the gate must still return the original SKIP code. - assertEquals("Null getSkipHint must not change the original SKIP match code", MatchCode.SKIP, - qm.match(tooNew)); + assertEquals(MatchCode.SKIP, qm.match(tooNew)); } - /** - * HBASE-29974 Path 3 — column-exclusion gate ({@code checkColumn() != INCLUDE}). - *

    - * When an explicit-column scan encounters a qualifier not in its requested set, the column - * tracker returns a skip/seek code before {@code filterCell} is ever reached. With the fix, the - * matcher must consult {@code getSkipHint} first, and return - * {@link MatchCode#SEEK_NEXT_USING_HINT} when a non-null hint is available. - */ + /** Verify that getSkipHint is consulted when a column is excluded by the scan's column set. */ @Test public void testSkipHintConsultedForExcludedColumn() throws IOException { long now = EnvironmentEdgeManager.currentTime(); @@ -553,28 +503,17 @@ scanWithFilter, new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, KeyValue excludedCol = new KeyValue(row1, fam2, col1, 1, data); qm.setToNewRow(excludedCol); - assertEquals("Filter hint must promote column-exclusion result to SEEK_NEXT_USING_HINT", - MatchCode.SEEK_NEXT_USING_HINT, qm.match(excludedCol)); - assertEquals("getNextKeyHint must return the pending skip hint", hintCell, - qm.getNextKeyHint(excludedCol)); - assertNull("pendingSkipHint must be cleared after being drained", - qm.getNextKeyHint(excludedCol)); + assertEquals(MatchCode.SEEK_NEXT_USING_HINT, qm.match(excludedCol)); + assertEquals(hintCell, qm.getNextKeyHint(excludedCol)); + assertNull(qm.getNextKeyHint(excludedCol)); } - /** - * HBASE-29974 Path 3 — version-exhaustion gate ({@code checkVersions()} returns - * {@link MatchCode#SEEK_NEXT_COL}). - *

    - * When the column tracker has already seen the maximum number of versions for a qualifier, it - * returns SKIP or SEEK_NEXT_COL for subsequent versions, again bypassing {@code filterCell}. With - * the fix, the matcher must consult {@code getSkipHint} at that point. - */ + /** Verify that getSkipHint is consulted when the version limit is exhausted. */ @Test public void testSkipHintConsultedOnVersionExhaustion() throws IOException { long now = EnvironmentEdgeManager.currentTime(); KeyValue hintCell = new KeyValue(row2, fam2, col1, now, data); - // ScanInfo maxVersions=1: column tracker allows exactly 1 version per qualifier. Scan scanWithFilter = new Scan().addFamily(fam2).setFilter(new FixedSkipHintFilter(hintCell)); UserScanQueryMatcher qm = UserScanQueryMatcher.create(scanWithFilter, @@ -582,39 +521,24 @@ public void testSkipHintConsultedOnVersionExhaustion() throws IOException { 0, rowComparator, false), null, now - ttl, now, null); - // First version of col1: passes all gates and reaches filterCell. KeyValue version1 = new KeyValue(row1, fam2, col1, now - 10, data); qm.setToNewRow(version1); - MatchCode firstCode = qm.match(version1); - assertEquals("First version must be included", MatchCode.INCLUDE, firstCode); + assertEquals(MatchCode.INCLUDE, qm.match(version1)); - // Second version of col1: checkVersions returns SEEK_NEXT_COL (version limit exceeded). - // The filter hint must be consulted and SEEK_NEXT_USING_HINT returned. + // Second version: version limit exceeded, checkVersions returns SEEK_NEXT_COL. KeyValue version2 = new KeyValue(row1, fam2, col1, now - 20, data); - assertEquals("Filter hint must promote version-exhaustion result to SEEK_NEXT_USING_HINT", - MatchCode.SEEK_NEXT_USING_HINT, qm.match(version2)); - assertEquals("getNextKeyHint must return the pending skip hint from version gate", hintCell, - qm.getNextKeyHint(version2)); - assertNull("pendingSkipHint must be cleared after being drained", qm.getNextKeyHint(version2)); + assertEquals(MatchCode.SEEK_NEXT_USING_HINT, qm.match(version2)); + assertEquals(hintCell, qm.getNextKeyHint(version2)); + assertNull(qm.getNextKeyHint(version2)); } - /** - * HBASE-29974 — {@code pendingSkipHint} must not interfere with the normal - * {@code filterCell → SEEK_NEXT_USING_HINT → getNextCellHint} path. - *

    - * If a cell passes all structural gates and reaches {@code filterCell}, which then returns - * {@link ReturnCode#SEEK_NEXT_USING_HINT}, the existing {@link Filter#getNextCellHint(Cell)} - * mechanism must still be used (not {@code getSkipHint}), and {@code pendingSkipHint} must remain - * null so it does not contaminate the result. - */ + /** Verify that the normal filterCell/getNextCellHint path is unaffected by pendingSkipHint. */ @Test public void testNormalFilterCellHintPathUnaffectedBySkipHintChange() throws IOException { long now = EnvironmentEdgeManager.currentTime(); final KeyValue filterHintCell = new KeyValue(row2, fam2, col1, now, data); - // A filter that returns SEEK_NEXT_USING_HINT from filterCell and provides a hint through - // getNextCellHint, while NOT overriding getSkipHint (returns null by default). FilterBase seekFilter = new FilterBase() { @Override public ReturnCode filterCell(Cell c) { @@ -637,12 +561,8 @@ public Cell getNextCellHint(Cell currentCell) { KeyValue cell = new KeyValue(row1, fam2, col1, now - 10, data); qm.setToNewRow(cell); - assertEquals("filterCell path must still return SEEK_NEXT_USING_HINT", - MatchCode.SEEK_NEXT_USING_HINT, qm.match(cell)); - // pendingSkipHint was never set (getSkipHint returned null), so getNextKeyHint must - // delegate to filter.getNextCellHint(). - assertEquals("getNextKeyHint must delegate to getNextCellHint when no skip hint is pending", - filterHintCell, qm.getNextKeyHint(cell)); + assertEquals(MatchCode.SEEK_NEXT_USING_HINT, qm.match(cell)); + assertEquals(filterHintCell, qm.getNextKeyHint(cell)); } /** From 385b1ec1a60d6e9de3938e1846cb79036a3ce62e Mon Sep 17 00:00:00 2001 From: Shubham Roy Date: Wed, 6 May 2026 14:12:53 +0530 Subject: [PATCH 6/9] Fixed issues with reverse scans. --- .../querymatcher/UserScanQueryMatcher.java | 11 +- .../filter/TestFilterHintForRejectedRow.java | 128 ++++++++++++++++++ .../TestUserScanQueryMatcher.java | 28 ++++ 3 files changed, 162 insertions(+), 5 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java index 1c7a8f475cbf..e7ec0ed53fd9 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java @@ -71,6 +71,8 @@ public abstract class UserScanQueryMatcher extends ScanQueryMatcher { */ private ExtendedCell pendingSkipHint = null; + private final boolean reversed; + private static ExtendedCell createStartKey(Scan scan, ScanInfo scanInfo) { if (scan.includeStartRow()) { return createStartKeyFromRow(scan.getStartRow(), scanInfo); @@ -92,6 +94,7 @@ protected UserScanQueryMatcher(Scan scan, ScanInfo scanInfo, ColumnTracker colum this.versionsAfterFilter = 0; } this.stopRow = scan.getStopRow(); + this.reversed = scan.isReversed(); TimeRange timeRange = scan.getColumnFamilyTimeRange().get(scanInfo.getFamily()); if (timeRange == null) { this.tr = scan.getTimeRange(); @@ -232,11 +235,9 @@ private boolean resolveSkipHint(ExtendedCell cell) throws IOException { + "is not an ExtendedCell. Filter class: " + filter.getClass().getName()); } ExtendedCell hint = (ExtendedCell) raw; - // Only accept the hint if it advances past the current cell. A backward or - // no-op hint would cause the scanner to re-visit the same cell, degrading - // to a tight re-match loop. This mirrors the guard in StoreScanner's - // SEEK_NEXT_USING_HINT handler (see StoreScanner line ~790). - if (rowComparator.compare(hint, cell) <= 0) { + // Only accept the hint if it advances past the current cell in scan direction. + int cmp = rowComparator.compare(hint, cell); + if ((!reversed && cmp <= 0) || (reversed && cmp >= 0)) { return false; } pendingSkipHint = hint; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java index 321e6ef6ac43..f8bfd485d0f3 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java @@ -525,4 +525,132 @@ public Cell getHintForRejectedRow(Cell firstRowCell) { CellUtil.equals(filteredResults.get(i), unfilteredResults.get(i))); } } + + @Test + public void testHintOvershootingStopRowTerminatesGracefully() throws IOException { + final String prefix = "row"; + final int totalRows = 10; + writeRows(prefix, totalRows); + + // Scan rows 00-06 (stopRow=row-07, exclusive). Reject rows 00-02, hint to row-09. + // The hint overshoots the stop row, so the scan should terminate with no results + // from the rejected range and only include rows 03-06 from the non-rejected range. + final byte[] stopRow = Bytes.toBytes(String.format("%s-%02d", prefix, 7)); + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, 3)); + final byte[] hintTarget = Bytes.toBytes(String.format("%s-%02d", prefix, 9)); + + FilterBase hintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + // Hint past the stop row — scanner should terminate gracefully. + return PrivateCellUtil.createFirstOnRow(hintTarget); + } + }; + + Scan scan = new Scan().addFamily(FAMILY).withStopRow(stopRow).setFilter(hintFilter); + List results = scanAll(scan); + + // The hint jumps past stopRow, so no cells should be returned. + assertTrue("Hint past stop row must terminate scan with no results", results.isEmpty()); + } + + @Test + public void testHintWithinStopRowReturnsCorrectResults() throws IOException { + final String prefix = "row"; + final int totalRows = 10; + writeRows(prefix, totalRows); + + // Scan rows 00-06 (stopRow=row-07, exclusive). Reject rows 00-02, hint to row-03. + final byte[] stopRow = Bytes.toBytes(String.format("%s-%02d", prefix, 7)); + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, 3)); + + FilterBase hintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + Scan hintScan = new Scan().addFamily(FAMILY).withStopRow(stopRow).setFilter(hintFilter); + List hintResults = scanAll(hintScan); + + // Should get rows 03-06 (4 rows). + Scan baselineScan = + new Scan().addFamily(FAMILY).withStartRow(acceptedStartRow).withStopRow(stopRow); + List baselineResults = scanAll(baselineScan); + + assertEquals("Hint within stop row must return same results as baseline", + baselineResults.size(), hintResults.size()); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue("Cell mismatch at index " + i, + CellUtil.equals(hintResults.get(i), baselineResults.get(i))); + } + assertEquals(4 * CELLS_PER_ROW, hintResults.size()); + } + + @Test + public void testGetSkipHintCalledForReversedScan() throws IOException { + final long insideTs = 1500; + final long tooNewTs = 5000; + final int rowCount = 5; + final byte[] qInside = Bytes.toBytes("q-inside"); + final byte[] qNew = Bytes.toBytes("q-new"); + + // Write two qualifiers per row: "q-inside" at ts=1500 (within range) and "q-new" at + // ts=5000 (above the range upper bound). The time-range gate fires for the too-new cell + // before version tracking can absorb it, ensuring getSkipHint is consulted. + for (int i = 0; i < rowCount; i++) { + byte[] row = Bytes.toBytes(String.format("skiprev-%02d", i)); + Put p = new Put(row); + p.setDurability(Durability.SKIP_WAL); + p.addColumn(FAMILY, qInside, insideTs, VALUE); + p.addColumn(FAMILY, qNew, tooNewTs, VALUE); + region.put(p); + } + region.flush(true); + + final AtomicInteger skipHintCalls = new AtomicInteger(0); + + // Return null to verify the code path is reached without altering scan semantics. + // The unit test TestUserScanQueryMatcher#testSkipHintConsultedForReversedScan validates + // that a non-null hint is correctly accepted by the reversed-scan guard. + FilterBase skipHintFilter = new FilterBase() { + @Override + public Cell getSkipHint(Cell skippedCell) { + skipHintCalls.incrementAndGet(); + return null; + } + }; + + // Reversed scan with time range [1000, 3000): insideTs cells pass, tooNewTs cells are + // structurally skipped (tsCmp > 0). The skip hint should be consulted. + Scan hintScan = new Scan().addFamily(FAMILY).setTimeRange(1000, 3000).setReversed(true) + .setFilter(skipHintFilter); + Scan noHintScan = new Scan().addFamily(FAMILY).setTimeRange(1000, 3000).setReversed(true); + + List hintResults = scanAll(hintScan); + List noHintResults = scanAll(noHintScan); + + assertEquals("Both paths must return the same number of cells", noHintResults.size(), + hintResults.size()); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue("Cell mismatch at index " + i, + CellUtil.equals(hintResults.get(i), noHintResults.get(i))); + } + assertEquals(rowCount, hintResults.size()); + assertTrue("getSkipHint must be called at least once for reversed scan", + skipHintCalls.get() > 0); + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java index 7fdca365a191..c2e59c3b6628 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java @@ -560,6 +560,34 @@ public Cell getNextCellHint(Cell currentCell) { assertEquals(filterHintCell, qm.getNextKeyHint(cell)); } + /** Verify that getSkipHint works correctly for reversed scans (hint must be smaller). */ + @Test + public void testSkipHintConsultedForReversedScan() throws IOException { + long now = EnvironmentEdgeManager.currentTime(); + long minTs = now - 2000; + long maxTs = now - 1000; + + // For reversed scan, the hint must point backward (smaller key). + KeyValue hintCell = new KeyValue(row1, fam2, col1, now - 1500, data); + Scan reversedScan = new Scan().addFamily(fam2).setTimeRange(minTs, maxTs).setReversed(true) + .setFilter(new FixedSkipHintFilter(hintCell)); + + long now2 = EnvironmentEdgeManager.currentTime(); + UserScanQueryMatcher qm = UserScanQueryMatcher.create(reversedScan, + new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, + 0, rowComparator, false), + null, 0, now2, null); + + // ts=now2 >= maxTs so tsCmp > 0: time-range gate fires. + // row2 > row1 in natural order, so for reversed scan hint (row1) is "forward" (smaller). + KeyValue tooNew = new KeyValue(row2, fam2, col1, now2, data); + qm.setToNewRow(tooNew); + + assertEquals(MatchCode.SEEK_NEXT_USING_HINT, qm.match(tooNew)); + assertEquals(hintCell, qm.getNextKeyHint(tooNew)); + assertNull(qm.getNextKeyHint(tooNew)); + } + /** * After enough consecutive range delete markers, the matcher should switch from SKIP to * SEEK_NEXT_COL. Point deletes and KEEP_DELETED_CELLS always SKIP. From b86ffe1b2d2802e43614dff055d0c0604f817d7d Mon Sep 17 00:00:00 2001 From: Shubham Roy Date: Fri, 8 May 2026 09:31:44 +0530 Subject: [PATCH 7/9] Addressed review comments: defensive hint clearing, compareRows fix, and Javadoc improvements. - H1: Clear pendingSkipHint on setToNewRow/clearCurrentRow to prevent stale hint leaks - M2: Use compareRows in nextRowViaHint for semantically correct row-level comparison - M1: Document why resolveSkipHint intentionally uses full-key compare (intra-row hints) - L5: Add reversed-scan direction guidance to getHintForRejectedRow/getSkipHint Javadoc - H2/M6/M7: Document stop-row invariant, metrics semantics, and coprocessor hook behavior - H3: Document FilterList/SkipFilter/WhileMatchFilter composition limitation (follow-up JIRA) Co-Authored-By: Claude Opus 4.6 (1M context) --- .../org/apache/hadoop/hbase/filter/Filter.java | 13 +++++++++++++ .../hbase/regionserver/RegionScannerImpl.java | 15 ++++++++++++++- .../querymatcher/UserScanQueryMatcher.java | 15 ++++++++++++++- 3 files changed, 41 insertions(+), 2 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java index f29f99fb3cf2..423e5b20aeed 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java @@ -212,6 +212,13 @@ public enum ReturnCode { * side.

  • *
  • Returning {@code null} (the default) falls through to the existing {@code nextRow()} * behaviour, preserving full backward compatibility.
  • + *
  • For reversed scans ({@link org.apache.hadoop.hbase.client.Scan#isReversed()}), the hint + * must point to a smaller row key (earlier in reverse-scan direction). The scanner + * validates hint direction and falls back to {@code nextRow()} if the hint does not advance in + * the scan direction.
  • + *
  • Composite filter limitation: {@code FilterList}, {@code SkipFilter}, and + * {@code WhileMatchFilter} do not currently delegate this method to wrapped sub-filters. Hints + * from filters used inside these wrappers will be silently ignored.
  • * * @param firstRowCell the first cell encountered in the rejected row; contains the row key that * was passed to {@code filterRowKey} @@ -245,6 +252,12 @@ public Cell getHintForRejectedRow(final Cell firstRowCell) throws IOException { * side. *
  • Returning {@code null} (the default) falls through to the existing structural skip/seek * behaviour, preserving full backward compatibility.
  • + *
  • For reversed scans, the returned cell must have a smaller row key (i.e., earlier + * in reverse-scan direction) than the {@code skippedCell}. Hints that do not advance in the scan + * direction are silently ignored.
  • + *
  • Composite filter limitation: {@code FilterList}, {@code SkipFilter}, and + * {@code WhileMatchFilter} do not currently delegate this method to wrapped sub-filters. Hints + * from filters used inside these wrappers will be silently ignored.
  • * * @param skippedCell the cell that was rejected by the time-range, column, or version gate before * {@code filterCell} could be consulted diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java index 94ddb5b75098..7f81740f8fe6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java @@ -768,6 +768,19 @@ protected boolean nextRow(ScannerContext scannerContext, Cell curRowCell) throws * The skipping-row mode flag is set around the seek so that block-level size tracking continues * to function (consistent with {@link #nextRow}), and the filter state is reset afterwards so the * next row starts with a clean filter context. + *

    + * Stop-row invariant: This method does not validate that {@code hint} falls + * within the scan's stop row. If the hint overshoots, the next iteration's + * {@link #shouldStop(Cell)} check catches it and returns NO_MORE_VALUES. One wasted seek may + * occur, but correctness is maintained. + *

    + * Metrics note: The rows-scanned metric is incremented once by the caller for + * the rejected row. Rows physically skipped by the seek are not individually counted — this + * reflects the fact that no per-row work was done for those rows. + *

    + * Coprocessor note: {@code postScannerFilterRow} is invoked once with + * {@code curRowCell}, not once per skipped row. Coprocessors counting filtered rows should be + * aware of this semantic when the hint path is used. * @param scannerContext scanner context used for limit tracking * @param curRowCell the first cell of the row that was rejected by {@code filterRowKey}; * passed to the coprocessor hook for observability @@ -781,7 +794,7 @@ private boolean nextRowViaHint(ScannerContext scannerContext, Cell curRowCell, E throws IOException { assert this.joinedContinuationRow == null : "Trying to go to next row during joinedHeap read."; - int difference = comparator.compare(hint, curRowCell); + int difference = comparator.compareRows(hint, curRowCell); if ((!reversed && difference > 0) || (reversed && difference < 0)) { scannerContext.setSkippingRow(true); if (reversed) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java index e7ec0ed53fd9..184427ca9f1b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java @@ -150,6 +150,18 @@ public void beforeShipped() throws IOException { } } + @Override + public void setToNewRow(ExtendedCell currentRow) { + super.setToNewRow(currentRow); + pendingSkipHint = null; + } + + @Override + public void clearCurrentRow() { + super.clearCurrentRow(); + pendingSkipHint = null; + } + // At each structural short-circuit below (time-range, column-exclusion, version-exhaustion), // the filter is consulted via resolveSkipHint() before falling back to the default skip/seek // code. This lets filters provide a forward seek target even when filterCell is never called. @@ -235,7 +247,8 @@ private boolean resolveSkipHint(ExtendedCell cell) throws IOException { + "is not an ExtendedCell. Filter class: " + filter.getClass().getName()); } ExtendedCell hint = (ExtendedCell) raw; - // Only accept the hint if it advances past the current cell in scan direction. + // Full-key compare is intentional: skip hints can advance within the same row + // (e.g., to a later column), not just across rows. int cmp = rowComparator.compare(hint, cell); if ((!reversed && cmp <= 0) || (reversed && cmp >= 0)) { return false; From dc003afcdb6bb965e35be500b00beab40f4bb795 Mon Sep 17 00:00:00 2001 From: Shubham Roy Date: Fri, 8 May 2026 10:30:19 +0530 Subject: [PATCH 8/9] Addressed round-2 review comments (N1, N2, N4). - N4: Clear pendingSkipHint before super.setToNewRow/clearCurrentRow (defense-in-depth) - N2: Move incrementCountOfRowsScannedMetric after getHintForRejectedRow to avoid bumping metrics when a buggy filter throws DNRIOE - N1/N8: Add testPendingSkipHintClearedOnRowTransition to verify hint is invalidated on row transitions even when not consumed via getNextKeyHint Co-Authored-By: Claude Opus 4.6 (1M context) --- .../hbase/regionserver/RegionScannerImpl.java | 10 ++++--- .../querymatcher/UserScanQueryMatcher.java | 4 +-- .../TestUserScanQueryMatcher.java | 29 +++++++++++++++++++ 3 files changed, 37 insertions(+), 6 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java index 7f81740f8fe6..1c3530e7996d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java @@ -507,13 +507,15 @@ private boolean nextInternal(List results, ScannerContext if (isFilterDoneInternal()) { return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues(); } - // Typically the count of rows scanned is incremented inside #populateResult. However, - // here we are filtering a row based purely on its row key, preventing us from calling - // #populateResult. Thus, perform the necessary increment here to rows scanned metric - incrementCountOfRowsScannedMetric(scannerContext); // HBASE-29974: ask the filter for a seek hint so we can jump directly past the rejected // row instead of iterating through its cells one-by-one via nextRow(). ExtendedCell rowHint = getHintForRejectedRow(current); + // Typically the count of rows scanned is incremented inside #populateResult. However, + // here we are filtering a row based purely on its row key, preventing us from calling + // #populateResult. Thus, perform the necessary increment here to rows scanned metric. + // Placed after getHintForRejectedRow so that a buggy filter throwing DNRIOE doesn't + // leave the metric incremented for a row that was never actually processed. + incrementCountOfRowsScannedMetric(scannerContext); boolean moreRows = (rowHint != null) ? nextRowViaHint(scannerContext, current, rowHint) : nextRow(scannerContext, current); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java index 184427ca9f1b..0c7c902afb98 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java @@ -152,14 +152,14 @@ public void beforeShipped() throws IOException { @Override public void setToNewRow(ExtendedCell currentRow) { - super.setToNewRow(currentRow); pendingSkipHint = null; + super.setToNewRow(currentRow); } @Override public void clearCurrentRow() { - super.clearCurrentRow(); pendingSkipHint = null; + super.clearCurrentRow(); } // At each structural short-circuit below (time-range, column-exclusion, version-exhaustion), diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java index c2e59c3b6628..da5d70e12b82 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java @@ -527,6 +527,35 @@ public void testSkipHintConsultedOnVersionExhaustion() throws IOException { assertNull(qm.getNextKeyHint(version2)); } + /** Verify that pendingSkipHint is cleared when a row transition occurs via setToNewRow. */ + @Test + public void testPendingSkipHintClearedOnRowTransition() throws IOException { + long now = EnvironmentEdgeManager.currentTime(); + + KeyValue hintCell = new KeyValue(row2, fam2, col1, now, data); + Scan scanWithFilter = new Scan().addFamily(fam2).setFilter(new FixedSkipHintFilter(hintCell)); + + UserScanQueryMatcher qm = UserScanQueryMatcher.create(scanWithFilter, + new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, + 0, rowComparator, false), + null, now - ttl, now, null); + + // Trigger a structural skip that stores pendingSkipHint. + KeyValue version1 = new KeyValue(row1, fam2, col1, now - 10, data); + qm.setToNewRow(version1); + assertEquals(MatchCode.INCLUDE, qm.match(version1)); + KeyValue version2 = new KeyValue(row1, fam2, col1, now - 20, data); + assertEquals(MatchCode.SEEK_NEXT_USING_HINT, qm.match(version2)); + + // Do NOT consume the hint via getNextKeyHint. Instead, simulate a row transition. + KeyValue newRowCell = new KeyValue(row2, fam2, col1, now - 10, data); + qm.setToNewRow(newRowCell); + + // After row transition, pendingSkipHint must be null — getNextKeyHint should delegate + // to the filter's getNextCellHint (which returns null for FixedSkipHintFilter). + assertNull(qm.getNextKeyHint(newRowCell)); + } + /** Verify that the normal filterCell/getNextCellHint path is unaffected by pendingSkipHint. */ @Test public void testNormalFilterCellHintPathUnaffectedBySkipHintChange() throws IOException { From 8dfa4051b5d3894a1293e6904c99fa70d66c20bb Mon Sep 17 00:00:00 2001 From: Shubham Roy Date: Sat, 9 May 2026 16:34:47 +0530 Subject: [PATCH 9/9] Migrated TestFilterHintForRejectedRow to JUnit 5. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../filter/TestFilterHintForRejectedRow.java | 138 ++++++++---------- 1 file changed, 64 insertions(+), 74 deletions(-) diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java index f8bfd485d0f3..ba57ff563b74 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hbase.filter; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.IOException; import java.util.ArrayList; @@ -26,7 +26,6 @@ import java.util.concurrent.atomic.AtomicInteger; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellUtil; -import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtil; import org.apache.hadoop.hbase.PrivateCellUtil; import org.apache.hadoop.hbase.TableName; @@ -43,21 +42,16 @@ import org.apache.hadoop.hbase.testclassification.FilterTests; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.Bytes; -import org.junit.After; -import org.junit.Before; -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.junit.rules.TestName; - -@Category({ FilterTests.class, MediumTests.class }) +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInfo; + +@Tag(FilterTests.TAG) +@Tag(MediumTests.TAG) public class TestFilterHintForRejectedRow { - @ClassRule - public static final HBaseClassTestRule CLASS_RULE = - HBaseClassTestRule.forClass(TestFilterHintForRejectedRow.class); - private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); private static final byte[] FAMILY = Bytes.toBytes("f"); @@ -67,13 +61,10 @@ public class TestFilterHintForRejectedRow { private HRegion region; - @Rule - public TestName name = new TestName(); - - @Before - public void setUp() throws Exception { + @BeforeEach + public void setUp(TestInfo testInfo) throws Exception { TableDescriptor tableDescriptor = - TableDescriptorBuilder.newBuilder(TableName.valueOf(name.getMethodName())) + TableDescriptorBuilder.newBuilder(TableName.valueOf(testInfo.getTestMethod().get().getName())) .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY)) .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY2)).build(); RegionInfo info = RegionInfoBuilder.newBuilder(tableDescriptor.getTableName()).build(); @@ -81,7 +72,7 @@ public void setUp() throws Exception { TEST_UTIL.getConfiguration(), tableDescriptor); } - @After + @AfterEach public void tearDown() throws Exception { HBaseTestingUtil.closeRegionAndWAL(this.region); } @@ -163,16 +154,15 @@ public boolean filterRowKey(Cell cell) { List hintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(hintFilter)); List noHintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(noHintFilter)); - assertEquals("Both paths must return the same number of cells", noHintResults.size(), - hintResults.size()); + assertEquals(noHintResults.size(), hintResults.size(), + "Both paths must return the same number of cells"); for (int i = 0; i < hintResults.size(); i++) { - assertTrue("Cell mismatch at index " + i, - CellUtil.equals(hintResults.get(i), noHintResults.get(i))); + assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)), + "Cell mismatch at index " + i); } assertEquals(acceptedCount * CELLS_PER_ROW, hintResults.size()); - assertEquals( - "getHintForRejectedRow must be called exactly once: the hint skips all rejected rows", 1, - hintCallCount.get()); + assertEquals(1, hintCallCount.get(), + "getHintForRejectedRow must be called exactly once: the hint skips all rejected rows"); } @Test @@ -195,8 +185,8 @@ public Cell getHintForRejectedRow(Cell firstRowCell) { }; List results = scanAll(new Scan().addFamily(FAMILY).setFilter(beyondHintFilter)); - assertTrue("When the hint is past the last row, no cells should be returned", - results.isEmpty()); + assertTrue(results.isEmpty(), + "When the hint is past the last row, no cells should be returned"); } @Test @@ -227,13 +217,13 @@ public Cell getHintForRejectedRow(Cell firstRowCell) { assertEquals(acceptedCount * CELLS_PER_ROW, results.size()); for (Cell c : results) { - assertTrue("Every returned cell must belong to the accepted row range", + assertTrue( Bytes.compareTo(c.getRowArray(), c.getRowOffset(), c.getRowLength(), acceptedStartRow, 0, - acceptedStartRow.length) >= 0); + acceptedStartRow.length) >= 0, + "Every returned cell must belong to the accepted row range"); } - assertEquals( - "getHintForRejectedRow must be called once per rejected row in the per-row hint strategy", - rejectedCount, hintCallCount.get()); + assertEquals(rejectedCount, hintCallCount.get(), + "getHintForRejectedRow must be called once per rejected row in the per-row hint strategy"); } @Test @@ -278,15 +268,15 @@ public boolean filterRowKey(Cell cell) { List hintResults = scanAll(reversedHintScan); List noHintResults = scanAll(reversedNoHintScan); - assertEquals("Both paths must return the same number of cells", noHintResults.size(), - hintResults.size()); + assertEquals(noHintResults.size(), hintResults.size(), + "Both paths must return the same number of cells"); for (int i = 0; i < hintResults.size(); i++) { - assertTrue("Cell mismatch at index " + i, - CellUtil.equals(hintResults.get(i), noHintResults.get(i))); + assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)), + "Cell mismatch at index " + i); } assertEquals(5 * CELLS_PER_ROW, hintResults.size()); - assertEquals("getHintForRejectedRow must be called exactly once for reversed scan", 1, - hintCallCount.get()); + assertEquals(1, hintCallCount.get(), + "getHintForRejectedRow must be called exactly once for reversed scan"); } @Test @@ -321,11 +311,11 @@ public Cell getSkipHint(Cell skippedCell) { List hintResults = scanAll(hintScan); List noHintResults = scanAll(noHintScan); - assertEquals("Both paths must return the same number of cells", noHintResults.size(), - hintResults.size()); + assertEquals(noHintResults.size(), hintResults.size(), + "Both paths must return the same number of cells"); for (int i = 0; i < hintResults.size(); i++) { - assertTrue("Cell mismatch at index " + i, - CellUtil.equals(hintResults.get(i), noHintResults.get(i))); + assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)), + "Cell mismatch at index " + i); } assertEquals(rowCount, hintResults.size()); } @@ -362,11 +352,11 @@ public boolean filterRowKey(Cell cell) { List hintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(backwardHintFilter)); List noHintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(noHintFilter)); - assertEquals("Backward hint must produce same results as no-hint path", noHintResults.size(), - hintResults.size()); + assertEquals(noHintResults.size(), hintResults.size(), + "Backward hint must produce same results as no-hint path"); for (int i = 0; i < hintResults.size(); i++) { - assertTrue("Cell mismatch at index " + i, - CellUtil.equals(hintResults.get(i), noHintResults.get(i))); + assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)), + "Cell mismatch at index " + i); } } @@ -403,11 +393,11 @@ public boolean filterRowKey(Cell cell) { List hintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(sameRowHintFilter)); List noHintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(noHintFilter)); - assertEquals("Same-row hint must produce same results as no-hint path", noHintResults.size(), - hintResults.size()); + assertEquals(noHintResults.size(), hintResults.size(), + "Same-row hint must produce same results as no-hint path"); for (int i = 0; i < hintResults.size(); i++) { - assertTrue("Cell mismatch at index " + i, - CellUtil.equals(hintResults.get(i), noHintResults.get(i))); + assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)), + "Cell mismatch at index " + i); } assertEquals(3 * CELLS_PER_ROW, hintResults.size()); } @@ -481,11 +471,11 @@ public boolean filterRowKey(Cell cell) { List hintResults = scanAll(hintScan); List noHintResults = scanAll(noHintScan); - assertEquals("Both paths must return the same number of cells", noHintResults.size(), - hintResults.size()); + assertEquals(noHintResults.size(), hintResults.size(), + "Both paths must return the same number of cells"); for (int i = 0; i < hintResults.size(); i++) { - assertTrue("Cell mismatch at index " + i, - CellUtil.equals(hintResults.get(i), noHintResults.get(i))); + assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)), + "Cell mismatch at index " + i); } assertEquals(2 * CELLS_PER_ROW * acceptedCount, hintResults.size()); assertEquals(1, hintCallCount.get()); @@ -518,11 +508,11 @@ public Cell getHintForRejectedRow(Cell firstRowCell) { Scan unfilteredScan = new Scan().addFamily(FAMILY).withStartRow(acceptedStartRow); List unfilteredResults = scanAll(unfilteredScan); - assertEquals("Filtered and unfiltered scans must return same cell count", - unfilteredResults.size(), filteredResults.size()); + assertEquals(unfilteredResults.size(), filteredResults.size(), + "Filtered and unfiltered scans must return same cell count"); for (int i = 0; i < filteredResults.size(); i++) { - assertTrue("Cell mismatch at index " + i, - CellUtil.equals(filteredResults.get(i), unfilteredResults.get(i))); + assertTrue(CellUtil.equals(filteredResults.get(i), unfilteredResults.get(i)), + "Cell mismatch at index " + i); } } @@ -557,7 +547,7 @@ public Cell getHintForRejectedRow(Cell firstRowCell) { List results = scanAll(scan); // The hint jumps past stopRow, so no cells should be returned. - assertTrue("Hint past stop row must terminate scan with no results", results.isEmpty()); + assertTrue(results.isEmpty(), "Hint past stop row must terminate scan with no results"); } @Test @@ -591,11 +581,11 @@ public Cell getHintForRejectedRow(Cell firstRowCell) { new Scan().addFamily(FAMILY).withStartRow(acceptedStartRow).withStopRow(stopRow); List baselineResults = scanAll(baselineScan); - assertEquals("Hint within stop row must return same results as baseline", - baselineResults.size(), hintResults.size()); + assertEquals(baselineResults.size(), hintResults.size(), + "Hint within stop row must return same results as baseline"); for (int i = 0; i < hintResults.size(); i++) { - assertTrue("Cell mismatch at index " + i, - CellUtil.equals(hintResults.get(i), baselineResults.get(i))); + assertTrue(CellUtil.equals(hintResults.get(i), baselineResults.get(i)), + "Cell mismatch at index " + i); } assertEquals(4 * CELLS_PER_ROW, hintResults.size()); } @@ -643,14 +633,14 @@ public Cell getSkipHint(Cell skippedCell) { List hintResults = scanAll(hintScan); List noHintResults = scanAll(noHintScan); - assertEquals("Both paths must return the same number of cells", noHintResults.size(), - hintResults.size()); + assertEquals(noHintResults.size(), hintResults.size(), + "Both paths must return the same number of cells"); for (int i = 0; i < hintResults.size(); i++) { - assertTrue("Cell mismatch at index " + i, - CellUtil.equals(hintResults.get(i), noHintResults.get(i))); + assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)), + "Cell mismatch at index " + i); } assertEquals(rowCount, hintResults.size()); - assertTrue("getSkipHint must be called at least once for reversed scan", - skipHintCalls.get() > 0); + assertTrue(skipHintCalls.get() > 0, + "getSkipHint must be called at least once for reversed scan"); } }