Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
import org.apache.phoenix.jdbc.PhoenixPreparedStatement;
import org.apache.phoenix.query.QueryServices;
import org.apache.phoenix.query.QueryServicesOptions;
import org.apache.phoenix.replication.reader.ReplicationLogReplayService;
import org.apache.phoenix.schema.CompiledConditionalTTLExpression;
import org.apache.phoenix.schema.CompiledTTLExpression;
import org.apache.phoenix.schema.ConditionalTTLExpression;
Expand Down Expand Up @@ -139,6 +140,7 @@ public class CompactionScanner implements InternalScanner {
private final Store store;
private final RegionCoprocessorEnvironment env;
private long maxLookbackWindowStart;
private final long replicationConsistencyPoint;
private final long maxLookbackInMillis;
private int minVersion;
private int maxVersion;
Expand Down Expand Up @@ -199,8 +201,19 @@ public CompactionScanner(RegionCoprocessorEnvironment env, Store store,
this.maxLookbackWindowStart = this.maxLookbackInMillis == 0
? compactionTime
: compactionTime - (this.maxLookbackInMillis + 1);
ColumnFamilyDescriptor cfd = store.getColumnFamilyDescriptor();
Configuration conf = env.getConfiguration();
this.major = major && !forceMinorCompaction;
boolean replayEnabled =
conf.getBoolean(ReplicationLogReplayService.PHOENIX_REPLICATION_REPLAY_ENABLED,
ReplicationLogReplayService.DEFAULT_REPLICATION_REPLAY_ENABLED);
if (this.major && replayEnabled) {
this.replicationConsistencyPoint =
ReplicationLogReplayService.resolveConsistencyPoint(conf, tableName, columnFamilyName);
} else {
this.replicationConsistencyPoint =
ReplicationLogReplayService.CONSISTENCY_POINT_GUARD_DISABLED;
}
ColumnFamilyDescriptor cfd = store.getColumnFamilyDescriptor();
this.minVersion = cfd.getMinVersions();
this.maxVersion = cfd.getMaxVersions();
this.keepDeletedCells = keepDeleted ? KeepDeletedCells.TTL : cfd.getKeepDeletedCells();
Expand Down Expand Up @@ -1631,6 +1644,33 @@ private String getTenantIdFromRowKey(byte[] rowKey, boolean isSharedIndex) throw
}
}

/**
 * Returns the row-level max-lookback boundary after applying the replication consistency guard.
 * <p>
 * The unguarded boundary is the later of the row's TTL window start and the store's max-lookback
 * window start. That value is then capped so it never exceeds what has been replayed. The
 * consistency point is an exclusive upper bound (everything with ts &lt; consistencyPoint has
 * been replayed), so for a real consistency point the cap is {@code consistencyPoint - 1}; this
 * keeps cells at exactly ts == consistencyPoint on the retained side of the strict-greater
 * retention compare. The two sentinel values are used as the cap unchanged.
 * @param ttlWindowStart              row TTL window start in millis since epoch
 * @param maxLookbackWindowStart      store-level max-lookback window start in millis since epoch
 * @param replicationConsistencyPoint exclusive upper bound of replayed timestamps;
 *                                    CONSISTENCY_POINT_UNAVAILABLE (0) retains all,
 *                                    CONSISTENCY_POINT_GUARD_DISABLED (Long.MAX_VALUE) makes the
 *                                    guard a no-op
 * @return effective boundary for the strict-greater retention compare (millis since epoch)
 */
public static long computeRowMaxLookbackWithGuard(long ttlWindowStart,
  long maxLookbackWindowStart, long replicationConsistencyPoint) {
  final long unguardedBoundary = Math.max(ttlWindowStart, maxLookbackWindowStart);
  final boolean isSentinel =
    replicationConsistencyPoint == ReplicationLogReplayService.CONSISTENCY_POINT_UNAVAILABLE
      || replicationConsistencyPoint
        == ReplicationLogReplayService.CONSISTENCY_POINT_GUARD_DISABLED;
  // Sentinels cap as-is; a real consistency point is exclusive, so step back by one.
  final long cap = isSentinel ? replicationConsistencyPoint : replicationConsistencyPoint - 1;
  return Math.min(unguardedBoundary, cap);
}

/**
* The context for a given row during compaction. A row may have multiple compaction row versions.
* CompactionScanner uses the same row context for these versions.
Expand All @@ -1657,10 +1697,14 @@ private void init() {
/**
 * Derives this row's TTL window and its effective max-lookback boundary from the row TTL.
 * <p>
 * The TTL in millis is never allowed to be shorter than the max-lookback window plus one. The
 * row's max-lookback boundary is computed once, through
 * {@link #computeRowMaxLookbackWithGuard(long, long, long)}, so that it is capped by the
 * replication consistency point.
 * @param ttlInSecs row TTL in seconds; {@code HConstants.FOREVER} pins the TTL window start to 1
 */
private void setTTL(long ttlInSecs) {
  this.ttl = Math.max(ttlInSecs * 1000, maxLookbackInMillis + 1);
  this.ttlWindowStart = ttlInSecs == HConstants.FOREVER ? 1 : compactionTime - ttl;
  // Single assignment: the unguarded max(ttlWindowStart, maxLookbackWindowStart) store that used
  // to precede this line was dead, since it was overwritten immediately.
  this.maxLookbackWindowStartForRow = computeRowMaxLookbackWithGuard(ttlWindowStart,
    maxLookbackWindowStart, replicationConsistencyPoint);
  if (LOGGER.isTraceEnabled()) {
    // One trace line carrying all four values; the shorter two-value line was redundant.
    LOGGER.trace(String.format(
      "RowContext:- (ttlWindowStart=%d, maxLookbackWindowStart=%d, "
        + "replicationConsistencyPoint=%d, maxLookbackWindowStartForRow=%d)",
      ttlWindowStart, maxLookbackWindowStart, replicationConsistencyPoint,
      maxLookbackWindowStartForRow));
  }
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ public class ReplicationLogDiscoveryReplay extends ReplicationLogDiscovery {
*/
public static final double DEFAULT_WAITING_BUFFER_PERCENTAGE = 15.0;

private ReplicationRound lastRoundInSync;
private volatile ReplicationRound lastRoundInSync;

// AtomicReference ensures listener updates are visible to replay thread
private final AtomicReference<ReplicationReplayState> replicationReplayState =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.phoenix.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.phoenix.thirdparty.com.google.common.base.Supplier;
import org.apache.phoenix.thirdparty.com.google.common.base.Suppliers;
import org.apache.phoenix.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;

/**
Expand Down Expand Up @@ -77,14 +80,51 @@ public class ReplicationLogReplayService {
*/
public static final int DEFAULT_REPLICATION_REPLAY_SERVICE_EXECUTOR_SHUTDOWN_TIMEOUT_SECONDS = 30;

/**
 * Sentinel returned by resolveConsistencyPoint when the consistency point could not be obtained;
 * callers treat it as "retain all delete markers".
 */
public static final long CONSISTENCY_POINT_UNAVAILABLE = 0L;
/**
 * Sentinel meaning the compaction guard is a no-op; CompactionScanner uses it when the compaction
 * is not major or replay is not enabled.
 */
public static final long CONSISTENCY_POINT_GUARD_DISABLED = Long.MAX_VALUE;

/** Configuration key for how long, in seconds, a resolved consistency point is memoized. */
public static final String CONSISTENCY_POINT_CACHE_TTL_SECONDS_KEY =
"phoenix.replication.compaction.guard.cache.ttl.seconds";
/** Default memoization period, in seconds, when the key above is not set. */
public static final long DEFAULT_CONSISTENCY_POINT_CACHE_TTL_SECONDS = 30;

// Wall-clock millis of the last "consistency point unavailable" warning; read and written by
// resolveConsistencyPoint to rate-limit that warning.
private static volatile long lastFallbackWarnTime = 0;
// Minimum spacing, in millis, between two such warnings.
private static final long WARN_LOG_INTERVAL_MS = 60_000;

private static volatile ReplicationLogReplayService instance;

private final Configuration conf;

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The two test-only constructors in ReplicationLogReplayService do not initialize the blank-final field conf. This will break the build.

ReplicationLogReplayService.java:116: error: variable conf might not have been initialized
ReplicationLogReplayService.java:121: error: variable conf might not have been initialized

private ScheduledExecutorService scheduler;
private volatile boolean isRunning = false;
private final Supplier<Long> cachedConsistencyPoint;

private ReplicationLogReplayService(final Configuration conf) {
this.conf = conf;
long cacheTtl = conf.getLong(CONSISTENCY_POINT_CACHE_TTL_SECONDS_KEY,
DEFAULT_CONSISTENCY_POINT_CACHE_TTL_SECONDS);
// Guava's memoizeWithExpiration does NOT cache exceptions — a thrown RuntimeException
// causes the next get() to re-invoke the supplier. We rely on this: transient failures
// (NN flap, SYSTEM.HA_GROUP unavailable) retry on the next compaction rather than
// caching a stale fallback for the full TTL.
this.cachedConsistencyPoint = Suppliers.memoizeWithExpiration(() -> {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suppliers.memoizeWithExpiration uses synchronized(this) to serialize loads, but that lock is on the supplier object. This establishes no happens-before relationship with the replay thread's writes to lastRoundInSync.

So you may end up with a consistency point that is too far back, or even get a null back from the supplier after the replay thread has already set it, spuriously throwing an IOException. The net effect is that sometimes a major compaction won't drop delete markers when it should.

Something needs to enforce the happens-before relationship. Perhaps volatile ReplicationRound lastRoundInSync, or consider AtomicReference<ReplicationRound>. Or redo this.

try {
return getConsistencyPoint();
} catch (IOException | SQLException e) {
throw new RuntimeException("Failed to fetch consistency point", e);
}
}, cacheTtl, TimeUnit.SECONDS);
}

private ReplicationLogReplayService(Configuration conf, long fixedConsistencyPoint) {
this.conf = conf;
this.cachedConsistencyPoint = () -> fixedConsistencyPoint;

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't the final field conf have been initialized?

}

/**
 * Builds a service whose consistency point comes from the given supplier, memoized for the
 * period configured under {@link #CONSISTENCY_POINT_CACHE_TTL_SECONDS_KEY}. Invoked from
 * {@code setConsistencyPointSupplierForTesting}.
 * @param conf     configuration to retain and to read the cache TTL from
 * @param supplier source of the consistency point that gets wrapped in the expiring cache
 */
private ReplicationLogReplayService(Configuration conf, Supplier<Long> supplier) {
  final long ttlSeconds = conf.getLong(CONSISTENCY_POINT_CACHE_TTL_SECONDS_KEY,
    DEFAULT_CONSISTENCY_POINT_CACHE_TTL_SECONDS);
  final Supplier<Long> expiringSupplier =
    Suppliers.memoizeWithExpiration(supplier, ttlSeconds, TimeUnit.SECONDS);
  this.conf = conf;
  this.cachedConsistencyPoint = expiringSupplier;
}

/**
Expand All @@ -105,6 +145,28 @@ public static ReplicationLogReplayService getInstance(Configuration conf) throws
return instance;
}

/**
 * Replaces the singleton with an instance that always reports the given consistency point.
 * Holds the {@code ReplicationLogReplayService} class monitor while swapping the instance.
 * @param conf                  configuration handed to the replacement instance
 * @param fixedConsistencyPoint value the replacement instance's supplier returns
 */
@VisibleForTesting
public static synchronized void setConsistencyPointForTesting(Configuration conf,
  long fixedConsistencyPoint) {
  // A static synchronized method locks ReplicationLogReplayService.class.
  instance = new ReplicationLogReplayService(conf, fixedConsistencyPoint);
}

/**
 * Replaces the singleton with an instance whose consistency point is drawn from the supplied
 * supplier. Holds the {@code ReplicationLogReplayService} class monitor while swapping the
 * instance.
 * @param conf     configuration handed to the replacement instance
 * @param supplier source of consistency point values for the replacement instance
 */
@VisibleForTesting
public static synchronized void setConsistencyPointSupplierForTesting(Configuration conf,
  Supplier<Long> supplier) {
  // A static synchronized method locks ReplicationLogReplayService.class.
  instance = new ReplicationLogReplayService(conf, supplier);
}

/**
 * Clears the singleton reference while holding the {@code ReplicationLogReplayService} class
 * monitor.
 */
@VisibleForTesting
public static synchronized void resetInstanceForTesting() {
  // A static synchronized method locks ReplicationLogReplayService.class.
  instance = null;
}
Comment thread
Himanshu-g81 marked this conversation as resolved.

/**
* Starts the replication log replay service by initializing the scheduler and scheduling periodic
* replay operations for each HA Group.
Expand Down Expand Up @@ -229,6 +291,32 @@ protected long getConsistencyPoint() throws IOException, SQLException {
return consistencyPoint;
}

/**
 * Looks up the replication consistency point for use by a compaction, going through the
 * service's cached supplier (cache period: see {@link #CONSISTENCY_POINT_CACHE_TTL_SECONDS_KEY})
 * so that bursts of compactions do not each trigger a fresh lookup.
 * <p>
 * Any exception raised while obtaining the value is absorbed: the method then returns
 * {@link #CONSISTENCY_POINT_UNAVAILABLE}, which the caller treats as "retain all delete
 * markers". The accompanying warning is emitted at most once per {@code WARN_LOG_INTERVAL_MS}.
 * @param conf             configuration used to obtain the service instance
 * @param tableName        table being compacted; used only in log messages
 * @param columnFamilyName store being compacted; used only in log messages
 * @return the cached consistency point, or {@link #CONSISTENCY_POINT_UNAVAILABLE} on failure
 */
public static long resolveConsistencyPoint(Configuration conf, String tableName,
  String columnFamilyName) {
  try {
    final ReplicationLogReplayService service = getInstance(conf);
    final long resolved = service.cachedConsistencyPoint.get();
    if (LOG.isDebugEnabled()) {
      LOG.debug("Replication guard: table={} store={} consistencyPoint={}", tableName,
        columnFamilyName, resolved);
    }
    return resolved;
  } catch (Exception e) {
    final long nowMs = System.currentTimeMillis();
    final boolean warnDue = nowMs - lastFallbackWarnTime > WARN_LOG_INTERVAL_MS;
    if (warnDue) {
      // Record the time first so the warning stays rate-limited.
      lastFallbackWarnTime = nowMs;
      LOG.warn("Replication guard: consistency point unavailable for table={} store={}."
        + " Retaining all delete markers.", tableName, columnFamilyName, e);
    }
    return CONSISTENCY_POINT_UNAVAILABLE;
  }
}

/** Returns the list of HA groups on the cluster */
protected List<String> getReplicationGroups() throws SQLException {
return HAGroupStoreManager.getInstance(conf).getHAGroupNames();
Expand Down
Loading