From 39049e46b594d515b6c38d289b037647e8ec0911 Mon Sep 17 00:00:00 2001 From: Gabriel Roldan Date: Fri, 15 May 2026 16:45:19 -0300 Subject: [PATCH 1/2] diskquota/jdbc: retry SERIALIZABLE aborts (#1527) Concurrent writers hitting the same TILESET or TILEPAGE row produced serialization aborts that escaped JDBCQuotaStore and reached QueuedQuotaUpdatesConsumer, which logs at FINE and drops the aggregated batch, silently losing pending quota updates and letting the on-disk ledger drift out of sync. SERIALIZABLE isolation is kept as-is; the missing piece was the application-level retry that SERIALIZABLE assumes. Wrap every JDBCQuotaStore transaction in a bounded-retry helper whose predicate recognises serialization aborts from Postgres SSI, HSQL (SQLState class "40"), and Oracle (vendor codes 8176/8177). Oracle additionally gets a DEFERRABLE INITIALLY DEFERRED TILEPAGE -> TILESET foreign key, the matching migrate-on-upgrade path, and a PL/SQL renameLayer so TILESET.KEY and TILEPAGE.TILESET_ID rewrite atomically at commit. Verified end-to-end against PostgreSQL and Oracle XE testcontainers. on-behalf-of: @camptocamp --- .../diskquota/jdbc/JDBCQuotaStore.java | 125 +++++++++-- .../diskquota/jdbc/OracleDialect.java | 61 +++--- .../diskquota/jdbc/SQLDialect.java | 61 +++--- .../jdbc/AbstractForeignKeyMigrationTest.java | 101 +++++---- ...AbstractJDBCQuotaStoreConcurrencyTest.java | 203 ++++++++++++++++++ .../jdbc/HSQLQuotaStoreConcurrencyTest.java | 36 ++++ .../jdbc/JDBCQuotaStoreRetryTest.java | 108 ++++++++++ .../OracleForeignKeyMigrationIT.java | 89 ++++++++ .../OracleQuotaStoreConcurrencyIT.java | 63 ++++++ .../tests/container/OracleQuotaStoreIT.java | 46 +--- .../PostgreSQLQuotaStoreConcurrencyIT.java | 42 ++++ .../jdbc/src/test/resources/log4j2-test.xml | 4 + 12 files changed, 769 insertions(+), 170 deletions(-) create mode 100644 geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/AbstractJDBCQuotaStoreConcurrencyTest.java create mode 100644 geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/HSQLQuotaStoreConcurrencyTest.java create mode 100644 geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/JDBCQuotaStoreRetryTest.java create mode 100644 geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/tests/container/OracleForeignKeyMigrationIT.java create mode 100644 geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/tests/container/OracleQuotaStoreConcurrencyIT.java create mode 100644 geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/tests/container/PostgreSQLQuotaStoreConcurrencyIT.java diff --git a/geowebcache/diskquota/jdbc/src/main/java/org/geowebcache/diskquota/jdbc/JDBCQuotaStore.java b/geowebcache/diskquota/jdbc/src/main/java/org/geowebcache/diskquota/jdbc/JDBCQuotaStore.java index 18e9da7f75..3f632c681f 100644 --- a/geowebcache/diskquota/jdbc/src/main/java/org/geowebcache/diskquota/jdbc/JDBCQuotaStore.java +++ b/geowebcache/diskquota/jdbc/src/main/java/org/geowebcache/diskquota/jdbc/JDBCQuotaStore.java @@ -29,6 +29,8 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; +import java.util.concurrent.ThreadLocalRandom; +import java.util.function.Consumer; import java.util.logging.Level; import java.util.logging.Logger; import javax.sql.DataSource; @@ -51,6 +53,7 @@ import org.springframework.jdbc.datasource.DataSourceTransactionManager; import org.springframework.transaction.TransactionStatus; import org.springframework.transaction.support.TransactionCallback; +import org.springframework.transaction.support.TransactionSynchronizationManager; import org.springframework.transaction.support.TransactionTemplate; /** @@ -86,6 +89,20 @@ public class JDBCQuotaStore implements QuotaStore { /** Max number of attempts we do to insert/update page stats in race-free mode */ int maxLoops = 100; + /** Max attempts in {@link #executeWithRetry(TransactionCallback)} before propagating the abort. */ + int maxTransactionAttempts = 10; + + /** Initial backoff between transaction retries, in milliseconds; doubles each retry, with full jitter. */ + long initialTransactionBackoffMs = 10L; + + private static final long MAX_TRANSACTION_BACKOFF_MS = 500L; + + /** Oracle ORA-08176: consistent read failure; rollback data not available. */ + private static final int ORA_08176 = 8176; + + /** Oracle ORA-08177: can't serialize access for this transaction. */ + private static final int ORA_08177 = 8177; + /** The executor used for asynch requests */ ExecutorService executor; @@ -159,10 +176,10 @@ public void initialize() { throw new IllegalStateException( "Please provide both the sql dialect and the data " + "source before calling inizialize"); } - tt.executeWithoutResult(status -> { - - // setup the tables if necessary - dialect.initializeTables(schema, jt); + // DDL must run outside the wrapping transaction: Oracle auto-commits it and a SERIALIZABLE + // read across the just-created indexes would abort with ORA-08176 on the first SELECT. + dialect.initializeTables(schema, jt); + executeWithRetry(status -> { // get the existing table names List existingLayers = jt.query(dialect.getAllLayersQuery(schema), (rs, rowNum) -> rs.getString(1)); @@ -196,7 +213,7 @@ public void createLayer(String layerName) throws InterruptedException { } private void createLayerInternal(final String layerName) { - tt.executeWithoutResult(status -> { + executeWithRetry(status -> { Set layerTileSets; if (!GLOBAL_QUOTA_NAME.equals(layerName)) { layerTileSets = calculator.getTileSetsFor(layerName); @@ -276,14 +293,14 @@ private Quota nonNullQuota(Quota optionalQuota) { @Override public void deleteLayer(final String layerName) { - tt.executeWithoutResult(status -> { + executeWithRetry(status -> { deleteLayerInternal(layerName); }); } @Override public void deleteGridSubset(final String layerName, final String gridSetId) { - tt.executeWithoutResult(status -> { + executeWithRetry(status -> { // get the disk quota used by the layer gridset Quota quota = getUsedQuotaByLayerGridset(layerName, gridSetId); // we will subtracting the current disk quota value @@ -305,7 +322,7 @@ public void deleteGridSubset(final String layerName, final String gridSetId) { public void deleteLayerInternal(final String layerName) { getUsedQuotaByLayerName(layerName); - tt.executeWithoutResult(status -> { + executeWithRetry(status -> { // update the global quota Quota quota = getUsedQuotaByLayerName(layerName); quota.setBytes(quota.getBytes().negate()); @@ -324,7 +341,7 @@ public void deleteLayerInternal(final String layerName) { @Override public void renameLayer(final String oldLayerName, final String newLayerName) throws InterruptedException { - tt.executeWithoutResult(status -> { + executeWithRetry(status -> { String sql = dialect.getRenameLayerStatement(schema, "oldName", "newName"); Map params = new HashMap<>(); params.put("oldName", oldLayerName); @@ -429,7 +446,7 @@ public TilePageCalculator getTilePageCalculator() { public void addToQuotaAndTileCounts( final TileSet tileSet, final Quota quotaDiff, final Collection tileCountDiffs) throws InterruptedException { - tt.executeWithoutResult(status -> { + executeWithRetry(status -> { getOrCreateTileSet(tileSet); updateQuotas(tileSet, quotaDiff); @@ -609,7 +626,7 @@ private PageStats getPageStats(String pageStatsKey) { @Override @SuppressWarnings("unchecked") public Future> addHitsAndSetAccesTime(final Collection statsUpdates) { - return executor.submit(() -> (List) tt.execute(new QuotaStoreCallback(statsUpdates))); + return executor.submit(() -> (List) executeWithRetry(new QuotaStoreCallback(statsUpdates))); } @Override @@ -651,7 +668,7 @@ private TilePage getSinglePage(Set layerNames, boolean leastFrequentlyUs @Override public PageStats setTruncated(final TilePage page) throws InterruptedException { - return (PageStats) tt.execute((TransactionCallback) status -> { + return (PageStats) executeWithRetry((TransactionCallback) status -> { if (log.isLoggable(Level.FINE)) { log.info("Truncating page " + page); } @@ -693,6 +710,88 @@ public void close() throws Exception { jt = null; } + /** + * Runs {@code action} in a SERIALIZABLE transaction, retrying on concurrency aborts with bounded exponential + * backoff. If the call is already nested inside an active transaction the retry loop is skipped: Spring's + * {@code PROPAGATION_REQUIRED} would reuse the same stale snapshot, so only the outermost call can recover. + */ + private T executeWithRetry(TransactionCallback action) { + if (TransactionSynchronizationManager.isActualTransactionActive()) { + return tt.execute(action); + } + long backoff = initialTransactionBackoffMs; + for (int attempt = 1; ; attempt++) { + try { + return tt.execute(action); + } catch (DataAccessException e) { + if (!isTransactionAbort(e)) { + throw e; + } + if (attempt >= maxTransactionAttempts) { + log.log( + Level.WARNING, + "DiskQuota transaction failed after " + attempt + " attempts: " + e.getMessage(), + e); + throw e; + } + long sleep = backoff + ThreadLocalRandom.current().nextLong(backoff); + try { + Thread.sleep(sleep); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + throw e; + } + if (log.isLoggable(Level.FINE)) { + log.fine("DiskQuota transaction conflict on attempt " + + attempt + + "/" + + maxTransactionAttempts + + ", retrying after " + + sleep + + "ms: " + + e.getMessage()); + } + backoff = Math.min(backoff * 2, MAX_TRANSACTION_BACKOFF_MS); + } + } + } + + /** Void variant of {@link #executeWithRetry(TransactionCallback)}. */ + private void executeWithRetry(Consumer action) { + executeWithRetry((TransactionCallback) status -> { + action.accept(status); + return null; + }); + } + + /** + * Walks the cause chain looking for a retryable concurrency abort. Spring's translator alone is not enough: + * SQLSTATE class {@code 40} catches HSQL's bare {@link ConcurrencyFailureException}, and Oracle vendor codes 8176 + * and 8177 are needed because Spring leaves 8176 uncategorized and routes 8177 to a deprecated sibling of + * {@link PessimisticLockingFailureException}. + */ + private static boolean isTransactionAbort(Throwable t) { + for (Throwable cause = t; cause != null; cause = cause.getCause()) { + if (cause instanceof PessimisticLockingFailureException) { + return true; + } + if (cause instanceof SQLException sqlException) { + String sqlState = sqlException.getSQLState(); + if (sqlState != null && sqlState.startsWith("40")) { + return true; + } + if (isRetryableOracleCode(sqlException.getErrorCode())) { + return true; + } + } + } + return false; + } + + private static boolean isRetryableOracleCode(int errorCode) { + return errorCode == ORA_08176 || errorCode == ORA_08177; + } + /** * Maps a BigDecimal column into a Quota object * @@ -752,7 +851,7 @@ public TilePage mapRow(ResultSet rs, int rowNum) throws SQLException { @Override public void deleteParameters(final String layerName, final String parametersId) { - tt.executeWithoutResult(status -> { + executeWithRetry(status -> { // first gather the disk quota used by the gridset, and update the global // quota Quota quota = getUsedQuotaByParametersId(parametersId); diff --git a/geowebcache/diskquota/jdbc/src/main/java/org/geowebcache/diskquota/jdbc/OracleDialect.java b/geowebcache/diskquota/jdbc/src/main/java/org/geowebcache/diskquota/jdbc/OracleDialect.java index b1604d0573..379095c364 100644 --- a/geowebcache/diskquota/jdbc/src/main/java/org/geowebcache/diskquota/jdbc/OracleDialect.java +++ b/geowebcache/diskquota/jdbc/src/main/java/org/geowebcache/diskquota/jdbc/OracleDialect.java @@ -13,6 +13,9 @@ */ package org.geowebcache.diskquota.jdbc; +import java.sql.DatabaseMetaData; +import java.sql.ResultSet; +import java.sql.SQLException; import java.util.List; /** @@ -60,7 +63,8 @@ BYTES NUMBER(%d) DEFAULT 0 NOT NULL """ CREATE TABLE ${schema}TILEPAGE ( KEY VARCHAR(%d) PRIMARY KEY, - TILESET_ID VARCHAR(%d) REFERENCES ${schema}TILESET(KEY) ON DELETE CASCADE, + TILESET_ID VARCHAR(%d) REFERENCES ${schema}TILESET(KEY) ON DELETE CASCADE + DEFERRABLE INITIALLY DEFERRED, PAGE_Z SMALLINT, PAGE_X INTEGER, PAGE_Y INTEGER, @@ -84,37 +88,46 @@ protected void addEmtpyTableReference(StringBuilder sb) { } /** - * No-op: Oracle does not support {@code ON UPDATE CASCADE} on foreign keys, so there is nothing portable to - * migrate. Companion to {@link #getRenameLayerStatement(String, String, String)}, which preserves the legacy - * LAYER_NAME-only behavior on this dialect. + * Oracle does not support {@code ON UPDATE CASCADE}, so the FK is migrated to {@code DEFERRABLE INITIALLY DEFERRED} + * instead. Deferring the check to commit time also drops the per-INSERT snapshot read on TILESET that triggers + * ORA-08176 under SERIALIZABLE. */ @Override - public void migrateForeignKeys(String schema, SimpleJdbcTemplate template) { - // intentional no-op + protected boolean tilepageFkIsMigrated(ResultSet rs) throws SQLException { + return rs.getShort("DEFERRABILITY") == DatabaseMetaData.importedKeyInitiallyDeferred; + } + + @Override + protected String tilepageFkAddSql(String prefixedTilepageName, String prefix) { + return """ + ALTER TABLE %s ADD FOREIGN KEY (TILESET_ID) + REFERENCES %sTILESET(KEY) + ON DELETE CASCADE + DEFERRABLE INITIALLY DEFERRED + """ + .formatted(prefixedTilepageName, prefix); } /** - * Oracle does not support {@code ON UPDATE CASCADE} on foreign keys, so the {@code TILEPAGE.TILESET_ID -> TILESET - * .KEY} FK declared above only cascades on delete. As a result this dialect cannot safely rewrite {@code TILESET - * .KEY} during a rename without first dealing with the dangling {@code TILEPAGE} rows. - * - *

For now Oracle keeps the legacy behavior of only updating {@code LAYER_NAME}; lookups by id against the - * renamed layer will continue to miss the row and cause {@code getOrCreateTileSet} to insert duplicates. Fixing - * this on Oracle (e.g. via {@code DEFERRABLE INITIALLY DEFERRED} constraints, or by disabling the FK around the - * rename) is tracked separately. + * PL/SQL anonymous block that rewrites TILESET.KEY and TILEPAGE.TILESET_ID together; the deferred FK is checked + * once at commit with both updates in place. Oracle has no {@code ON UPDATE CASCADE}, hence the manual rewrite, and + * no SQL-standard {@code SUBSTRING ... FROM POSITION(...)}, hence {@code SUBSTR}/{@code INSTR}. */ @Override public String getRenameLayerStatement(String schema, String oldLayerName, String newLayerName) { - StringBuilder sb = new StringBuilder("UPDATE "); - if (schema != null) { - sb.append(schema).append("."); - } - sb.append("TILESET SET LAYER_NAME = :") - .append(newLayerName) - .append(" WHERE LAYER_NAME = :") - .append(oldLayerName); - - return sb.toString(); + String prefix = schema == null ? "" : schema + "."; + return """ + BEGIN + UPDATE %sTILESET + SET KEY = :%s || SUBSTR(KEY, INSTR(KEY, '#')), + LAYER_NAME = :%s + WHERE LAYER_NAME = :%s; + UPDATE %sTILEPAGE + SET TILESET_ID = :%s || SUBSTR(TILESET_ID, INSTR(TILESET_ID, '#')) + WHERE TILESET_ID LIKE :%s || '#%%'; + END; + """ + .formatted(prefix, newLayerName, newLayerName, oldLayerName, prefix, newLayerName, oldLayerName); } @Override diff --git a/geowebcache/diskquota/jdbc/src/main/java/org/geowebcache/diskquota/jdbc/SQLDialect.java b/geowebcache/diskquota/jdbc/src/main/java/org/geowebcache/diskquota/jdbc/SQLDialect.java index 4faa406082..d13dbd1cb7 100644 --- a/geowebcache/diskquota/jdbc/src/main/java/org/geowebcache/diskquota/jdbc/SQLDialect.java +++ b/geowebcache/diskquota/jdbc/src/main/java/org/geowebcache/diskquota/jdbc/SQLDialect.java @@ -172,26 +172,19 @@ private Void upgradeTilepageForeignKey(DatabaseMetaData dbmd, String schema, Sim try (ResultSet rs = dbmd.getImportedKeys(null, schema, tilepageName)) { while (rs.next()) { String fkName = rs.getString("FK_NAME"); - if (!isTilesetCascadeCandidate(rs, fkName)) { + if (!isTilepageTilesetFkRow(rs, fkName) || tilepageFkIsMigrated(rs)) { continue; } String drop = "ALTER TABLE %s DROP CONSTRAINT %s".formatted(prefixedTilepageName, fkName); - String add = - """ - ALTER TABLE %s ADD FOREIGN KEY (TILESET_ID) - REFERENCES %sTILESET(KEY) - ON UPDATE CASCADE ON DELETE CASCADE - """ - .formatted(prefixedTilepageName, prefix); - - LOG.info(() -> "Upgrading TILEPAGE.TILESET_ID foreign key to ON UPDATE CASCADE (was constraint %s)" - .formatted(fkName)); + String add = tilepageFkAddSql(prefixedTilepageName, prefix); + + LOG.info(() -> "Migrating TILEPAGE.TILESET_ID foreign key (was constraint %s)".formatted(fkName)); JdbcOperations jdbcOperations = template.getJdbcOperations(); try { jdbcOperations.execute(drop); jdbcOperations.execute(add); } catch (DataAccessException raceLikely) { - if (isTilepageFkAlreadyCascade(dbmd, schema, tilepageName)) { + if (isTilepageFkAlreadyMigrated(dbmd, schema, tilepageName)) { LOG.fine(() -> "TILEPAGE FK was migrated concurrently by another instance " + "while this instance was trying to drop %s; accepting concurrent migration" .formatted(fkName)); @@ -204,21 +197,27 @@ private Void upgradeTilepageForeignKey(DatabaseMetaData dbmd, String schema, Sim return null; } - /** - * Re-checks the live TILEPAGE -> TILESET FK state after a failed migration attempt. Returns {@code true} when the - * FK is already declared {@link DatabaseMetaData#importedKeyCascade}, i.e. another instance has completed the - * migration in the meantime. - */ - private static boolean isTilepageFkAlreadyCascade(DatabaseMetaData dbmd, String schema, String tilepageName) + /** Hook: {@code true} when this dialect's FK row already reflects the migrated shape. Oracle overrides. */ + protected boolean tilepageFkIsMigrated(ResultSet rs) throws SQLException { + return rs.getShort("UPDATE_RULE") == DatabaseMetaData.importedKeyCascade; + } + + /** Hook: the {@code ALTER TABLE ... ADD FOREIGN KEY} statement for this dialect's migrated FK shape. */ + protected String tilepageFkAddSql(String prefixedTilepageName, String prefix) { + return """ + ALTER TABLE %s ADD FOREIGN KEY (TILESET_ID) + REFERENCES %sTILESET(KEY) + ON UPDATE CASCADE ON DELETE CASCADE + """ + .formatted(prefixedTilepageName, prefix); + } + + /** Re-checks FK state after a failed migration attempt to detect a concurrent migration from another instance. */ + private boolean isTilepageFkAlreadyMigrated(DatabaseMetaData dbmd, String schema, String tilepageName) throws SQLException { try (ResultSet rs = dbmd.getImportedKeys(null, schema, tilepageName)) { while (rs.next()) { - String pkTable = rs.getString("PKTABLE_NAME"); - String fkColumn = rs.getString("FKCOLUMN_NAME"); - if (!"TILESET".equalsIgnoreCase(pkTable) || !"TILESET_ID".equalsIgnoreCase(fkColumn)) { - continue; - } - if (rs.getShort("UPDATE_RULE") == DatabaseMetaData.importedKeyCascade) { + if (isTilepageTilesetFkRow(rs, rs.getString("FK_NAME")) && tilepageFkIsMigrated(rs)) { return true; } } @@ -226,22 +225,14 @@ private static boolean isTilepageFkAlreadyCascade(DatabaseMetaData dbmd, String return false; } - /** - * True when the current {@code getImportedKeys} row describes the TILEPAGE -> TILESET(KEY) FK and its update rule - * is something other than {@code CASCADE}. - */ - private static boolean isTilesetCascadeCandidate(ResultSet rs, String fkName) throws SQLException { + /** Identity-only check: is this metadata row the TILEPAGE -> TILESET(KEY) FK? */ + private static boolean isTilepageTilesetFkRow(ResultSet rs, String fkName) throws SQLException { if (fkName == null || fkName.isEmpty()) { return false; } String pkTable = rs.getString("PKTABLE_NAME"); String fkColumn = rs.getString("FKCOLUMN_NAME"); - boolean isTilesetFk = "TILESET".equalsIgnoreCase(pkTable) && "TILESET_ID".equalsIgnoreCase(fkColumn); - if (!isTilesetFk) { - return false; - } - short updateRule = rs.getShort("UPDATE_RULE"); - return updateRule != DatabaseMetaData.importedKeyCascade; + return "TILESET".equalsIgnoreCase(pkTable) && "TILESET_ID".equalsIgnoreCase(fkColumn); } private static String resolveTableName(DatabaseMetaData dbmd, String schema, String tableName) throws SQLException { diff --git a/geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/AbstractForeignKeyMigrationTest.java b/geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/AbstractForeignKeyMigrationTest.java index f7d95506ff..0820bca82c 100644 --- a/geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/AbstractForeignKeyMigrationTest.java +++ b/geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/AbstractForeignKeyMigrationTest.java @@ -35,29 +35,18 @@ import org.junit.Test; /** - * Verifies the legacy-to-current path in {@link SQLDialect#migrateForeignKeys} that drops the existing {@code TILEPAGE - * -> TILESET} foreign key (declared with only {@code ON DELETE CASCADE}) and re-adds it with {@code ON UPDATE CASCADE - * ON DELETE CASCADE}. - * - *

The current {@code JDBCQuotaStoreTest} suite always starts from fresh-DDL tables, so it only exercises the no-op - * idempotent branch of the migration; this test class fills in the upgrade path. - * - *

Each test starts from a "legacy" schema built by stripping {@code " ON UPDATE CASCADE"} from the dialect's own - * table-creation SQL (i.e. the pre-fix shape of the FK). Subclasses provide the dialect and a DataSource pointed at the - * database under test. + * Verifies the upgrade path in {@link SQLDialect#migrateForeignKeys} (the regular IT suite always starts from fresh-DDL + * tables and it'd only sees the no-op branch). Each test starts from a "legacy" schema built by stripping the dialect's + * migrated clause from its own DDL; subclasses plug in the dialect, the DataSource, and the dialect-specific FK + * metadata through the {@code *FkState} / {@link #legacyDdl(String)} hooks. */ public abstract class AbstractForeignKeyMigrationTest { - /** Dialect under test. */ protected abstract SQLDialect dialect(); - /** Data source pointed at a usable database where the legacy schema can be (re)created. */ protected abstract DataSource dataSource(); - /** - * Recreates the legacy schema. Subclasses call this from their {@code @Before} after wiring the data source; not - * annotated so the dialect/dataSource setup ordering is always explicit. - */ + /** Recreates the legacy schema. Subclasses call this from their {@code @Before} after wiring the data source. */ protected void recreateLegacySchema() throws SQLException { try (Connection cx = dataSource().getConnection(); Statement st = cx.createStatement()) { @@ -65,50 +54,56 @@ protected void recreateLegacySchema() throws SQLException { dropIfExists(st, "TILESET"); for (String table : dialect().TABLE_CREATION_MAP.keySet()) { for (String ddl : dialect().TABLE_CREATION_MAP.get(table)) { - String legacy = stripCascadeOnUpdate(ddl); - st.execute(legacy); + st.execute(legacyDdl(ddl)); } } } } - /** - * Reproduces the pre-fix DDL by removing the {@code ON UPDATE CASCADE} clause that was added to the TILEPAGE FK. - */ - private static String stripCascadeOnUpdate(String ddl) { + /** Hook: dialect DDL with its migrated FK clause stripped and {@code ${schema}} substituted. Oracle overrides. */ + protected String legacyDdl(String ddl) { return ddl.replace("${schema}", "").replace(" ON UPDATE CASCADE", ""); } - private static void dropIfExists(Statement st, String table) { + /** Hook: the {@code getImportedKeys} value the migrated FK is expected to settle on. Oracle overrides. */ + protected short expectedMigratedFkState() { + return (short) DatabaseMetaData.importedKeyCascade; + } + + /** Hook: dialect-specific FK metadata column to compare against {@link #expectedMigratedFkState()}. */ + protected short readFkState(ResultSet rs) throws SQLException { + return rs.getShort("UPDATE_RULE"); + } + + /** Hook: {@code DROP TABLE} that handles FK dependents. Oracle needs {@code CASCADE CONSTRAINTS}. */ + protected String dropTableSql(String table) { + return "DROP TABLE " + table + " CASCADE"; + } + + private void dropIfExists(Statement st, String table) { try { - st.execute("DROP TABLE " + table + " CASCADE"); + st.execute(dropTableSql(table)); } catch (SQLException ignored) { // table may not exist on the first run; the legacy CREATEs below recreate it } } @Test - public void migrateAddsOnUpdateCascadeToTilepageForeignKey() throws SQLException { - short ruleBefore = requireTilepageFkUpdateRule(); + public void migrateRewritesTilepageForeignKey() throws SQLException { + short before = requireTilepageFkState(); assertNotEquals( - "Legacy TILEPAGE FK should not yet be ON UPDATE CASCADE", - (short) DatabaseMetaData.importedKeyCascade, - ruleBefore); + "Legacy TILEPAGE FK should not yet be in its migrated state", expectedMigratedFkState(), before); dialect().migrateForeignKeys(null, new SimpleJdbcTemplate(dataSource())); - short ruleAfter = requireTilepageFkUpdateRule(); + short after = requireTilepageFkState(); assertEquals( - "Migration should rewrite TILEPAGE FK as ON UPDATE CASCADE", - (short) DatabaseMetaData.importedKeyCascade, - ruleAfter); + "Migration should rewrite the TILEPAGE FK to its current dialect shape", + expectedMigratedFkState(), + after); } - /** - * Simulates multiple JVMs starting at the same time against a shared database with the legacy FK still in place. - * Both call {@code migrateForeignKeys} concurrently; the migration must remain idempotent end-to-end - neither call - * should propagate an exception, and the final FK state must be cascade-on-update. - */ + /** Simulates concurrent migration from multiple JVMs: every call must succeed, the final FK state is migrated. */ @Test public void migrateIsConcurrentStartupSafe() throws Exception { int threads = 4; @@ -144,43 +139,43 @@ public void migrateIsConcurrentStartupSafe() throws Exception { } assertEquals( - "After concurrent migration the FK should be ON UPDATE CASCADE", - (short) DatabaseMetaData.importedKeyCascade, - requireTilepageFkUpdateRule()); + "After concurrent migration the FK should be in its migrated state", + expectedMigratedFkState(), + requireTilepageFkState()); } @Test public void migrateIsIdempotent() throws SQLException { SimpleJdbcTemplate template = new SimpleJdbcTemplate(dataSource()); dialect().migrateForeignKeys(null, template); - assertEquals((short) DatabaseMetaData.importedKeyCascade, requireTilepageFkUpdateRule()); + assertEquals(expectedMigratedFkState(), requireTilepageFkState()); - // Second invocation must be a no-op (FK already cascade-on-update). + // Second invocation must be a no-op (FK already in its migrated state). dialect().migrateForeignKeys(null, template); - assertEquals((short) DatabaseMetaData.importedKeyCascade, requireTilepageFkUpdateRule()); + assertEquals(expectedMigratedFkState(), requireTilepageFkState()); } - private short requireTilepageFkUpdateRule() throws SQLException { - Short rule = lookupTilepageFkUpdateRule(); - assertNotNull("TILEPAGE -> TILESET foreign key not found in metadata", rule); - return rule; + private short requireTilepageFkState() throws SQLException { + Short state = lookupTilepageFkState(); + assertNotNull("TILEPAGE -> TILESET foreign key not found in metadata", state); + return state; } - private Short lookupTilepageFkUpdateRule() throws SQLException { + private Short lookupTilepageFkState() throws SQLException { try (Connection cx = dataSource().getConnection()) { DatabaseMetaData dbmd = cx.getMetaData(); - Short rule = findTilesetFkUpdateRule(dbmd, "tilepage"); - return rule != null ? rule : findTilesetFkUpdateRule(dbmd, "TILEPAGE"); + Short state = findTilesetFkState(dbmd, "tilepage"); + return state != null ? state : findTilesetFkState(dbmd, "TILEPAGE"); } } - private static Short findTilesetFkUpdateRule(DatabaseMetaData dbmd, String tableName) throws SQLException { + private Short findTilesetFkState(DatabaseMetaData dbmd, String tableName) throws SQLException { try (ResultSet rs = dbmd.getImportedKeys(null, null, tableName)) { while (rs.next()) { String pkTable = rs.getString("PKTABLE_NAME"); String fkColumn = rs.getString("FKCOLUMN_NAME"); if ("TILESET".equalsIgnoreCase(pkTable) && "TILESET_ID".equalsIgnoreCase(fkColumn)) { - return rs.getShort("UPDATE_RULE"); + return readFkState(rs); } } } diff --git a/geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/AbstractJDBCQuotaStoreConcurrencyTest.java b/geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/AbstractJDBCQuotaStoreConcurrencyTest.java new file mode 100644 index 0000000000..45052a79da --- /dev/null +++ b/geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/AbstractJDBCQuotaStoreConcurrencyTest.java @@ -0,0 +1,203 @@ +/** + * This program is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General + * Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any + * later version. + * + *

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + *

You should have received a copy of the GNU Lesser General Public License along with this program. If not, see + * . + * + *

Copyright 2026 + */ +package org.geowebcache.diskquota.jdbc; + +import static org.junit.Assert.assertEquals; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.math.BigInteger; +import java.sql.Connection; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import javax.sql.DataSource; +import org.apache.commons.dbcp.BasicDataSource; +import org.geowebcache.diskquota.storage.PageStatsPayload; +import org.geowebcache.diskquota.storage.Quota; +import org.geowebcache.diskquota.storage.TilePage; +import org.geowebcache.diskquota.storage.TilePageCalculator; +import org.geowebcache.diskquota.storage.TileSet; +import org.geowebcache.storage.DefaultStorageFinder; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +/** + * Concurrency suite for {@link JDBCQuotaStore}, run by each dialect's subclass. + * + *

Verifies the two invariants the retry layer must hold: no {@code DataAccessException} escapes, and the ledger + * total matches the sum of deltas exactly (drift means an abort was dropped instead of replayed). On engines that + * serialize on row locks (HSQL) the abort path isn't exercised, but the scenarios still pass. + */ +public abstract class AbstractJDBCQuotaStoreConcurrencyTest { + + protected static final int THREAD_COUNT = 4; + protected static final int ITERATIONS_PER_THREAD = 200; + protected static final long BYTES_PER_ITERATION = 1024L; + + protected DataSource dataSource; + + protected JDBCQuotaStore store; + protected TileSet tileSet; + protected TilePage tilePage; + + protected abstract DataSource newDataSource() throws Exception; + + protected abstract SQLDialect newDialect(); + + /** Drops the schema tables for a clean state. Oracle overrides; it needs {@code CASCADE CONSTRAINTS}. */ + protected void cleanupDatabase(DataSource ds) throws SQLException { + try (Connection cx = ds.getConnection(); + Statement st = cx.createStatement()) { + try { + st.execute("DROP TABLE TILEPAGE CASCADE"); + } catch (SQLException ignored) { + // table may not exist on first run + } + try { + st.execute("DROP TABLE TILESET CASCADE"); + } catch (SQLException ignored) { + // table may not exist on first run + } + } + } + + /** Pool sized for {@link #THREAD_COUNT} writers plus headroom; short max-wait so deadlocked tests fail fast. */ + protected static BasicDataSource newPooledDataSource(String driver, String url, String user, String password) { + BasicDataSource ds = new BasicDataSource(); + ds.setDriverClassName(driver); + ds.setUrl(url); + ds.setUsername(user); + ds.setPassword(password); + ds.setPoolPreparedStatements(true); + ds.setAccessToUnderlyingConnectionAllowed(true); + ds.setMinIdle(1); + ds.setMaxActive(THREAD_COUNT + 2); + ds.setMaxWait(5000); + return ds; + } + + @Before + public final void setUpStore() throws Exception { + dataSource = newDataSource(); + cleanupDatabase(dataSource); + + DefaultStorageFinder finder = mock(DefaultStorageFinder.class); + TilePageCalculator calculator = mock(TilePageCalculator.class); + when(calculator.getLayerNames()).thenReturn(Collections.emptySet()); + when(calculator.getTilesPerPage(any(TileSet.class), anyInt())).thenReturn(BigInteger.valueOf(1_000_000)); + + store = new JDBCQuotaStore(finder, calculator); + store.setDataSource(dataSource); + store.setDialect(newDialect()); + store.initialize(); + + tileSet = new TileSet("layer", "EPSG:4326", "image/png", null); + tilePage = new TilePage(tileSet.getId(), 0, 0, 0); + + // Pre-create both rows so the contention is row-update, not row-insert. + store.addToQuotaAndTileCounts(tileSet, new Quota(BigInteger.ZERO), Collections.singletonList(payload(1))); + } + + @After + public final void tearDownStore() throws Exception { + if (store != null) { + store.close(); + } + } + + /** Drift in the final total means an aborted transaction was dropped instead of replayed. */ + @Test(timeout = 120_000) + public void concurrentAddToQuota_doesNotDriftUnderSerializable() throws Exception { + ExecutorService pool = Executors.newFixedThreadPool(THREAD_COUNT); + CountDownLatch start = new CountDownLatch(1); + List> futures = new ArrayList<>(); + for (int t = 0; t < THREAD_COUNT; t++) { + futures.add(pool.submit(() -> { + start.await(); + Quota delta = new Quota(BigInteger.valueOf(BYTES_PER_ITERATION)); + for (int i = 0; i < ITERATIONS_PER_THREAD; i++) { + store.addToQuotaAndTileCounts(tileSet, delta, Collections.singletonList(payload(1))); + } + return null; + })); + } + start.countDown(); + try { + for (Future f : futures) { + f.get(); + } + } finally { + pool.shutdown(); + pool.awaitTermination(10, TimeUnit.SECONDS); + } + + BigInteger expected = BigInteger.valueOf((long) THREAD_COUNT * ITERATIONS_PER_THREAD * BYTES_PER_ITERATION); + assertEquals( + "TILESET.BYTES drifted: aborted transactions were not replayed", + expected, + store.getUsedQuotaByTileSetId(tileSet.getId()).getBytes()); + assertEquals( + "Global TILESET.BYTES drifted: aborted transactions were not replayed", + expected, + store.getGloballyUsedQuota().getBytes()); + } + + /** Truncation is idempotent, so the assertion is negative: no abort exception escapes. */ + @Test(timeout = 120_000) + public void concurrentSetTruncated_doesNotThrowUnderSerializable() throws Exception { + // Bring the page row into a state where setTruncated has work to do (fillFactor > 0). + store.addToQuotaAndTileCounts(tileSet, new Quota(BigInteger.ZERO), Collections.singletonList(payload(100))); + + int truncators = 2; + int iterations = 100; + ExecutorService pool = Executors.newFixedThreadPool(truncators); + CountDownLatch start = new CountDownLatch(1); + List> futures = new ArrayList<>(); + for (int t = 0; t < truncators; t++) { + futures.add(pool.submit(() -> { + start.await(); + for (int i = 0; i < iterations; i++) { + store.setTruncated(tilePage); + } + return null; + })); + } + start.countDown(); + try { + for (Future f : futures) { + f.get(); + } + } finally { + pool.shutdown(); + pool.awaitTermination(10, TimeUnit.SECONDS); + } + } + + protected PageStatsPayload payload(int numTiles) { + PageStatsPayload p = new PageStatsPayload(tilePage); + p.setNumTiles(numTiles); + return p; + } +} diff --git a/geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/HSQLQuotaStoreConcurrencyTest.java b/geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/HSQLQuotaStoreConcurrencyTest.java new file mode 100644 index 0000000000..18842e5ff7 --- /dev/null +++ b/geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/HSQLQuotaStoreConcurrencyTest.java @@ -0,0 +1,36 @@ +/** + * This program is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General + * Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any + * later version. + * + *

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + *

You should have received a copy of the GNU Lesser General Public License along with this program. If not, see + * . + * + *

Copyright 2026 + */ +package org.geowebcache.diskquota.jdbc; + +import javax.sql.DataSource; + +/** + * In-memory HSQL run of {@link AbstractJDBCQuotaStoreConcurrencyTest}. HSQL serializes writers via row locks rather + * than aborting them, so this only exercises the no-conflict path; PG/Oracle ITs cover the abort/retry path. + */ +public class HSQLQuotaStoreConcurrencyTest extends AbstractJDBCQuotaStoreConcurrencyTest { + + private static int INSTANCE_COUNTER = 0; + + @Override + protected DataSource newDataSource() { + return newPooledDataSource( + "org.hsqldb.jdbcDriver", "jdbc:hsqldb:mem:concurrency-" + (++INSTANCE_COUNTER), "sa", ""); + } + + @Override + protected SQLDialect newDialect() { + return new HSQLDialect(); + } +} diff --git a/geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/JDBCQuotaStoreRetryTest.java b/geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/JDBCQuotaStoreRetryTest.java new file mode 100644 index 0000000000..0b8260f7ab --- /dev/null +++ b/geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/JDBCQuotaStoreRetryTest.java @@ -0,0 +1,108 @@ +/** + * This program is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General + * Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any + * later version. + * + *

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + *

You should have received a copy of the GNU Lesser General Public License along with this program. If not, see + * . + * + *

Copyright 2026 + */ +package org.geowebcache.diskquota.jdbc; + +import static org.junit.Assert.assertSame; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.math.BigInteger; +import java.util.Collections; +import org.geowebcache.diskquota.storage.Quota; +import org.geowebcache.diskquota.storage.TilePageCalculator; +import org.geowebcache.diskquota.storage.TileSet; +import org.geowebcache.storage.DefaultStorageFinder; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.springframework.dao.CannotAcquireLockException; +import org.springframework.transaction.support.TransactionCallback; +import org.springframework.transaction.support.TransactionTemplate; + +/** Offline tests for the retry helper in {@link JDBCQuotaStore}: cap exhaustion, interrupt handling, and skip-paths. */ +@SuppressWarnings("unchecked") // raw TransactionCallback in Mockito matchers +public class JDBCQuotaStoreRetryTest { + + private JDBCQuotaStore store; + private TransactionTemplate tt; + private TileSet tileSet; + + @Before + public void setUp() { + store = new JDBCQuotaStore(mock(DefaultStorageFinder.class), mock(TilePageCalculator.class)); + tt = mock(TransactionTemplate.class); + store.tt = tt; + // Keep test runtime small: 3 attempts, 1ms initial backoff. + store.maxTransactionAttempts = 3; + store.initialTransactionBackoffMs = 1L; + tileSet = new TileSet("layer", "EPSG:4326", "image/png", null); + } + + @After + public void tearDown() { + // Clear any interrupt left behind by the interrupt test so it doesn't leak to other tests. + Thread.interrupted(); + } + + @Test + public void retryExhaustionPropagatesOriginalException() throws Exception { + CannotAcquireLockException abort = new CannotAcquireLockException("simulated SSI abort"); + when(tt.execute(any(TransactionCallback.class))).thenThrow(abort); + + try { + store.addToQuotaAndTileCounts(tileSet, new Quota(BigInteger.ZERO), Collections.emptyList()); + fail("expected CannotAcquireLockException after retry exhaustion"); + } catch (CannotAcquireLockException actual) { + assertSame(abort, actual); + } + verify(tt, times(store.maxTransactionAttempts)).execute(any(TransactionCallback.class)); + } + + @Test + public void interruptDuringRetryBackoffPropagatesImmediately() throws Exception { + CannotAcquireLockException abort = new CannotAcquireLockException("simulated SSI abort"); + when(tt.execute(any(TransactionCallback.class))).thenThrow(abort); + + Thread.currentThread().interrupt(); + try { + store.addToQuotaAndTileCounts(tileSet, new Quota(BigInteger.ZERO), Collections.emptyList()); + fail("expected CannotAcquireLockException to propagate when interrupted mid-retry"); + } catch (CannotAcquireLockException actual) { + assertSame(abort, actual); + assertTrue( + "interrupt flag must be preserved", Thread.currentThread().isInterrupted()); + } + // Only one tt.execute call: first attempt fails, backoff sleep sees the interrupt and bails out. + verify(tt, times(1)).execute(any(TransactionCallback.class)); + } + + @Test + public void nonConcurrencyExceptionIsNotRetried() throws Exception { + IllegalStateException nonRetryable = new IllegalStateException("not a retryable abort"); + when(tt.execute(any(TransactionCallback.class))).thenThrow(nonRetryable); + + try { + store.addToQuotaAndTileCounts(tileSet, new Quota(BigInteger.ZERO), Collections.emptyList()); + fail("expected IllegalStateException to propagate without retry"); + } catch (IllegalStateException actual) { + assertSame(nonRetryable, actual); + } + verify(tt, times(1)).execute(any(TransactionCallback.class)); + } +} diff --git a/geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/tests/container/OracleForeignKeyMigrationIT.java b/geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/tests/container/OracleForeignKeyMigrationIT.java new file mode 100644 index 0000000000..8bccfa1eb3 --- /dev/null +++ b/geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/tests/container/OracleForeignKeyMigrationIT.java @@ -0,0 +1,89 @@ +/** + * This program is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General + * Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any + * later version. + * + *

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + *

You should have received a copy of the GNU Lesser General Public License along with this program. If not, see + * . + * + *

Copyright 2026 + */ +package org.geowebcache.diskquota.jdbc.tests.container; + +import java.sql.DatabaseMetaData; +import java.sql.ResultSet; +import java.sql.SQLException; +import javax.sql.DataSource; +import org.apache.commons.dbcp.BasicDataSource; +import org.geowebcache.diskquota.jdbc.AbstractForeignKeyMigrationTest; +import org.geowebcache.diskquota.jdbc.OracleDialect; +import org.geowebcache.diskquota.jdbc.SQLDialect; +import org.geowebcache.testcontainers.jdbc.OracleXEContainer; +import org.junit.After; +import org.junit.Before; +import org.junit.ClassRule; + +/** + * Runs {@link AbstractForeignKeyMigrationTest} against Oracle XE via Testcontainers. + * + *

The migrated FK is {@code DEFERRABLE INITIALLY DEFERRED} (Oracle has no {@code ON UPDATE CASCADE}) + */ +public class OracleForeignKeyMigrationIT extends AbstractForeignKeyMigrationTest { + + @ClassRule + public static final OracleXEContainer ORACLE = OracleXEContainer.latest().disabledWithoutDocker(); + + private BasicDataSource dataSource; + + @Before + public void setUpDataSource() throws Exception { + dataSource = new BasicDataSource(); + dataSource.setDriverClassName(ORACLE.getDriverClassName()); + dataSource.setUrl(ORACLE.getJdbcUrl()); + dataSource.setUsername(ORACLE.getUsername()); + dataSource.setPassword(ORACLE.getPassword()); + recreateLegacySchema(); + } + + @After + public void tearDown() throws Exception { + if (dataSource != null) { + dataSource.close(); + dataSource = null; + } + } + + @Override + protected SQLDialect dialect() { + return new OracleDialect(); + } + + @Override + protected DataSource dataSource() { + return dataSource; + } + + @Override + protected String legacyDdl(String ddl) { + return ddl.replace("${schema}", "").replaceAll("\\s*DEFERRABLE INITIALLY DEFERRED", ""); + } + + @Override + protected short expectedMigratedFkState() { + return (short) DatabaseMetaData.importedKeyInitiallyDeferred; + } + + @Override + protected short readFkState(ResultSet rs) throws SQLException { + return rs.getShort("DEFERRABILITY"); + } + + /** Oracle requires {@code CASCADE CONSTRAINTS} (not just {@code CASCADE}) to drop tables with FK dependents. */ + @Override + protected String dropTableSql(String table) { + return "DROP TABLE " + table + " CASCADE CONSTRAINTS"; + } +} diff --git a/geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/tests/container/OracleQuotaStoreConcurrencyIT.java b/geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/tests/container/OracleQuotaStoreConcurrencyIT.java new file mode 100644 index 0000000000..45d1aeaea2 --- /dev/null +++ b/geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/tests/container/OracleQuotaStoreConcurrencyIT.java @@ -0,0 +1,63 @@ +/** + * This program is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General + * Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any + * later version. + * + *

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + *

You should have received a copy of the GNU Lesser General Public License along with this program. If not, see + * . + * + *

Copyright 2026 + */ +package org.geowebcache.diskquota.jdbc.tests.container; + +import java.sql.Connection; +import java.sql.SQLException; +import java.sql.Statement; +import javax.sql.DataSource; +import org.geowebcache.diskquota.jdbc.AbstractJDBCQuotaStoreConcurrencyTest; +import org.geowebcache.diskquota.jdbc.OracleDialect; +import org.geowebcache.diskquota.jdbc.SQLDialect; +import org.geowebcache.testcontainers.jdbc.OracleXEContainer; +import org.junit.ClassRule; + +/** + * Runs {@link AbstractJDBCQuotaStoreConcurrencyTest} against Oracle XE via Testcontainers. Oracle throws ORA-08176 + * (consistent-read failure across recent DDL) and ORA-08177 (serialization failure); both go through the retry layer. + */ +public class OracleQuotaStoreConcurrencyIT extends AbstractJDBCQuotaStoreConcurrencyTest { + + @ClassRule + public static final OracleXEContainer ORACLE = OracleXEContainer.latest().disabledWithoutDocker(); + + @Override + protected DataSource newDataSource() { + return newPooledDataSource( + ORACLE.getDriverClassName(), ORACLE.getJdbcUrl(), ORACLE.getUsername(), ORACLE.getPassword()); + } + + @Override + protected SQLDialect newDialect() { + return new OracleDialect(); + } + + /** Oracle requires {@code CASCADE CONSTRAINTS} (not just {@code CASCADE}) to drop tables with FK dependents. */ + @Override + protected void cleanupDatabase(DataSource ds) throws SQLException { + try (Connection cx = ds.getConnection(); + Statement st = cx.createStatement()) { + try { + st.execute("DROP TABLE TILEPAGE CASCADE CONSTRAINTS"); + } catch (SQLException ignored) { + // table may not exist on first run + } + try { + st.execute("DROP TABLE TILESET CASCADE CONSTRAINTS"); + } catch (SQLException ignored) { + // table may not exist on first run + } + } + } +} diff --git a/geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/tests/container/OracleQuotaStoreIT.java b/geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/tests/container/OracleQuotaStoreIT.java index 4b80fd7356..34e1535e17 100644 --- a/geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/tests/container/OracleQuotaStoreIT.java +++ b/geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/tests/container/OracleQuotaStoreIT.java @@ -21,53 +21,9 @@ import org.geowebcache.diskquota.jdbc.SQLDialect; import org.geowebcache.testcontainers.jdbc.OracleXEContainer; import org.junit.ClassRule; -import org.junit.Ignore; import org.testcontainers.containers.JdbcDatabaseContainer; -/** - * Runs the full {@code JDBCQuotaStoreTest} suite against a real Oracle Express Edition via Testcontainers. - * - *

If Docker is unavailable the class is skipped cleanly through {@link OracleXEContainer#disabledWithoutDocker()}. - * - *

This is {@link Ignore @Ignore}d for now

- * - *

Running the suite against Oracle XE 21c surfaces a longstanding gap, not specific to this CI plumbing: any - * SERIALIZABLE transaction in {@code JDBCQuotaStore} whose first read goes through TILEPAGE (or, less often, TILESET) - * fails with {@code ORA-08176: consistent read failure; - * rollback data not available}. Oracle's own diagnostic for this error names the cause: "Encountered data - * changed by an operation that does not generate rollback data: create index, direct load or discrete - * transaction." The quota store creates four indexes on TILEPAGE at startup, and Oracle XE's snapshot machinery - * cannot reconstruct a consistent read across that recent DDL within a SERIALIZABLE transaction. - * - *

The Oracle-recommended remedy is to retry the transaction so a fresh snapshot SCN is taken. Once - * {@code JDBCQuotaStore} wraps each {@code tt.execute(...)} with bounded retry on serialization failures, ORA-08176 - * will succeed on a re-attempt - exactly the pattern Oracle's diagnostics recommend. - * - *

Concretely, what we observed running this IT against {@code gvenzl/oracle-xe:21-slim-faststart}: - * - *

    - *
  • 10/19 tests error with ORA-08176 on {@code INSERT INTO TILEPAGE ... SELECT ... FROM DUAL WHERE NOT EXISTS} - * inside the SERIALIZABLE runtime path ({@code addToQuotaAndTileCounts}, {@code addHitsAndSetAccesTime}, - * {@code setTruncated}). - *
  • The remaining 9 either touch TILESET only or fire async writes without awaiting them, so they don't observe the - * failure. - *
  • Rewriting the conditional INSERTs as Oracle {@code MERGE INTO} immediately surfaces a second issue (Spring's - * named-parameter binding can't decide a SQL type for a {@code null} parametersId, hitting ORA-17004), and even - * with that fixed the underlying snapshot read on freshly indexed tables is the real blocker. The retry layer is - * the right level to fix this. - *
- * - *

The class is kept in the suite (rather than deleted) so: - * - *

    - *
  1. The CI workflow's claim that it covers the Oracle dialect via Testcontainers stays honest: the infrastructure - * is in place; only the {@code @Ignore} flips off when the retry layer lands. - *
  2. The Oracle XE container plumbing is exercised by the workflow at least up to {@code @ClassRule} startup, so it - * doesn't bit-rot. - *
  3. The next person to look at Oracle support has a working scaffold and a clear pointer at the root cause. - *
- */ -@Ignore("Pending the SERIALIZABLE retry layer; see class javadoc for ORA-08176 root cause.") +/** Runs the full {@code JDBCQuotaStoreTest} suite against Oracle XE via Testcontainers. */ public class OracleQuotaStoreIT extends AbstractJDBCQuotaStoreIT { @ClassRule diff --git a/geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/tests/container/PostgreSQLQuotaStoreConcurrencyIT.java b/geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/tests/container/PostgreSQLQuotaStoreConcurrencyIT.java new file mode 100644 index 0000000000..7b8aabe10d --- /dev/null +++ b/geowebcache/diskquota/jdbc/src/test/java/org/geowebcache/diskquota/jdbc/tests/container/PostgreSQLQuotaStoreConcurrencyIT.java @@ -0,0 +1,42 @@ +/** + * This program is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General + * Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any + * later version. + * + *

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + *

You should have received a copy of the GNU Lesser General Public License along with this program. If not, see + * . + * + *

Copyright 2026 + */ +package org.geowebcache.diskquota.jdbc.tests.container; + +import javax.sql.DataSource; +import org.geowebcache.diskquota.jdbc.AbstractJDBCQuotaStoreConcurrencyTest; +import org.geowebcache.diskquota.jdbc.PostgreSQLDialect; +import org.geowebcache.diskquota.jdbc.SQLDialect; +import org.geowebcache.testcontainers.jdbc.PostgresContainer; +import org.junit.ClassRule; + +/** + * Runs {@link AbstractJDBCQuotaStoreConcurrencyTest} against a PostgreSQL testcontainer; this is where the retry layer + * is most exercised, since Postgres SSI results in serialization aborts under contention. + */ +public class PostgreSQLQuotaStoreConcurrencyIT extends AbstractJDBCQuotaStoreConcurrencyTest { + + @ClassRule + public static final PostgresContainer POSTGRES = PostgresContainer.latest().disabledWithoutDocker(); + + @Override + protected DataSource newDataSource() { + return newPooledDataSource( + POSTGRES.getDriverClassName(), POSTGRES.getJdbcUrl(), POSTGRES.getUsername(), POSTGRES.getPassword()); + } + + @Override + protected SQLDialect newDialect() { + return new PostgreSQLDialect(); + } +} diff --git a/geowebcache/diskquota/jdbc/src/test/resources/log4j2-test.xml b/geowebcache/diskquota/jdbc/src/test/resources/log4j2-test.xml index 72d91e17c9..4c06ba7976 100644 --- a/geowebcache/diskquota/jdbc/src/test/resources/log4j2-test.xml +++ b/geowebcache/diskquota/jdbc/src/test/resources/log4j2-test.xml @@ -21,6 +21,10 @@ + + + + From 2bc346d9ecc64b658ac61a748fe85ef7e12e2b7a Mon Sep 17 00:00:00 2001 From: Gabriel Roldan Date: Mon, 18 May 2026 15:48:46 -0300 Subject: [PATCH 2/2] diskquota/jdbc: poll on FK migration race recovery Concurrent migrateForeignKeys callers could fail OracleForeignKey MigrationIT.migrateIsConcurrentStartupSafe sporadically. Oracle's ALTER TABLE uses NOWAIT, and a peer thread that gets ORA-00054 on DROP CONSTRAINT could re-check the FK state while the peer was still between its drop and its add, observe an in-between state, and rethrow. on-behalf-of: @camptocamp --- .../diskquota/jdbc/SQLDialect.java | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/geowebcache/diskquota/jdbc/src/main/java/org/geowebcache/diskquota/jdbc/SQLDialect.java b/geowebcache/diskquota/jdbc/src/main/java/org/geowebcache/diskquota/jdbc/SQLDialect.java index d13dbd1cb7..2b61d22139 100644 --- a/geowebcache/diskquota/jdbc/src/main/java/org/geowebcache/diskquota/jdbc/SQLDialect.java +++ b/geowebcache/diskquota/jdbc/src/main/java/org/geowebcache/diskquota/jdbc/SQLDialect.java @@ -184,7 +184,7 @@ private Void upgradeTilepageForeignKey(DatabaseMetaData dbmd, String schema, Sim jdbcOperations.execute(drop); jdbcOperations.execute(add); } catch (DataAccessException raceLikely) { - if (isTilepageFkAlreadyMigrated(dbmd, schema, tilepageName)) { + if (awaitConcurrentMigration(dbmd, schema, tilepageName)) { LOG.fine(() -> "TILEPAGE FK was migrated concurrently by another instance " + "while this instance was trying to drop %s; accepting concurrent migration" .formatted(fkName)); @@ -212,6 +212,31 @@ protected String tilepageFkAddSql(String prefixedTilepageName, String prefix) { .formatted(prefixedTilepageName, prefix); } + private static final int CONCURRENT_MIGRATION_RECHECK_ATTEMPTS = 20; + private static final long CONCURRENT_MIGRATION_RECHECK_BACKOFF_MS = 100L; + + /** + * Polls {@link #isTilepageFkAlreadyMigrated} after a failed drop/add to absorb the window in which a peer instance + * is mid-migration. Oracle's {@code ALTER TABLE} uses NOWAIT, so a concurrent peer can fail us with ORA-00054 while + * it still has the drop committed but not yet the add; a one-shot recheck would observe the in-flight state and + * give up. + */ + private boolean awaitConcurrentMigration(DatabaseMetaData dbmd, String schema, String tilepageName) + throws SQLException { + for (int attempt = 0; attempt < CONCURRENT_MIGRATION_RECHECK_ATTEMPTS; attempt++) { + if (isTilepageFkAlreadyMigrated(dbmd, schema, tilepageName)) { + return true; + } + try { + Thread.sleep(CONCURRENT_MIGRATION_RECHECK_BACKOFF_MS); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + return false; + } + } + return false; + } + /** Re-checks FK state after a failed migration attempt to detect a concurrent migration from another instance. */ private boolean isTilepageFkAlreadyMigrated(DatabaseMetaData dbmd, String schema, String tilepageName) throws SQLException {