-
Notifications
You must be signed in to change notification settings - Fork 3
#692 Refactor the usage of bookkeeping database with Slick framework so it can be DB engine agnostic #709
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
#692 Refactor the usage of bookkeeping database with Slick framework so it can be DB engine agnostic #709
Changes from all commits
b4efcb7
5c1892a
582850f
0ba67b9
e2fd324
bc9af77
c02a1a1
5c4b55d
091add6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,13 +18,9 @@ package za.co.absa.pramen.core.bookkeeper | |
|
|
||
| import org.apache.spark.sql.types.StructType | ||
| import org.slf4j.LoggerFactory | ||
| import slick.jdbc.JdbcBackend.Database | ||
| import slick.jdbc.JdbcProfile | ||
| import za.co.absa.pramen.core.bookkeeper.model._ | ||
| import za.co.absa.pramen.core.model.{DataChunk, TableSchema} | ||
| import za.co.absa.pramen.core.rdb.PramenDb | ||
| import za.co.absa.pramen.core.rdb.PramenDb.DEFAULT_RETRIES | ||
| import za.co.absa.pramen.core.reader.JdbcUrlSelector | ||
| import za.co.absa.pramen.core.reader.model.JdbcConfig | ||
| import za.co.absa.pramen.core.utils.SlickUtils.WARN_IF_LONGER_MS | ||
| import za.co.absa.pramen.core.utils.{AlgorithmUtils, SlickUtils, TimeUtils} | ||
|
|
@@ -33,33 +29,34 @@ import java.time.LocalDate | |
| import java.util.concurrent.atomic.AtomicBoolean | ||
| import scala.util.control.NonFatal | ||
|
|
||
| class BookkeeperJdbc(db: Database, profile: JdbcProfile, batchId: Long) extends BookkeeperBase(true, batchId) { | ||
| import profile.api._ | ||
| class BookkeeperJdbc(pramenDb: PramenDb, batchId: Long, autoCloseDb: Boolean) extends BookkeeperBase(true, batchId) { | ||
| import pramenDb.slickProfile.api._ | ||
| import za.co.absa.pramen.core.utils.FutureImplicits._ | ||
|
|
||
| private val log = LoggerFactory.getLogger(this.getClass) | ||
| private val offsetManagement = new OffsetManagerCached(new OffsetManagerJdbc(db, batchId)) | ||
| private val isClosed = new AtomicBoolean(false) | ||
| private val log = LoggerFactory.getLogger(this.getClass) | ||
| private val slickUtils = new SlickUtils(pramenDb.slickProfile) | ||
| private val offsetManagement = new OffsetManagerCached(new OffsetManagerJdbc(pramenDb.slickDb, pramenDb.slickProfile, pramenDb.offsetTable, batchId)) | ||
|
|
||
| override val bookkeepingEnabled: Boolean = true | ||
|
|
||
| override def getLatestProcessedDateFromStorage(table: String, until: Option[LocalDate]): Option[LocalDate] = { | ||
| val query = until match { | ||
| case Some(endDate) => | ||
| val endDateStr = DataChunk.dateFormatter.format(endDate) | ||
| BookkeepingRecords.records | ||
| pramenDb.bookkeepingTable.records | ||
| .filter(r => r.pramenTableName === table && r.infoDate <= endDateStr) | ||
| .sortBy(r => (r.infoDate.desc, r.jobFinished.desc)) | ||
| .take(1) | ||
| case None => | ||
| BookkeepingRecords.records | ||
| pramenDb.bookkeepingTable.records | ||
| .filter(r => r.pramenTableName === table) | ||
| .sortBy(r => (r.infoDate.desc, r.jobFinished.desc)) | ||
| .take(1) | ||
| } | ||
|
|
||
| val chunks = try { | ||
| SlickUtils.executeQuery[BookkeepingRecords, BookkeepingRecord](db, query) | ||
| slickUtils.executeQuery(pramenDb.slickDb, query) | ||
| .map(DataChunk.fromRecord) | ||
| } catch { | ||
| case NonFatal(ex) => throw new RuntimeException(s"Unable to read from the bookkeeping table.", ex) | ||
|
|
@@ -77,7 +74,7 @@ class BookkeeperJdbc(db: Database, profile: JdbcProfile, batchId: Long) extends | |
| val query = getFilter(table, Option(infoDate), Option(infoDate), batchId) | ||
|
|
||
| try { | ||
| SlickUtils.executeQuery[BookkeepingRecords, BookkeepingRecord](db, query) | ||
| slickUtils.executeQuery(pramenDb.slickDb, query) | ||
| .map(DataChunk.fromRecord) | ||
| .toArray[DataChunk] | ||
| .sortBy(_.jobFinished) | ||
|
|
@@ -92,7 +89,7 @@ class BookkeeperJdbc(db: Database, profile: JdbcProfile, batchId: Long) extends | |
| .take(1) | ||
|
|
||
| try { | ||
| val records = SlickUtils.executeQuery[BookkeepingRecords, BookkeepingRecord](db, query) | ||
| val records = slickUtils.executeQuery(pramenDb.slickDb, query) | ||
| .map(DataChunk.fromRecord) | ||
| .toArray[DataChunk] | ||
|
|
||
|
|
@@ -109,7 +106,7 @@ class BookkeeperJdbc(db: Database, profile: JdbcProfile, batchId: Long) extends | |
| .length | ||
|
|
||
| val count = try { | ||
| SlickUtils.executeCount(db, query) | ||
| slickUtils.executeCount(pramenDb.slickDb, query) | ||
| } catch { | ||
| case NonFatal(ex) => throw new RuntimeException(s"Unable to read from the bookkeeping table.", ex) | ||
| } | ||
|
|
@@ -126,7 +123,7 @@ class BookkeeperJdbc(db: Database, profile: JdbcProfile, batchId: Long) extends | |
| .sortBy(_._1) | ||
|
|
||
| try { | ||
| SlickUtils.executeQuery[(Rep[String], Rep[Int], Rep[Long]), (String, Int, Long)](db, query) | ||
| slickUtils.executeQuery[(Rep[String], Rep[Int], Rep[Long]), (String, Int, Long)](pramenDb.slickDb, query) | ||
| .map { case (infoDateStr, recordCount, outputRecordCount) => | ||
| val infoDate = LocalDate.parse(infoDateStr, DataChunk.dateFormatter) | ||
| DataAvailability(infoDate, recordCount, outputRecordCount) | ||
|
|
@@ -148,9 +145,9 @@ class BookkeeperJdbc(db: Database, profile: JdbcProfile, batchId: Long) extends | |
| val record = BookkeepingRecord(table, dateStr, dateStr, dateStr, inputRecordCount, outputRecordCount, recordsAppended, jobStarted, jobFinished, Option(batchId)) | ||
|
|
||
| try { | ||
| SlickUtils.ensureDbConnected(db) | ||
| db.run( | ||
| BookkeepingRecords.records += record | ||
| slickUtils.ensureDbConnected(pramenDb.slickDb) | ||
| pramenDb.slickDb.run( | ||
| pramenDb.bookkeepingTable.records += record | ||
| ).execute() | ||
| } catch { | ||
| case NonFatal(ex) => throw new RuntimeException(s"Unable to write to the bookkeeping table.", ex) | ||
|
|
@@ -160,13 +157,13 @@ class BookkeeperJdbc(db: Database, profile: JdbcProfile, batchId: Long) extends | |
| override def deleteNonCurrentBatchRecords(table: String, infoDate: LocalDate): Unit = { | ||
| val dateStr = DataChunk.dateFormatter.format(infoDate) | ||
|
|
||
| val query = BookkeepingRecords.records | ||
| val query = pramenDb.bookkeepingTable.records | ||
| .filter(r => r.pramenTableName === table && r.infoDate === dateStr && r.batchId =!= Option(batchId)) | ||
| .delete | ||
|
|
||
| try { | ||
| AlgorithmUtils.runActionWithElapsedTimeEvent(WARN_IF_LONGER_MS) { | ||
| db.run(query).execute() | ||
| pramenDb.slickDb.run(query).execute() | ||
| } { actualTimeMs => | ||
| val elapsedTime = TimeUtils.prettyPrintElapsedTimeShort(actualTimeMs) | ||
| val sql = query.statements.mkString("; ") | ||
|
|
@@ -199,37 +196,37 @@ class BookkeeperJdbc(db: Database, profile: JdbcProfile, batchId: Long) extends | |
| else | ||
| s"'$tableNameTrimmed' or '$likePattern'" | ||
|
|
||
| val listQuery = BookkeepingRecords.records | ||
| val listQuery = pramenDb.bookkeepingTable.records | ||
| .filter(r => r.pramenTableName === tableNameTrimmed || r.pramenTableName.like(likePattern, escape)) | ||
| .map(_.pramenTableName) | ||
| .distinct | ||
|
|
||
| val tablesToDelete = SlickUtils.executeQuery(db, listQuery).sorted | ||
| val tablesToDelete = slickUtils.executeQuery(pramenDb.slickDb, listQuery).sorted | ||
|
|
||
| if (tablesToDelete.length > 100) | ||
| throw new IllegalArgumentException(s"The table wildcard '$tableName' matches more than 100 tables (${tablesToDelete.length}). To avoid accidental deletions, please refine the wildcard.") | ||
|
Comment on lines
+199
to
207
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

Line 204 executes a DB query but is not covered by the `try`-`catch` block that follows it.

🛡️ Proposed fix: extend the try-catch to cover the list query

```diff
- val tablesToDelete = slickUtils.executeQuery(pramenDb.slickDb, listQuery).sorted
-
- if (tablesToDelete.length > 100)
-   throw new IllegalArgumentException(...)
-
- val deletionQuery = ...
-
- try {
-   val deletedBkCount = ...
+ val tablesToDelete = try {
+   slickUtils.executeQuery(pramenDb.slickDb, listQuery).sorted
+ } catch {
+   case NonFatal(ex) => throw new RuntimeException(s"Unable to list tables matching '$patternForLogging' from the bookkeeping table.", ex)
+ }
+
+ if (tablesToDelete.length > 100)
+   throw new IllegalArgumentException(...)
+
+ val deletionQuery = ...
+
+ try {
+   val deletedBkCount = ...
```

🤖 Prompt for AI Agents |
||
|
|
||
| val deletionQuery = BookkeepingRecords.records | ||
| val deletionQuery = pramenDb.bookkeepingTable.records | ||
| .filter(r => r.pramenTableName === tableNameTrimmed || r.pramenTableName.like(likePattern, escape)) | ||
| .delete | ||
|
|
||
| try { | ||
| val deletedBkCount = SlickUtils.executeAction(db, deletionQuery) | ||
| val deletedBkCount = slickUtils.executeAction(pramenDb.slickDb, deletionQuery) | ||
| log.info(s"Deleted $deletedBkCount records from the bookkeeping table for tables matching $patternForLogging: ${tablesToDelete.mkString(", ")}") | ||
|
|
||
| val deletedSchemaCount = SlickUtils.executeAction(db, SchemaRecords.records | ||
| val deletedSchemaCount = slickUtils.executeAction(pramenDb.slickDb, pramenDb.schemaTable.records | ||
| .filter(r => r.pramenTableName === tableNameTrimmed || r.pramenTableName.like(likePattern, escape)) | ||
| .delete | ||
| ) | ||
| log.info(s"Deleted $deletedSchemaCount records from the schemas table.") | ||
|
|
||
| val deletedOffsetsCount = SlickUtils.executeAction(db, OffsetRecords.records | ||
| val deletedOffsetsCount = slickUtils.executeAction(pramenDb.slickDb, pramenDb.offsetTable.records | ||
| .filter(r => r.pramenTableName === tableNameTrimmed || r.pramenTableName.like(likePattern, escape)) | ||
| .delete | ||
| ) | ||
| log.info(s"Deleted $deletedOffsetsCount records from the offsets table.") | ||
|
|
||
| val deletedMetadataCount = SlickUtils.executeAction(db, MetadataRecords.records | ||
| val deletedMetadataCount = slickUtils.executeAction(pramenDb.slickDb, pramenDb.metadataTable.records | ||
| .filter(r => r.pramenTableName === tableNameTrimmed || r.pramenTableName.like(likePattern, escape)) | ||
| .delete | ||
| ) | ||
|
|
@@ -242,40 +239,40 @@ class BookkeeperJdbc(db: Database, profile: JdbcProfile, batchId: Long) extends | |
| } | ||
|
|
||
| override def close(): Unit = { | ||
| if (isClosed.compareAndSet(false, true)) { | ||
| db.close() | ||
| if (autoCloseDb && isClosed.compareAndSet(false, true)) { | ||
| pramenDb.close() | ||
| } | ||
| } | ||
|
|
||
| private[pramen] override def getOffsetManager: OffsetManager = { | ||
| offsetManagement | ||
| } | ||
|
|
||
| private def getFilter(tableName: String, infoDateBeginOpt: Option[LocalDate], infoDateEndOpt: Option[LocalDate], batchId: Option[Long]): Query[BookkeepingRecords, BookkeepingRecord, Seq] = { | ||
| private def getFilter(tableName: String, infoDateBeginOpt: Option[LocalDate], infoDateEndOpt: Option[LocalDate], batchId: Option[Long]): Query[pramenDb.bookkeepingTable.BookkeepingRecords, BookkeepingRecord, Seq] = { | ||
| val baseFilter = (infoDateBeginOpt, infoDateEndOpt) match { | ||
| case (Some(infoDateBegin), Some(infoDateEnd)) => | ||
| val date0Str = DataChunk.dateFormatter.format(infoDateBegin) | ||
| val date1Str = DataChunk.dateFormatter.format(infoDateEnd) | ||
|
|
||
| if (date0Str == date1Str) { | ||
| BookkeepingRecords.records | ||
| pramenDb.bookkeepingTable.records | ||
| .filter(r => r.pramenTableName === tableName && r.infoDate === date0Str) | ||
| } else { | ||
| BookkeepingRecords.records | ||
| pramenDb.bookkeepingTable.records | ||
| .filter(r => r.pramenTableName === tableName && r.infoDate >= date0Str && r.infoDate <= date1Str) | ||
| } | ||
| case (Some(infoDateBegin), None) => | ||
| val date0Str = DataChunk.dateFormatter.format(infoDateBegin) | ||
|
|
||
| BookkeepingRecords.records | ||
| pramenDb.bookkeepingTable.records | ||
| .filter(r => r.pramenTableName === tableName && r.infoDate >= date0Str) | ||
| case (None, Some(infoDateEnd)) => | ||
| val date1Str = DataChunk.dateFormatter.format(infoDateEnd) | ||
|
|
||
| BookkeepingRecords.records | ||
| pramenDb.bookkeepingTable.records | ||
| .filter(r => r.pramenTableName === tableName && r.infoDate <= date1Str) | ||
| case (None, None) => | ||
| BookkeepingRecords.records | ||
| pramenDb.bookkeepingTable.records | ||
| .filter(r => r.pramenTableName === tableName) | ||
| } | ||
|
|
||
|
|
@@ -287,11 +284,11 @@ class BookkeeperJdbc(db: Database, profile: JdbcProfile, batchId: Long) extends | |
|
|
||
| override def getLatestSchema(table: String, infoDate: LocalDate): Option[(StructType, LocalDate)] = { | ||
| val infoDateStr = infoDate.toString | ||
| val query = SchemaRecords.records.filter(t => t.pramenTableName === table && t.infoDate <= infoDateStr) | ||
| val query = pramenDb.schemaTable.records.filter(t => t.pramenTableName === table && t.infoDate <= infoDateStr) | ||
| .sortBy(t => t.infoDate.desc) | ||
| .take(1) | ||
|
|
||
| SlickUtils.executeQuery[SchemaRecords, SchemaRecord](db, query) | ||
| slickUtils.executeQuery(pramenDb.slickDb, query) | ||
| .map(schemaRecord => TableSchema(schemaRecord.pramenTableName, schemaRecord.infoDate, schemaRecord.schemaJson)) | ||
| .flatMap(tableSchema => | ||
| TableSchema.toSchemaAndDate(tableSchema) | ||
|
|
@@ -303,13 +300,13 @@ class BookkeeperJdbc(db: Database, profile: JdbcProfile, batchId: Long) extends | |
| val infoDateStr = infoDate.toString | ||
|
|
||
| try { | ||
| SlickUtils.ensureDbConnected(db) | ||
| db.run( | ||
| SchemaRecords.records.filter(t => t.pramenTableName === table && t.infoDate === infoDateStr).delete | ||
| slickUtils.ensureDbConnected(pramenDb.slickDb) | ||
| pramenDb.slickDb.run( | ||
| pramenDb.schemaTable.records.filter(t => t.pramenTableName === table && t.infoDate === infoDateStr).delete | ||
| ).execute() | ||
|
|
||
| db.run( | ||
| SchemaRecords.records += SchemaRecord(table, infoDate.toString, schema.json) | ||
| pramenDb.slickDb.run( | ||
| pramenDb.schemaTable.records += SchemaRecord(table, infoDate.toString, schema.json) | ||
| ).execute() | ||
| } catch { | ||
| case NonFatal(ex) => log.error(s"Unable to write to the bookkeeping schema table.", ex) | ||
|
|
@@ -319,12 +316,13 @@ class BookkeeperJdbc(db: Database, profile: JdbcProfile, batchId: Long) extends | |
| /** This method is for migration purposes*/ | ||
| private[pramen] def saveSchemaRaw(table: String, infoDate: String, schema: String): Unit = { | ||
| try { | ||
| db.run( | ||
| SchemaRecords.records.filter(t => t.pramenTableName === table && t.infoDate === infoDate).delete | ||
| slickUtils.ensureDbConnected(pramenDb.slickDb) | ||
| pramenDb.slickDb.run( | ||
| pramenDb.schemaTable.records.filter(t => t.pramenTableName === table && t.infoDate === infoDate).delete | ||
| ).execute() | ||
|
|
||
| db.run( | ||
| SchemaRecords.records += SchemaRecord(table, infoDate, schema) | ||
| pramenDb.slickDb.run( | ||
| pramenDb.schemaTable.records += SchemaRecord(table, infoDate, schema) | ||
| ).execute() | ||
| } catch { | ||
| case NonFatal(ex) => log.error(s"Unable to write to the bookkeeping schema table.", ex) | ||
|
|
@@ -333,19 +331,12 @@ class BookkeeperJdbc(db: Database, profile: JdbcProfile, batchId: Long) extends | |
| } | ||
|
|
||
| object BookkeeperJdbc { | ||
| def fromJdbcConfig(jdbcConfig: JdbcConfig, batchId: Long): BookkeeperJdbc = { | ||
| val selector = JdbcUrlSelector(jdbcConfig) | ||
| val url = selector.getWorkingUrl(DEFAULT_RETRIES) | ||
| val prop = selector.getProperties | ||
|
|
||
| val profile = PramenDb.getProfile(jdbcConfig.driver) | ||
|
|
||
| val db = if (jdbcConfig.user.nonEmpty) { | ||
| Database.forURL(url = url, driver = jdbcConfig.driver, user = jdbcConfig.user.get, password = jdbcConfig.password.getOrElse(""), prop = prop) | ||
| } else { | ||
| Database.forURL(url = url, driver = jdbcConfig.driver, prop = prop) | ||
| } | ||
| new BookkeeperJdbc(db, profile, batchId) | ||
| def fromPramenDb(pramenDb: PramenDb, batchId: Long): BookkeeperJdbc = { | ||
| new BookkeeperJdbc(pramenDb, batchId, false) | ||
| } | ||
|
|
||
| def fromJdbcConfig(jdbcConfig: JdbcConfig, batchId: Long): BookkeeperJdbc = { | ||
| val pramenDb = PramenDb(jdbcConfig) | ||
| new BookkeeperJdbc(pramenDb, batchId, true) | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`closable = null` is unreachable if `closable.close()` throws — risk of double-close.

If `closable.close()` throws, `closable = null` on Line 48 is skipped. A subsequent call to `close()` will pass the `if (closable != null)` guard again and invoke both `bookkeeper.close()` and `closable.close()` a second time.

Set the field to `null` (via a local capture) before calling `close()` to make the guard idempotent regardless of exceptions:

🛡️ Proposed fix

```diff
 try {
   bookkeeper.close()
 } finally {
-  closable.close()
-  closable = null
+  val c = closable
+  closable = null
+  c.close()
 }
```

📝 Committable suggestion
🤖 Prompt for AI Agents