From 675e9621b98571b678bc4bc60399d0c7919d5a55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ko=C5=88a=C5=99=C3=ADk?= Date: Wed, 7 Jan 2026 21:30:47 +0100 Subject: [PATCH 01/21] Add changeset entry types for schema changes --- geodiff/src/changeset.h | 69 +++++++++- geodiff/src/changesetconcat.cpp | 60 +++++---- geodiff/src/changesetreader.cpp | 106 ++++++++++++--- geodiff/src/changesetreader.h | 6 + geodiff/src/changesetutils.cpp | 172 +++++++++++++++++------- geodiff/src/changesetutils.h | 5 +- geodiff/src/changesetwriter.cpp | 74 ++++++++-- geodiff/src/changesetwriter.h | 7 + geodiff/src/drivers/postgresdriver.cpp | 65 ++++----- geodiff/src/drivers/postgresdriver.h | 4 +- geodiff/src/drivers/sqlitedriver.cpp | 71 +++++----- geodiff/src/drivers/sqlitedriver.h | 4 +- geodiff/src/geodiff.cpp | 18 +-- geodiff/src/geodiffrebase.cpp | 80 ++++++----- geodiff/src/geodiffutils.cpp | 6 +- geodiff/src/geodiffutils.hpp | 4 +- geodiff/tests/geodiff_testutils.cpp | 35 +++-- geodiff/tests/geodiff_testutils.hpp | 3 +- geodiff/tests/test_changeset_reader.cpp | 84 +++++++----- geodiff/tests/test_changeset_utils.cpp | 136 ++++++++++--------- geodiff/tests/test_geometry_utils.cpp | 15 ++- 21 files changed, 687 insertions(+), 337 deletions(-) diff --git a/geodiff/src/changeset.h b/geodiff/src/changeset.h index 98b81340..fc57a044 100644 --- a/geodiff/src/changeset.h +++ b/geodiff/src/changeset.h @@ -9,6 +9,7 @@ #include #include #include +#include #include @@ -200,9 +201,23 @@ struct ChangesetTable size_t columnCount() const { return primaryKeys.size(); } }; +/** + * Types of supported changeset records. 
+ */ +enum class ChangesetEntryType +{ + OpTableRecord = 'T', //!< corresponds to ChangesetTable + OpInsert = 18, //!< corresponds to ChangesetDataEntry + OpUpdate = 23, //!< corresponds to ChangesetDataEntry + OpDelete = 9, //!< corresponds to ChangesetDataEntry + OpCreateTable = 'a', //!< corresponds to ChangesetCreateTableEntry + OpDropTable = 'A', //!< corresponds to ChangesetDropTableEntry + OpAddColumn = 'c', //!< corresponds to ChangesetAddColumnEntry + OpDropColumn = 'C', //!< corresponds to ChangesetDropColumnEntry +}; /** - * Details of a single change within a changeset + * Details of a single data change within a changeset * * Contents of old/new values array based on operation type: * - INSERT - new values contain data of the row to be inserted, old values array is invalid @@ -212,7 +227,7 @@ struct ChangesetTable * columns of old value are always present (but new value of pkey columns is undefined * if the primary key is not being changed). */ -struct ChangesetEntry +struct ChangesetDataEntry { enum OperationType { @@ -239,9 +254,9 @@ struct ChangesetEntry ChangesetTable *table = nullptr; //! a quick way for tests to create a changeset entry - static ChangesetEntry make( ChangesetTable *t, OperationType o, const std::vector<Value> &oldV, const std::vector<Value> &newV ) + static ChangesetDataEntry make( ChangesetTable *t, OperationType o, const std::vector<Value> &oldV, const std::vector<Value> &newV ) { - ChangesetEntry e; + ChangesetDataEntry e; e.op = o; e.oldValues = oldV; e.newValues = newV; @@ -250,4 +265,50 @@ struct ChangesetEntry } }; +//! Description of column used by DDL entries +struct ChangesetDdlColumn +{ + std::string name; + std::string type; + bool isNotNull = false; //!< stored as bit 0x1 of the serialized flags byte + bool isUnique = false; //!< stored as bit 0x2 of the serialized flags byte +}; + +//! Entry for CREATE TABLE command +struct ChangesetCreateTableEntry +{ + std::string tableName; + std::vector<ChangesetDdlColumn> columns; +}; + +//! Entry for DROP TABLE command +struct ChangesetDropTableEntry +{ + std::string tableName; +}; + +//! Entry for ALTER TABLE ... ADD COLUMN command +struct ChangesetAddColumnEntry +{ + std::string tableName; + ChangesetDdlColumn column; +}; + +//! Entry for ALTER TABLE ... 
DROP COLUMN command +struct ChangesetDropColumnEntry +{ + std::string tableName; + std::string columnName; +}; + +struct ChangesetEntry : public std::variant < + ChangesetDataEntry, + ChangesetCreateTableEntry, + ChangesetDropTableEntry, + ChangesetAddColumnEntry, + ChangesetDropColumnEntry > +{ + using variant::variant; // Use std::variant's constructor +}; + #endif // CHANGESET_H diff --git a/geodiff/src/changesetconcat.cpp b/geodiff/src/changesetconcat.cpp index f0f4c64e..1b1156a6 100644 --- a/geodiff/src/changesetconcat.cpp +++ b/geodiff/src/changesetconcat.cpp @@ -3,12 +3,14 @@ Copyright (C) 2021 Martin Dobias */ +#include "changeset.h" #include "sqlite3.h" #include #include #include #include +#include #include "geodifflogger.hpp" #include "geodiffcontext.hpp" @@ -20,12 +22,12 @@ //! Hash value generator based on primary keys to have ChangesetEntry used in std::unordered_set struct HashChangesetEntryPkey { - size_t operator()( const ChangesetEntry *pentry ) const + size_t operator()( const ChangesetDataEntry *pentry ) const { size_t h = 0; - const ChangesetEntry &entry = *pentry; + const ChangesetDataEntry &entry = *pentry; const std::vector &pkeys = entry.table->primaryKeys; - const std::vector &values = entry.op == ChangesetEntry::OpInsert ? entry.newValues : entry.oldValues; + const std::vector &values = entry.op == ChangesetDataEntry::OpInsert ? entry.newValues : entry.oldValues; for ( size_t i = 0; i < pkeys.size(); ++i ) { if ( pkeys[i] ) @@ -39,13 +41,13 @@ struct HashChangesetEntryPkey //! 
Exact equality check based on primary keys to have ChangesetEntry used in std::unordered_set struct EqualToChangesetEntryPkey { - bool operator()( const ChangesetEntry *plhs, const ChangesetEntry *prhs ) const + bool operator()( const ChangesetDataEntry *plhs, const ChangesetDataEntry *prhs ) const { - const ChangesetEntry &lhs = *plhs; - const ChangesetEntry &rhs = *prhs; + const ChangesetDataEntry &lhs = *plhs; + const ChangesetDataEntry &rhs = *prhs; const std::vector &pkeys = lhs.table->primaryKeys; - const std::vector &lhsValues = lhs.op == ChangesetEntry::OpInsert ? lhs.newValues : lhs.oldValues; - const std::vector &rhsValues = rhs.op == ChangesetEntry::OpInsert ? rhs.newValues : rhs.oldValues; + const std::vector &lhsValues = lhs.op == ChangesetDataEntry::OpInsert ? lhs.newValues : lhs.oldValues; + const std::vector &rhsValues = rhs.op == ChangesetDataEntry::OpInsert ? rhs.newValues : rhs.oldValues; for ( size_t i = 0; i < pkeys.size(); ++i ) { if ( pkeys[i] && lhsValues[i] != rhsValues[i] ) @@ -55,7 +57,7 @@ struct EqualToChangesetEntryPkey } }; -typedef std::unordered_set TableEntriesSet; +typedef std::unordered_set TableEntriesSet; //! Struct to keep information about table and its changes while concatenating struct TableChanges @@ -127,20 +129,20 @@ enum MergeEntriesResult //! Takes two changeset entries e1 and e2 and merges their changes to e1 if possible. //! It is also possible that merging results in no change at all, or the change is not allowed -static MergeEntriesResult mergeEntriesForRow( ChangesetEntry *e1, ChangesetEntry *e2 ) +static MergeEntriesResult mergeEntriesForRow( ChangesetDataEntry *e1, ChangesetDataEntry *e2 ) { // all these changes make no sense really, if they happen most likely something got broken // (e.g. 
adding a row with the same pkey twice) - if ( ( e1->op == ChangesetEntry::OpInsert && e2->op == ChangesetEntry::OpInsert ) || - ( e1->op == ChangesetEntry::OpUpdate && e2->op == ChangesetEntry::OpInsert ) || - ( e1->op == ChangesetEntry::OpDelete && e2->op == ChangesetEntry::OpUpdate ) || - ( e1->op == ChangesetEntry::OpDelete && e2->op == ChangesetEntry::OpDelete ) ) + if ( ( e1->op == ChangesetDataEntry::OpInsert && e2->op == ChangesetDataEntry::OpInsert ) || + ( e1->op == ChangesetDataEntry::OpUpdate && e2->op == ChangesetDataEntry::OpInsert ) || + ( e1->op == ChangesetDataEntry::OpDelete && e2->op == ChangesetDataEntry::OpUpdate ) || + ( e1->op == ChangesetDataEntry::OpDelete && e2->op == ChangesetDataEntry::OpDelete ) ) return Unsupported; - if ( e1->op == ChangesetEntry::OpInsert && e2->op == ChangesetEntry::OpDelete ) + if ( e1->op == ChangesetDataEntry::OpInsert && e2->op == ChangesetDataEntry::OpDelete ) return EntryRemoved; - if ( e1->op == ChangesetEntry::OpInsert && e2->op == ChangesetEntry::OpUpdate ) + if ( e1->op == ChangesetDataEntry::OpInsert && e2->op == ChangesetDataEntry::OpUpdate ) { // modify INSERT - update its values wherever the update has a newer value for ( size_t i = 0; i < e1->table->columnCount(); ++i ) @@ -151,7 +153,7 @@ static MergeEntriesResult mergeEntriesForRow( ChangesetEntry *e1, ChangesetEntry return EntryModified; } - if ( e1->op == ChangesetEntry::OpUpdate && e2->op == ChangesetEntry::OpUpdate ) + if ( e1->op == ChangesetDataEntry::OpUpdate && e2->op == ChangesetDataEntry::OpUpdate ) { // modify UPDATE std::vector oldVals, newVals; @@ -162,10 +164,10 @@ static MergeEntriesResult mergeEntriesForRow( ChangesetEntry *e1, ChangesetEntry return EntryModified; } - if ( e1->op == ChangesetEntry::OpUpdate && e2->op == ChangesetEntry::OpDelete ) + if ( e1->op == ChangesetDataEntry::OpUpdate && e2->op == ChangesetDataEntry::OpDelete ) { // turn into DELETE, use old values from delete when update does not list them - e1->op = 
ChangesetEntry::OpDelete; + e1->op = ChangesetDataEntry::OpDelete; for ( size_t i = 0; i < e1->table->columnCount(); ++i ) { if ( e1->oldValues[i].type() == Value::TypeUndefined ) @@ -174,13 +176,13 @@ static MergeEntriesResult mergeEntriesForRow( ChangesetEntry *e1, ChangesetEntry return EntryModified; } - if ( e1->op == ChangesetEntry::OpDelete && e2->op == ChangesetEntry::OpInsert ) + if ( e1->op == ChangesetDataEntry::OpDelete && e2->op == ChangesetDataEntry::OpInsert ) { // turn into UPDATE std::vector oldVals, newVals; if ( !mergeUpdate( *e1->table, e1->oldValues, {}, e2->newValues, {}, oldVals, newVals ) ) return EntryRemoved; - e1->op = ChangesetEntry::OpUpdate; + e1->op = ChangesetDataEntry::OpUpdate; e1->oldValues = oldVals; e1->newValues = newVals; return EntryModified; @@ -207,15 +209,19 @@ void concatChangesets( if ( !reader.open( inputFilename ) ) throw GeoDiffException( "concatChangesets: unable to open input file: " + inputFilename ); - ChangesetEntry entry; - while ( reader.nextEntry( entry ) ) + ChangesetEntry fullEntry; + while ( reader.nextEntry( fullEntry ) ) { + if ( !std::holds_alternative( fullEntry ) ) + // TODO(dvdkon): Implement + throw GeoDiffException( "concatChanges doesn't handle DDL changes yet" ); + ChangesetDataEntry &entry = std::get( fullEntry ); auto tableIt = result.find( entry.table->name ); if ( tableIt == result.end() ) { TableChanges &t = result[ entry.table->name ]; // adds new entry t.table.reset( new ChangesetTable( *entry.table ) ); - ChangesetEntry *e = new ChangesetEntry( entry ); + ChangesetDataEntry *e = new ChangesetDataEntry( entry ); e->table = t.table.get(); t.entries.insert( e ); } @@ -226,14 +232,14 @@ void concatChangesets( if ( entriesIt == t.entries.end() ) { // row with this pkey is not in our list yet - ChangesetEntry *e = new ChangesetEntry( entry ); + ChangesetDataEntry *e = new ChangesetDataEntry( entry ); e->table = t.table.get(); t.entries.insert( e ); } else { // we need to merge the recorded entry 
with the new one - ChangesetEntry *entry0 = *entriesIt; + ChangesetDataEntry *entry0 = *entriesIt; MergeEntriesResult mergeRes = mergeEntriesForRow( entry0, &entry ); switch ( mergeRes ) { @@ -266,7 +272,7 @@ void concatChangesets( continue; writer.beginTable( *t.table ); - for ( ChangesetEntry *e : t.entries ) + for ( ChangesetDataEntry *e : t.entries ) { writer.writeEntry( *e ); delete e; diff --git a/geodiff/src/changesetreader.cpp b/geodiff/src/changesetreader.cpp index 503cc404..20847503 100644 --- a/geodiff/src/changesetreader.cpp +++ b/geodiff/src/changesetreader.cpp @@ -5,6 +5,7 @@ #include "changesetreader.h" +#include "changeset.h" #include "geodiffutils.hpp" #include "changesetgetvarint.h" #include "portableendian.h" @@ -42,31 +43,40 @@ bool ChangesetReader::nextEntry( ChangesetEntry &entry ) if ( mOffset >= mBuffer->size() ) break; // EOF - int type = readByte(); - if ( type == 'T' ) + ChangesetEntryType type = static_cast( readByte() ); + if ( type == ChangesetEntryType::OpTableRecord ) { readTableRecord(); // and now continue reading, we want an entry } - else if ( type == ChangesetEntry::OpInsert || type == ChangesetEntry::OpUpdate || type == ChangesetEntry::OpDelete ) + else if ( type == ChangesetEntryType::OpInsert || type == ChangesetEntryType::OpUpdate || type == ChangesetEntryType::OpDelete ) { - readByte(); - if ( type != ChangesetEntry::OpInsert ) - readRowValues( entry.oldValues ); - else - entry.oldValues.erase( entry.oldValues.begin(), entry.oldValues.end() ); - if ( type != ChangesetEntry::OpDelete ) - readRowValues( entry.newValues ); - else - entry.newValues.erase( entry.newValues.begin(), entry.newValues.end() ); - - entry.op = static_cast( type ); - entry.table = &mCurrentTable; + entry = readDataEntry( type ); return true; // we're done! 
} + else if ( type == ChangesetEntryType::OpCreateTable ) + { + entry = readCreateTableEntry(); + return true; + } + else if ( type == ChangesetEntryType::OpDropTable ) + { + entry = readDropTableEntry(); + return true; + } + else if ( type == ChangesetEntryType::OpAddColumn ) + { + entry = readAddColumnEntry(); + return true; + } + else if ( type == ChangesetEntryType::OpDropColumn ) + { + entry = readDropColumnEntry(); + return true; + } else { - throwReaderError( "Unknown entry type " + std::to_string( type ) ); + throwReaderError( "Unknown entry type " + std::to_string( static_cast( type ) ) ); } } return false; @@ -195,6 +205,70 @@ void ChangesetReader::readTableRecord() mCurrentTable.name = readNullTerminatedString(); } +ChangesetDataEntry ChangesetReader::readDataEntry( ChangesetEntryType type ) +{ + ChangesetDataEntry entry; + readByte(); + if ( type != ChangesetEntryType::OpInsert ) + readRowValues( entry.oldValues ); + else + entry.oldValues.erase( entry.oldValues.begin(), entry.oldValues.end() ); + if ( type != ChangesetEntryType::OpDelete ) + readRowValues( entry.newValues ); + else + entry.newValues.erase( entry.newValues.begin(), entry.newValues.end() ); + + entry.op = static_cast( type ); + entry.table = &mCurrentTable; + return entry; +} + +ChangesetDdlColumn ChangesetReader::readDdlColumn() +{ + ChangesetDdlColumn column; + column.name = readNullTerminatedString(); + column.type = readNullTerminatedString(); + char flags = readByte(); + column.isNotNull = flags & 0x1; + column.isUnique = flags & 0x2; + return column; +} + +ChangesetCreateTableEntry ChangesetReader::readCreateTableEntry() +{ + ChangesetCreateTableEntry entry; + entry.tableName = readNullTerminatedString(); + int columnCount = readVarint(); + entry.columns.resize( columnCount ); + for ( size_t i = 0; i < entry.columns.size(); i++ ) + { + entry.columns[i] = readDdlColumn(); + } + return entry; +} + +ChangesetDropTableEntry ChangesetReader::readDropTableEntry() +{ + 
ChangesetDropTableEntry entry; + entry.tableName = readNullTerminatedString(); + return entry; +} + +ChangesetAddColumnEntry ChangesetReader::readAddColumnEntry() +{ + ChangesetAddColumnEntry entry; + entry.tableName = readNullTerminatedString(); + entry.column = readDdlColumn(); + return entry; +} + +ChangesetDropColumnEntry ChangesetReader::readDropColumnEntry() +{ + ChangesetDropColumnEntry entry; + entry.tableName = readNullTerminatedString(); + entry.columnName = readNullTerminatedString(); + return entry; +} void ChangesetReader::throwReaderError( const std::string &message ) const { diff --git a/geodiff/src/changesetreader.h b/geodiff/src/changesetreader.h index 37254e60..a6c45532 100644 --- a/geodiff/src/changesetreader.h +++ b/geodiff/src/changesetreader.h @@ -45,6 +45,12 @@ class ChangesetReader std::string readNullTerminatedString(); void readRowValues( std::vector &values ); void readTableRecord(); + ChangesetDataEntry readDataEntry( ChangesetEntryType type ); + ChangesetDdlColumn readDdlColumn(); + ChangesetCreateTableEntry readCreateTableEntry(); + ChangesetDropTableEntry readDropTableEntry(); + ChangesetAddColumnEntry readAddColumnEntry(); + ChangesetDropColumnEntry readDropColumnEntry(); void throwReaderError( const std::string &message ) const; diff --git a/geodiff/src/changesetutils.cpp b/geodiff/src/changesetutils.cpp index bde8148c..996bfce4 100644 --- a/geodiff/src/changesetutils.cpp +++ b/geodiff/src/changesetutils.cpp @@ -6,6 +6,7 @@ #include "changesetutils.h" #include "base64utils.h" +#include "changeset.h" #include "geodiffutils.hpp" #include "changesetreader.h" #include "changesetwriter.h" @@ -28,50 +29,71 @@ void invertChangeset( ChangesetReader &reader, ChangesetWriter &writer ) ChangesetEntry entry; while ( reader.nextEntry( entry ) ) { - assert( entry.table ); - if ( entry.table->name != currentTableName ) + if ( ChangesetDataEntry *dataEntry = std::get_if( &entry ) ) { - writer.beginTable( *entry.table ); - currentTableName = 
entry.table->name; - currentPkeys = entry.table->primaryKeys; - } + assert( dataEntry->table ); + if ( dataEntry->table->name != currentTableName ) + { + writer.beginTable( *dataEntry->table ); + currentTableName = dataEntry->table->name; + currentPkeys = dataEntry->table->primaryKeys; + } - if ( entry.op == ChangesetEntry::OpInsert ) - { - ChangesetEntry out; - out.op = ChangesetEntry::OpDelete; - out.oldValues = entry.newValues; - writer.writeEntry( out ); + if ( dataEntry->op == ChangesetDataEntry::OpInsert ) + { + ChangesetDataEntry out; + out.op = ChangesetDataEntry::OpDelete; + out.oldValues = dataEntry->newValues; + writer.writeEntry( out ); + } + else if ( dataEntry->op == ChangesetDataEntry::OpDelete ) + { + ChangesetDataEntry out; + out.op = ChangesetDataEntry::OpInsert; + out.newValues = dataEntry->oldValues; + writer.writeEntry( out ); + } + else if ( dataEntry->op == ChangesetDataEntry::OpUpdate ) + { + ChangesetDataEntry out; + out.op = ChangesetDataEntry::OpUpdate; + out.newValues = dataEntry->oldValues; + out.oldValues = dataEntry->newValues; + // if a column is a part of pkey and has not been changed, + // the original entry has "old" value the pkey value and "new" + // value is undefined - let's reverse "old" and "new" in that case. + for ( size_t i = 0; i < currentPkeys.size(); ++i ) + { + if ( currentPkeys[i] && out.oldValues[i].type() == Value::TypeUndefined ) + { + out.oldValues[i] = out.newValues[i]; + out.newValues[i].setUndefined(); + } + } + writer.writeEntry( out ); + } + else + { + throw GeoDiffException( "Unknown entry operation!" 
); + } } - else if ( entry.op == ChangesetEntry::OpDelete ) + else if ( ChangesetCreateTableEntry *ctEntry = std::get_if( &entry ) ) { - ChangesetEntry out; - out.op = ChangesetEntry::OpInsert; - out.newValues = entry.oldValues; + ChangesetDropTableEntry out; + out.tableName = ctEntry->tableName; writer.writeEntry( out ); } - else if ( entry.op == ChangesetEntry::OpUpdate ) + else if ( ChangesetAddColumnEntry *acEntry = std::get_if( &entry ) ) { - ChangesetEntry out; - out.op = ChangesetEntry::OpUpdate; - out.newValues = entry.oldValues; - out.oldValues = entry.newValues; - // if a column is a part of pkey and has not been changed, - // the original entry has "old" value the pkey value and "new" - // value is undefined - let's reverse "old" and "new" in that case. - for ( size_t i = 0; i < currentPkeys.size(); ++i ) - { - if ( currentPkeys[i] && out.oldValues[i].type() == Value::TypeUndefined ) - { - out.oldValues[i] = out.newValues[i]; - out.newValues[i].setUndefined(); - } - } + ChangesetDropColumnEntry out; + out.tableName = acEntry->tableName; + out.columnName = acEntry->column.name; writer.writeEntry( out ); } else { - throw GeoDiffException( "Unknown entry operation!" 
); + // We can't invert DROP TABLE/COLUMN, because we don't know what's being dropped + throw GeoDiffException( "Cannot invert changeset entry variant " + std::to_string( entry.index() ) ); } } } @@ -112,21 +134,21 @@ nlohmann::json valueToJSON( const Value &value ) } -nlohmann::json changesetEntryToJSON( const ChangesetEntry &entry ) +nlohmann::json changesetDataEntryToJSON( const ChangesetDataEntry &entry ) { std::string status; - if ( entry.op == ChangesetEntry::OpUpdate ) + if ( entry.op == ChangesetDataEntry::OpUpdate ) status = "update"; - else if ( entry.op == ChangesetEntry::OpInsert ) + else if ( entry.op == ChangesetDataEntry::OpInsert ) status = "insert"; - else if ( entry.op == ChangesetEntry::OpDelete ) + else if ( entry.op == ChangesetDataEntry::OpDelete ) status = "delete"; // Check that the table column count matches the vector sizes to prevent // out-of-bounds errors. - if ( ( ( entry.op == ChangesetEntry::OpUpdate || entry.op == ChangesetEntry::OpInsert ) + if ( ( ( entry.op == ChangesetDataEntry::OpUpdate || entry.op == ChangesetDataEntry::OpInsert ) && entry.table->columnCount() != entry.newValues.size() ) - || ( ( entry.op == ChangesetEntry::OpUpdate || entry.op == ChangesetEntry::OpDelete ) + || ( ( entry.op == ChangesetDataEntry::OpUpdate || entry.op == ChangesetDataEntry::OpDelete ) && entry.table->columnCount() != entry.oldValues.size() ) ) throw GeoDiffException( "Table column count doesn't match value list size" ); @@ -139,8 +161,8 @@ nlohmann::json changesetEntryToJSON( const ChangesetEntry &entry ) Value valueOld, valueNew; for ( size_t i = 0; i < entry.table->columnCount(); ++i ) { - valueNew = ( entry.op == ChangesetEntry::OpUpdate || entry.op == ChangesetEntry::OpInsert ) ? entry.newValues[i] : Value(); - valueOld = ( entry.op == ChangesetEntry::OpUpdate || entry.op == ChangesetEntry::OpDelete ) ? entry.oldValues[i] : Value(); + valueNew = ( entry.op == ChangesetDataEntry::OpUpdate || entry.op == ChangesetDataEntry::OpInsert ) ? 
entry.newValues[i] : Value(); + valueOld = ( entry.op == ChangesetDataEntry::OpUpdate || entry.op == ChangesetDataEntry::OpDelete ) ? entry.oldValues[i] : Value(); nlohmann::json change; @@ -174,6 +196,63 @@ nlohmann::json changesetEntryToJSON( const ChangesetEntry &entry ) return res; } +static nlohmann::json changesetColumnToJSON( const ChangesetDdlColumn &column ) +{ + nlohmann::json res; + res["name"] = column.name; + res["type"] = column.type; + res["isNotNull"] = column.isNotNull; + res["isUnique"] = column.isUnique; + return res; +} + +nlohmann::json changesetEntryToJSON( const ChangesetEntry &entry ) +{ + if ( const ChangesetDataEntry *dataEntry = std::get_if( &entry ) ) + { + return changesetDataEntryToJSON( *dataEntry ); + } + else if ( const ChangesetCreateTableEntry *ctEntry = std::get_if( &entry ) ) + { + nlohmann::json res; + res["type"] = "create_table"; + res["tableName"] = ctEntry->tableName; + res["columns"] = nlohmann::json::array(); + for ( const ChangesetDdlColumn &column : ctEntry->columns ) + { + res["columns"].push_back( changesetColumnToJSON( column ) ); + } + return res; + } + else if ( const ChangesetDropTableEntry *dtEntry = std::get_if( &entry ) ) + { + nlohmann::json res; + res["type"] = "drop_table"; + res["tableName"] = dtEntry->tableName; + return res; + } + else if ( const ChangesetAddColumnEntry *acEntry = std::get_if( &entry ) ) + { + nlohmann::json res; + res["type"] = "add_column"; + res["tableName"] = acEntry->tableName; + res["column"] = changesetColumnToJSON( acEntry->column ); + return res; + } + else if ( const ChangesetDropColumnEntry *dcEntry = std::get_if( &entry ) ) + { + nlohmann::json res; + res["type"] = "drop_column"; + res["tableName"] = dcEntry->tableName; + res["columnName"] = dcEntry->columnName; + return res; + } + else + { + throw GeoDiffException( "Cannot convert entry variant " + std::to_string( entry.index() ) + " to JSON" ); + } +} + nlohmann::json changesetToJSON( ChangesetReader &reader ) { auto entries 
= nlohmann::json::array(); @@ -209,14 +288,17 @@ nlohmann::json changesetToJSONSummary( ChangesetReader &reader ) ChangesetEntry entry; while ( reader.nextEntry( entry ) ) { - std::string tableName = entry.table->name; + if ( !std::holds_alternative<ChangesetDataEntry>( entry ) ) + continue; + ChangesetDataEntry &dataEntry = std::get<ChangesetDataEntry>( entry ); + std::string tableName = dataEntry.table->name; TableSummary &tableSummary = summary[tableName]; - if ( entry.op == ChangesetEntry::OpUpdate ) + if ( dataEntry.op == ChangesetDataEntry::OpUpdate ) ++tableSummary.updates; - else if ( entry.op == ChangesetEntry::OpInsert ) + else if ( dataEntry.op == ChangesetDataEntry::OpInsert ) ++tableSummary.inserts; - else if ( entry.op == ChangesetEntry::OpDelete ) + else if ( dataEntry.op == ChangesetDataEntry::OpDelete ) ++tableSummary.deletes; } diff --git a/geodiff/src/changesetutils.h b/geodiff/src/changesetutils.h index e836436f..7fc5268b 100644 --- a/geodiff/src/changesetutils.h +++ b/geodiff/src/changesetutils.h @@ -7,6 +7,7 @@ #define CHANGESETUTILS_H #include "geodiff.h" +#include "changeset.h" #include #include @@ -15,7 +16,7 @@ class ConflictFeature; class ChangesetReader; class ChangesetWriter; -struct ChangesetEntry; +struct ChangesetDataEntry; struct ChangesetTable; struct TableSchema; struct Value; @@ -27,6 +28,8 @@ void invertChangeset( ChangesetReader &reader, ChangesetWriter &writer ); void concatChangesets( const Context *context, const std::vector &filenames, const std::string &outputChangeset ); +nlohmann::json changesetDataEntryToJSON( const ChangesetDataEntry &entry ); + nlohmann::json changesetEntryToJSON( const ChangesetEntry &entry ); nlohmann::json changesetToJSON( ChangesetReader &reader ); diff --git a/geodiff/src/changesetwriter.cpp b/geodiff/src/changesetwriter.cpp index bb57afc3..6ed7ae32 100644 --- a/geodiff/src/changesetwriter.cpp +++ b/geodiff/src/changesetwriter.cpp @@ -5,6 +5,7 @@ #include "changesetwriter.h" +#include "changeset.h" #include "geodiffutils.hpp" #include 
"changesetputvarint.h" #include "portableendian.h" @@ -13,6 +14,7 @@ #include #include +#include void ChangesetWriter::open( const std::string &filename ) { @@ -29,7 +31,7 @@ void ChangesetWriter::beginTable( const ChangesetTable &table ) { mCurrentTable = table; - writeByte( 'T' ); + writeByte( ( int ) ChangesetEntryType::OpTableRecord ); writeVarint( ( int ) table.columnCount() ); for ( size_t i = 0; i < table.columnCount(); ++i ) writeByte( table.primaryKeys[i] ); @@ -38,15 +40,19 @@ void ChangesetWriter::beginTable( const ChangesetTable &table ) void ChangesetWriter::writeEntry( const ChangesetEntry &entry ) { - if ( entry.op != ChangesetEntry::OpInsert && entry.op != ChangesetEntry::OpUpdate && entry.op != ChangesetEntry::OpDelete ) - throw GeoDiffException( "wrong op for changeset entry" ); - writeByte( ( char ) entry.op ); - writeByte( 0 ); // "indirect" always false - - if ( entry.op != ChangesetEntry::OpInsert ) - writeRowValues( entry.oldValues ); - if ( entry.op != ChangesetEntry::OpDelete ) - writeRowValues( entry.newValues ); + if ( const ChangesetDataEntry *dataEntry = std::get_if<ChangesetDataEntry>( &entry ) ) + writeDataEntry( *dataEntry ); + else if ( const ChangesetCreateTableEntry *ctEntry = std::get_if<ChangesetCreateTableEntry>( &entry ) ) + writeCreateTableEntry( *ctEntry ); + else if ( const ChangesetDropTableEntry *dtEntry = std::get_if<ChangesetDropTableEntry>( &entry ) ) + writeDropTableEntry( *dtEntry ); + else if ( const ChangesetAddColumnEntry *acEntry = std::get_if<ChangesetAddColumnEntry>( &entry ) ) + writeAddColumnEntry( *acEntry ); + else if ( const ChangesetDropColumnEntry *dcEntry = std::get_if<ChangesetDropColumnEntry>( &entry ) ) + writeDropColumnEntry( *dcEntry ); + else // only reached when no alternative above matched, e.g. a newly added entry type + throw GeoDiffException( "Tried to write unhandled changeset entry type! 
" + std::to_string( entry.index() ) ); } void ChangesetWriter::writeByte( char c ) @@ -113,3 +119,55 @@ void ChangesetWriter::writeRowValues( const std::vector &values ) } } } + +void ChangesetWriter::writeDdlColumn( const ChangesetDdlColumn &column ) +{ + writeNullTerminatedString( column.name ); + writeNullTerminatedString( column.type ); + writeByte( column.isNotNull | ( column.isUnique << 1 ) ); +} + + +void ChangesetWriter::writeDataEntry( const ChangesetDataEntry &entry ) +{ + if ( entry.op != ChangesetDataEntry::OpInsert && entry.op != ChangesetDataEntry::OpUpdate && entry.op != ChangesetDataEntry::OpDelete ) + throw GeoDiffException( "wrong op for changeset entry" ); + writeByte( ( char ) entry.op ); + writeByte( 0 ); // "indirect" always false + + if ( entry.op != ( int ) ChangesetEntryType::OpInsert ) + writeRowValues( entry.oldValues ); + if ( entry.op != ( int ) ChangesetEntryType::OpDelete ) + writeRowValues( entry.newValues ); +} + +void ChangesetWriter::writeCreateTableEntry( const ChangesetCreateTableEntry &entry ) +{ + writeByte( ( char ) ChangesetEntryType::OpCreateTable ); // entry type marker that ChangesetReader::nextEntry() dispatches on + writeNullTerminatedString( entry.tableName ); + writeVarint( entry.columns.size() ); + for ( const ChangesetDdlColumn &column : entry.columns ) + { + writeDdlColumn( column ); + } +} + +void ChangesetWriter::writeDropTableEntry( const ChangesetDropTableEntry &entry ) +{ + writeByte( ( char ) ChangesetEntryType::OpDropTable ); // entry type marker + writeNullTerminatedString( entry.tableName ); +} + +void ChangesetWriter::writeAddColumnEntry( const ChangesetAddColumnEntry &entry ) +{ + writeByte( ( char ) ChangesetEntryType::OpAddColumn ); // entry type marker + writeNullTerminatedString( entry.tableName ); + writeDdlColumn( entry.column ); +} + +void ChangesetWriter::writeDropColumnEntry( const ChangesetDropColumnEntry &entry ) +{ + writeByte( ( char ) ChangesetEntryType::OpDropColumn ); // entry type marker + writeNullTerminatedString( entry.tableName ); + writeNullTerminatedString( entry.columnName ); +} diff --git a/geodiff/src/changesetwriter.h b/geodiff/src/changesetwriter.h index eed79a86..3f5d5676 100644 --- a/geodiff/src/changesetwriter.h +++ b/geodiff/src/changesetwriter.h @@ -43,6 +43,13 @@ class ChangesetWriter void writeNullTerminatedString( const 
std::string &str ); void writeRowValues( const std::vector &values ); + void writeDdlColumn( const ChangesetDdlColumn &column ); + + void writeDataEntry( const ChangesetDataEntry &entry ); + void writeCreateTableEntry( const ChangesetCreateTableEntry &entry ); + void writeDropTableEntry( const ChangesetDropTableEntry &entry ); + void writeAddColumnEntry( const ChangesetAddColumnEntry &entry ); + void writeDropColumnEntry( const ChangesetDropColumnEntry &entry ); std::ofstream mFile; diff --git a/geodiff/src/drivers/postgresdriver.cpp b/geodiff/src/drivers/postgresdriver.cpp index 38deef5a..d0077b80 100644 --- a/geodiff/src/drivers/postgresdriver.cpp +++ b/geodiff/src/drivers/postgresdriver.cpp @@ -61,9 +61,9 @@ class PostgresTransaction ///// -void PostgresDriver::logApplyConflict( const std::string &type, const ChangesetEntry &entry ) const +void PostgresDriver::logApplyConflict( const std::string &type, const ChangesetDataEntry &entry ) const { - context()->logger().warn( "CONFLICT: " + type + ":\n" + changesetEntryToJSON( entry ).dump( 2 ) ); + context()->logger().warn( "CONFLICT: " + type + ":\n" + changesetDataEntryToJSON( entry ).dump( 2 ) ); } PostgresDriver::PostgresDriver( const Context *context ) @@ -606,8 +606,8 @@ static void handleInserted( const std::string &schemaNameBase, const std::string first = false; } - ChangesetEntry e; - e.op = reverse ? ChangesetEntry::OpDelete : ChangesetEntry::OpInsert; + ChangesetDataEntry e; + e.op = reverse ? ChangesetDataEntry::OpDelete : ChangesetDataEntry::OpInsert; int numColumns = static_cast( tbl.columns.size() ); for ( int i = 0; i < numColumns; ++i ) @@ -650,8 +650,8 @@ static void handleUpdated( const std::string &schemaNameBase, const std::string ** are set to "undefined". 
*/ - ChangesetEntry e; - e.op = ChangesetEntry::OpUpdate; + ChangesetDataEntry e; + e.op = ChangesetDataEntry::OpUpdate; int numColumns = static_cast( tbl.columns.size() ); for ( int i = 0; i < numColumns; ++i ) @@ -794,7 +794,7 @@ static std::string sqlForDelete( const std::string &schemaName, const std::strin return sql; } -ChangeApplyResult PostgresDriver::applyChange( PostgresChangeApplyState &state, const ChangesetEntry &entry ) +ChangeApplyResult PostgresDriver::applyChange( PostgresChangeApplyState &state, const ChangesetDataEntry &entry ) { std::string tableName = entry.table->name; @@ -836,7 +836,7 @@ ChangeApplyResult PostgresDriver::applyChange( PostgresChangeApplyState &state, try { PostgresChangeApplyState::TableState &tbl = state.tableState[tableName]; - if ( entry.op == ChangesetEntry::OpInsert ) + if ( entry.op == ChangesetDataEntry::OpInsert ) { std::string sql = sqlForInsert( mBaseSchema, tableName, tbl.schema, entry.newValues ); PostgresResult res = execSql( mConn, sql ); @@ -849,7 +849,7 @@ ChangeApplyResult PostgresDriver::applyChange( PostgresChangeApplyState &state, tbl.autoIncrementMax = std::max( tbl.autoIncrementMax, pkey ); } } - else if ( entry.op == ChangesetEntry::OpUpdate ) + else if ( entry.op == ChangesetDataEntry::OpUpdate ) { std::string sql = sqlForUpdate( mBaseSchema, tableName, tbl.schema, entry.oldValues, entry.newValues ); PostgresResult res = execSql( mConn, sql ); @@ -860,7 +860,7 @@ ChangeApplyResult PostgresDriver::applyChange( PostgresChangeApplyState &state, return ChangeApplyResult::NoChange; } } - else if ( entry.op == ChangesetEntry::OpDelete ) + else if ( entry.op == ChangesetDataEntry::OpDelete ) { std::string sql = sqlForDelete( mBaseSchema, tableName, tbl.schema, entry.oldValues ); PostgresResult res = execSql( mConn, sql ); @@ -901,35 +901,38 @@ void PostgresDriver::applyChangeset( ChangesetReader &reader ) // See sqlitedriver.cpp for why and how we're trying to apply changes // multiple times int 
unrecoverableConflictCount = 0; - std::vector conflictingEntries; + std::vector conflictingEntries; ChangesetEntry entry; PostgresChangeApplyState state; std::unordered_map> tableCopies; while ( reader.nextEntry( entry ) ) { - ChangeApplyResult res = applyChange( state, entry ); - switch ( res ) + if ( ChangesetDataEntry *dataEntry = std::get_if( &entry ) ) { - case ChangeApplyResult::Applied: - case ChangeApplyResult::Skipped: - break; - case ChangeApplyResult::ConstraintConflict: - if ( tableCopies.count( entry.table->name ) == 0 ) - // cppcheck-suppress stlFindInsert - tableCopies[entry.table->name] = std::unique_ptr( new ChangesetTable( *entry.table ) ); - entry.table = tableCopies[entry.table->name].get(); - conflictingEntries.push_back( entry ); - break; - case ChangeApplyResult::NoChange: - unrecoverableConflictCount++; - break; + ChangeApplyResult res = applyChange( state, *dataEntry ); + switch ( res ) + { + case ChangeApplyResult::Applied: + case ChangeApplyResult::Skipped: + break; + case ChangeApplyResult::ConstraintConflict: + if ( tableCopies.count( dataEntry->table->name ) == 0 ) + // cppcheck-suppress stlFindInsert + tableCopies[dataEntry->table->name] = std::unique_ptr( new ChangesetTable( *dataEntry->table ) ); + dataEntry->table = tableCopies[dataEntry->table->name].get(); + conflictingEntries.push_back( *dataEntry ); + break; + case ChangeApplyResult::NoChange: + unrecoverableConflictCount++; + break; + } } } - std::vector newConflictingEntries; + std::vector newConflictingEntries; while ( conflictingEntries.size() > 0 ) { - for ( const ChangesetEntry ¢ry : conflictingEntries ) + for ( const ChangesetDataEntry ¢ry : conflictingEntries ) { ChangeApplyResult res = applyChange( state, centry ); switch ( res ) @@ -948,7 +951,7 @@ void PostgresDriver::applyChangeset( ChangesetReader &reader ) if ( newConflictingEntries.size() == conflictingEntries.size() ) { - for ( const ChangesetEntry ¢ry : conflictingEntries ) + for ( const ChangesetDataEntry ¢ry 
: conflictingEntries ) logApplyConflict( "unresolvable_conflict", centry ); throw GeoDiffConflictsException( "Could not resolve dependencies in constraint conflicts." ); } @@ -1078,8 +1081,8 @@ void PostgresDriver::dumpData( ChangesetWriter &writer, bool useModified ) writer.beginTable( schemaToChangesetTable( tableName, tbl ) ); } - ChangesetEntry e; - e.op = ChangesetEntry::OpInsert; + ChangesetDataEntry e; + e.op = ChangesetDataEntry::OpInsert; int numColumns = static_cast( tbl.columns.size() ); for ( int i = 0; i < numColumns; ++i ) { diff --git a/geodiff/src/drivers/postgresdriver.h b/geodiff/src/drivers/postgresdriver.h index 83bb277a..d4a42702 100644 --- a/geodiff/src/drivers/postgresdriver.h +++ b/geodiff/src/drivers/postgresdriver.h @@ -45,12 +45,12 @@ class PostgresDriver : public Driver void dumpData( ChangesetWriter &writer, bool useModified = false ) override; private: - void logApplyConflict( const std::string &type, const ChangesetEntry &entry ) const; + void logApplyConflict( const std::string &type, const ChangesetDataEntry &entry ) const; void openPrivate( const DriverParametersMap &conn ); void close(); std::string getSequenceObjectName( const TableSchema &tbl, int &autoIncrementPkeyIndex ); void updateSequenceObject( const std::string &seqName, int64_t maxValue ); - ChangeApplyResult applyChange( PostgresChangeApplyState &state, const ChangesetEntry &entry ); + ChangeApplyResult applyChange( PostgresChangeApplyState &state, const ChangesetDataEntry &entry ); PGconn *mConn = nullptr; std::string mBaseSchema; diff --git a/geodiff/src/drivers/sqlitedriver.cpp b/geodiff/src/drivers/sqlitedriver.cpp index 91206c9c..0c272b83 100644 --- a/geodiff/src/drivers/sqlitedriver.cpp +++ b/geodiff/src/drivers/sqlitedriver.cpp @@ -15,14 +15,15 @@ #include #include +#include -void SqliteDriver::logApplyConflict( const std::string &type, const ChangesetEntry &entry, bool isDbErr ) const +void SqliteDriver::logApplyConflict( const std::string &type, const 
ChangesetDataEntry &entry, bool isDbErr ) const { std::string msg = "CONFLICT: " + type; if ( isDbErr ) msg += " (" + std::string( sqlite3_errmsg( mDb->get() ) ) + ")"; - msg += ":\n" + changesetEntryToJSON( entry ).dump( 2 ); + msg += ":\n" + changesetDataEntryToJSON( entry ).dump( 2 ); context()->logger().warn( msg ); } @@ -485,8 +486,8 @@ static void handleInserted( const Context *context, const std::string &tableName first = false; } - ChangesetEntry e; - e.op = reverse ? ChangesetEntry::OpDelete : ChangesetEntry::OpInsert; + ChangesetDataEntry e; + e.op = reverse ? ChangesetDataEntry::OpDelete : ChangesetDataEntry::OpInsert; size_t numColumns = tbl.columns.size(); for ( size_t i = 0; i < numColumns; ++i ) @@ -526,8 +527,8 @@ static void handleUpdated( const Context *context, const std::string &tableName, ** are set to "undefined". */ - ChangesetEntry e; - e.op = ChangesetEntry::OpUpdate; + ChangesetDataEntry e; + e.op = ChangesetDataEntry::OpUpdate; bool hasUpdates = false; size_t numColumns = tbl.columns.size(); @@ -751,7 +752,7 @@ static void bindValue( sqlite3_stmt *stmt, int index, const Value &v ) } -ChangeApplyResult SqliteDriver::applyChange( SqliteChangeApplyState &state, const ChangesetEntry &entry ) +ChangeApplyResult SqliteDriver::applyDataChange( SqliteChangeApplyState &state, const ChangesetDataEntry &entry ) { std::string tableName = entry.table->name; @@ -900,45 +901,49 @@ void SqliteDriver::applyChangeset( ChangesetReader &reader ) } int unrecoverableConflictCount = 0; - std::vector conflictingEntries; + std::vector conflictingEntries; ChangesetEntry entry; SqliteChangeApplyState state; std::unordered_map> tableCopies; while ( reader.nextEntry( entry ) ) { - ChangeApplyResult res = applyChange( state, entry ); - switch ( res ) + if ( ChangesetDataEntry *dataEntry = std::get_if( &entry ) ) { - case ChangeApplyResult::Applied: - case ChangeApplyResult::Skipped: - break; // Applied correctly, continue onward. 
- case ChangeApplyResult::ConstraintConflict: - // Ordering conflict found, handle later. - // Effectively copying the entry isn't simple, since ChangesetReader is - // happy to change entry.table under our feet. We need to copy the - // table object, ideally only keeping one per table. - if ( tableCopies.count( entry.table->name ) == 0 ) - // cppcheck-suppress stlFindInsert - tableCopies[entry.table->name] = std::unique_ptr( new ChangesetTable( *entry.table ) ); - entry.table = tableCopies[entry.table->name].get(); - conflictingEntries.push_back( entry ); - break; - case ChangeApplyResult::NoChange: - unrecoverableConflictCount++; // Other issue, will throw at the end. - break; + ChangeApplyResult res = applyDataChange( state, *dataEntry ); + switch ( res ) + { + case ChangeApplyResult::Applied: + case ChangeApplyResult::Skipped: + break; // Applied correctly, continue onward. + case ChangeApplyResult::ConstraintConflict: + // Ordering conflict found, handle later. + // Effectively copying the entry isn't simple, since ChangesetReader is + // happy to change entry.table under our feet. We need to copy the + // table object, ideally only keeping one per table. + if ( tableCopies.count( dataEntry->table->name ) == 0 ) + // cppcheck-suppress stlFindInsert + tableCopies[dataEntry->table->name] = std::unique_ptr( new ChangesetTable( *dataEntry->table ) ); + dataEntry->table = tableCopies[dataEntry->table->name].get(); + conflictingEntries.push_back( *dataEntry ); + break; + case ChangeApplyResult::NoChange: + unrecoverableConflictCount++; // Other issue, will throw at the end. + break; + } } + // TODO(dvdkon): Handle DDL entries } // Applying some entries may fail due to constraints, since they require the // entries to be in some specific, unknown order. To work around this, we // retry applying the conflicting entries until either we apply them all or we // get stuck. 
- std::vector newConflictingEntries; + std::vector newConflictingEntries; while ( conflictingEntries.size() > 0 ) { - for ( const ChangesetEntry ¢ry : conflictingEntries ) + for ( const ChangesetDataEntry ¢ry : conflictingEntries ) { - ChangeApplyResult res = applyChange( state, centry ); + ChangeApplyResult res = applyDataChange( state, centry ); switch ( res ) { case ChangeApplyResult::Applied: @@ -957,7 +962,7 @@ void SqliteDriver::applyChangeset( ChangesetReader &reader ) // loop, then these conflicts can't be resolved by reordering entries. if ( newConflictingEntries.size() == conflictingEntries.size() ) { - for ( const ChangesetEntry ¢ry : conflictingEntries ) + for ( const ChangesetDataEntry ¢ry : conflictingEntries ) logApplyConflict( "unresolvable_conflict", centry ); throw GeoDiffConflictsException( "Could not resolve dependencies in constraint conflicts." ); } @@ -1152,8 +1157,8 @@ void SqliteDriver::dumpData( ChangesetWriter &writer, bool useModified ) first = false; } - ChangesetEntry e; - e.op = ChangesetEntry::OpInsert; + ChangesetDataEntry e; + e.op = ChangesetDataEntry::OpInsert; size_t numColumns = tbl.columns.size(); for ( size_t i = 0; i < numColumns; ++i ) { diff --git a/geodiff/src/drivers/sqlitedriver.h b/geodiff/src/drivers/sqlitedriver.h index 7bbecef1..370309ef 100644 --- a/geodiff/src/drivers/sqlitedriver.h +++ b/geodiff/src/drivers/sqlitedriver.h @@ -56,8 +56,8 @@ class SqliteDriver : public Driver void dumpData( ChangesetWriter &writer, bool useModified = false ) override; private: - void logApplyConflict( const std::string &type, const ChangesetEntry &entry, bool isDbErr = false ) const; - ChangeApplyResult applyChange( SqliteChangeApplyState &state, const ChangesetEntry &entry ); + void logApplyConflict( const std::string &type, const ChangesetDataEntry &entry, bool isDbErr = false ) const; + ChangeApplyResult applyDataChange( SqliteChangeApplyState &state, const ChangesetDataEntry &entry ); std::string databaseName( bool useModified 
= false ); std::shared_ptr mDb; diff --git a/geodiff/src/geodiff.cpp b/geodiff/src/geodiff.cpp index ffab38dd..ac5def05 100644 --- a/geodiff/src/geodiff.cpp +++ b/geodiff/src/geodiff.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "json.hpp" @@ -605,7 +606,8 @@ int GEODIFF_changesCount( int changesCount = 0; ChangesetEntry entry; while ( reader.nextEntry( entry ) ) - ++changesCount; + if ( std::holds_alternative( entry ) ) + ++changesCount; return changesCount; } @@ -1267,35 +1269,35 @@ void GEODIFF_CR_destroy( GEODIFF_ContextH /*contextHandle*/, GEODIFF_ChangesetRe int GEODIFF_CE_operation( GEODIFF_ContextH /*contextHandle*/, GEODIFF_ChangesetEntryH entryHandle ) { - return static_cast( entryHandle )->op; + return static_cast( entryHandle )->op; } GEODIFF_ChangesetTableH GEODIFF_CE_table( GEODIFF_ContextH /*contextHandle*/, GEODIFF_ChangesetEntryH entryHandle ) { - ChangesetTable *table = static_cast( entryHandle )->table; + ChangesetTable *table = static_cast( entryHandle )->table; return table; } int GEODIFF_CE_countValues( GEODIFF_ContextH /*contextHandle*/, GEODIFF_ChangesetEntryH entryHandle ) { - ChangesetEntry *entry = static_cast( entryHandle ); - size_t ret = entry->op == ChangesetEntry::OpDelete ? entry->oldValues.size() : entry->newValues.size(); + ChangesetDataEntry *entry = static_cast( entryHandle ); + size_t ret = entry->op == ChangesetDataEntry::OpDelete ? 
entry->oldValues.size() : entry->newValues.size(); return ( int ) ret; } GEODIFF_ValueH GEODIFF_CE_oldValue( GEODIFF_ContextH /*contextHandle*/, GEODIFF_ChangesetEntryH entryHandle, int i ) { - return new Value( static_cast( entryHandle )->oldValues[i] ); + return new Value( static_cast( entryHandle )->oldValues[i] ); } GEODIFF_ValueH GEODIFF_CE_newValue( GEODIFF_ContextH /*contextHandle*/, GEODIFF_ChangesetEntryH entryHandle, int i ) { - return new Value( static_cast( entryHandle )->newValues[i] ); + return new Value( static_cast( entryHandle )->newValues[i] ); } void GEODIFF_CE_destroy( GEODIFF_ContextH /*contextHandle*/, GEODIFF_ChangesetEntryH entryHandle ) { - delete static_cast( entryHandle ); + delete static_cast( entryHandle ); } int GEODIFF_V_type( GEODIFF_ContextH /*contextHandle*/, GEODIFF_ValueH valueHandle ) diff --git a/geodiff/src/geodiffrebase.cpp b/geodiff/src/geodiffrebase.cpp index 0afab253..564f0adc 100644 --- a/geodiff/src/geodiffrebase.cpp +++ b/geodiff/src/geodiffrebase.cpp @@ -1,9 +1,10 @@ -/* +/* GEODIFF - MIT License Copyright (C) 2019 Peter Petrik */ #include "geodiffrebase.hpp" +#include "changeset.h" #include "geodiffutils.hpp" #include "geodiff.h" #include "geodifflogger.hpp" @@ -22,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -191,7 +193,7 @@ struct RebaseMapping /////////////////////////////////////// -int _get_primary_key( const ChangesetEntry &entry ) +int _get_primary_key( const ChangesetDataEntry &entry ) { int fid; int nFidColumn; @@ -208,7 +210,11 @@ int _parse_old_changeset( ChangesetEntry entry; while ( reader_BASE_THEIRS.nextEntry( entry ) ) { - std::string tableName = entry.table->name; + if ( !std::holds_alternative( entry ) ) + continue; + ChangesetDataEntry &dataEntry = std::get( entry ); + + std::string tableName = dataEntry.table->name; // skip table if necessary if ( context->isTableSkipped( tableName ) ) @@ -216,21 +222,21 @@ int _parse_old_changeset( continue; } - int pk = 
_get_primary_key( entry ); + int pk = _get_primary_key( dataEntry ); TableRebaseInfo &tableInfo = dbInfo.tables[tableName]; - if ( entry.op == ChangesetEntry::OpInsert ) + if ( dataEntry.op == ChangesetDataEntry::OpInsert ) { tableInfo.inserted.insert( pk ); } - if ( entry.op == ChangesetEntry::OpDelete ) + if ( dataEntry.op == ChangesetDataEntry::OpDelete ) { tableInfo.deleted.insert( pk ); } - if ( entry.op == ChangesetEntry::OpUpdate ) + if ( dataEntry.op == ChangesetDataEntry::OpUpdate ) { - tableInfo.updated[pk] = entry.newValues; + tableInfo.updated[pk] = dataEntry.newValues; } } @@ -260,7 +266,11 @@ int _find_mapping_for_new_changeset( ChangesetEntry entry; while ( reader.nextEntry( entry ) ) { - std::string tableName = entry.table->name; + if ( !std::holds_alternative( entry ) ) + continue; + ChangesetDataEntry &dataEntry = std::get( entry ); + + std::string tableName = dataEntry.table->name; // skip table if necessary if ( context->isTableSkipped( tableName ) ) @@ -274,9 +284,9 @@ int _find_mapping_for_new_changeset( const TableRebaseInfo &tableInfo = tableIt->second; - if ( entry.op == ChangesetEntry::OpInsert ) + if ( dataEntry.op == ChangesetDataEntry::OpInsert ) { - int pk = _get_primary_key( entry ); + int pk = _get_primary_key( dataEntry ); if ( tableInfo.inserted.find( pk ) != tableInfo.inserted.end() ) { @@ -296,9 +306,9 @@ int _find_mapping_for_new_changeset( mapping.unmappedInsertIds[tableName].insert( pk ); } } - else if ( entry.op == ChangesetEntry::OpUpdate ) + else if ( dataEntry.op == ChangesetDataEntry::OpUpdate ) { - int pk = _get_primary_key( entry ); + int pk = _get_primary_key( dataEntry ); if ( tableInfo.deleted.find( pk ) != tableInfo.deleted.end() ) { @@ -306,9 +316,9 @@ int _find_mapping_for_new_changeset( mapping.addPkeyMapping( tableName, pk, RebaseMapping::INVALID_FID ); } } - else if ( entry.op == ChangesetEntry::OpDelete ) + else if ( dataEntry.op == ChangesetDataEntry::OpDelete ) { - int pk = _get_primary_key( entry ); + int 
pk = _get_primary_key( dataEntry ); if ( tableInfo.deleted.find( pk ) != tableInfo.deleted.end() ) { @@ -355,11 +365,11 @@ int _find_mapping_for_new_changeset( } -bool _handle_insert( const ChangesetEntry &entry, const RebaseMapping &mapping, ChangesetEntry &outEntry ) +bool _handle_insert( const ChangesetDataEntry &entry, const RebaseMapping &mapping, ChangesetDataEntry &outEntry ) { size_t numColumns = entry.table->columnCount(); - outEntry.op = ChangesetEntry::OpInsert; + outEntry.op = ChangesetDataEntry::OpInsert; outEntry.newValues.resize( numColumns ); // resolve primary key and patched primary key @@ -386,12 +396,12 @@ bool _handle_insert( const ChangesetEntry &entry, const RebaseMapping &mapping, return true; } -bool _handle_delete( const ChangesetEntry &entry, const RebaseMapping &mapping, - const TableRebaseInfo &tableInfo, ChangesetEntry &outEntry ) +bool _handle_delete( const ChangesetDataEntry &entry, const RebaseMapping &mapping, + const TableRebaseInfo &tableInfo, ChangesetDataEntry &outEntry ) { size_t numColumns = entry.table->columnCount(); - outEntry.op = ChangesetEntry::OpDelete; + outEntry.op = ChangesetDataEntry::OpDelete; outEntry.oldValues.resize( numColumns ); // resolve primary key and patched primary key @@ -455,13 +465,13 @@ void _addConflictItem( ConflictFeature &conflictFeature, int i, conflictFeature.addItem( item ); } -bool _handle_update( const ChangesetEntry &entry, const RebaseMapping &mapping, - const TableRebaseInfo &tableInfo, ChangesetEntry &outEntry, +bool _handle_update( const ChangesetDataEntry &entry, const RebaseMapping &mapping, + const TableRebaseInfo &tableInfo, ChangesetDataEntry &outEntry, std::vector &conflicts ) { size_t numColumns = entry.table->columnCount(); - outEntry.op = ChangesetEntry::OpUpdate; + outEntry.op = ChangesetDataEntry::OpUpdate; outEntry.oldValues.resize( numColumns ); outEntry.newValues.resize( numColumns ); @@ -538,7 +548,11 @@ void _prepare_new_changeset( const Context *context, while ( 
reader.nextEntry( entry ) ) { - std::string tableName = entry.table->name; + if ( !std::holds_alternative( entry ) ) + continue; + ChangesetDataEntry &dataEntry = std::get( entry ); + + std::string tableName = dataEntry.table->name; // skip table if necessary if ( context->isTableSkipped( tableName ) ) @@ -547,7 +561,7 @@ void _prepare_new_changeset( const Context *context, } // Inserts table into the definitions, if it doesn't already contain it - tableDefinitions.insert( {tableName, *entry.table} ); + tableDefinitions.insert( {tableName, *dataEntry.table} ); auto tablesIt = dbInfo.tables.find( tableName ); if ( tablesIt == dbInfo.tables.end() ) @@ -559,21 +573,21 @@ void _prepare_new_changeset( const Context *context, } bool writeEntry = false; - ChangesetEntry outEntry; + ChangesetDataEntry outEntry; // commits to same table -> now save the change to changeset - switch ( entry.op ) + switch ( dataEntry.op ) { - case ChangesetEntry::OpUpdate: - writeEntry = _handle_update( entry, mapping, tablesIt->second, outEntry, conflicts ); + case ChangesetDataEntry::OpUpdate: + writeEntry = _handle_update( dataEntry, mapping, tablesIt->second, outEntry, conflicts ); break; - case ChangesetEntry::OpInsert: - writeEntry = _handle_insert( entry, mapping, outEntry ); + case ChangesetDataEntry::OpInsert: + writeEntry = _handle_insert( dataEntry, mapping, outEntry ); break; - case ChangesetEntry::OpDelete: - writeEntry = _handle_delete( entry, mapping, tablesIt->second, outEntry ); + case ChangesetDataEntry::OpDelete: + writeEntry = _handle_delete( dataEntry, mapping, tablesIt->second, outEntry ); break; } diff --git a/geodiff/src/geodiffutils.cpp b/geodiff/src/geodiffutils.cpp index 8281ac25..ae23ca7a 100644 --- a/geodiff/src/geodiffutils.cpp +++ b/geodiff/src/geodiffutils.cpp @@ -346,7 +346,7 @@ bool isLayerTable( const std::string &tableName ) //// -void get_primary_key( const ChangesetEntry &entry, int &fid, int &nColumn ) +void get_primary_key( const ChangesetDataEntry 
&entry, int &fid, int &nColumn ) { const std::vector &tablePkeys = entry.table->primaryKeys; @@ -375,11 +375,11 @@ void get_primary_key( const ChangesetEntry &entry, int &fid, int &nColumn ) // now get the value Value pkeyValue; - if ( entry.op == ChangesetEntry::OpInsert ) + if ( entry.op == ChangesetDataEntry::OpInsert ) { pkeyValue = entry.newValues[pk_column_number]; } - else if ( entry.op == ChangesetEntry::OpDelete || entry.op == ChangesetEntry::OpUpdate ) + else if ( entry.op == ChangesetDataEntry::OpDelete || entry.op == ChangesetDataEntry::OpUpdate ) { pkeyValue = entry.oldValues[pk_column_number]; } diff --git a/geodiff/src/geodiffutils.hpp b/geodiff/src/geodiffutils.hpp index d4250981..20e3514a 100644 --- a/geodiff/src/geodiffutils.hpp +++ b/geodiff/src/geodiffutils.hpp @@ -20,7 +20,7 @@ #include "geodiffcontext.hpp" class Buffer; -struct ChangesetEntry; +struct ChangesetDataEntry; class GeoDiffException: public std::exception { @@ -138,7 +138,7 @@ int indexOf( const std::vector &arr, const std::string &val ); std::string concatNames( const std::vector &names ); -void get_primary_key( const ChangesetEntry &entry, int &fid, int &nColumn ); +void get_primary_key( const ChangesetDataEntry &entry, int &fid, int &nColumn ); //! 
Returns value of an environment variable - or returns default value if it is not set diff --git a/geodiff/tests/geodiff_testutils.cpp b/geodiff/tests/geodiff_testutils.cpp index 9913e71e..bab76935 100644 --- a/geodiff/tests/geodiff_testutils.cpp +++ b/geodiff/tests/geodiff_testutils.cpp @@ -17,6 +17,7 @@ #include #include +#include "changeset.h" #include "changesetreader.h" #include "changesetwriter.h" #include "geodiffutils.hpp" @@ -299,15 +300,15 @@ void writeSingleTableChangeset( std::string filename, const ChangesetTable &tabl } -static bool testAllEntriesInOtherVector( const std::vector &tableEntriesA, const std::vector &tableEntriesB ) +static bool testAllEntriesInOtherVector( const std::vector &tableEntriesA, const std::vector &tableEntriesB ) { for ( size_t i = 0; i < tableEntriesA.size(); ++i ) { - const ChangesetEntry &entryI = tableEntriesA[i]; + const ChangesetDataEntry &entryI = tableEntriesA[i]; bool found = false; for ( size_t j = 0; j < tableEntriesB.size(); ++j ) { - const ChangesetEntry &entryJ = tableEntriesB[j]; + const ChangesetDataEntry &entryJ = tableEntriesB[j]; if ( entryI.op == entryJ.op && entryI.oldValues == entryJ.oldValues && entryI.newValues == entryJ.newValues ) { found = true; @@ -331,20 +332,28 @@ bool compareDiffsByContent( std::string diffA, std::string diffB ) return false; std::unordered_map > tablesA, tablesB; - std::unordered_map > entriesA, entriesB; + std::unordered_map > dataEntriesA, dataEntriesB; ChangesetEntry entryA, entryB; while ( readerA.nextEntry( entryA ) ) { - if ( tablesA.find( entryA.table->name ) == tablesA.end() ) - tablesA[entryA.table->name] = entryA.table->primaryKeys; - entriesA[entryA.table->name].push_back( entryA ); + if ( ChangesetDataEntry *dataEntryA = std::get_if( &entryA ) ) + { + if ( tablesA.find( dataEntryA->table->name ) == tablesA.end() ) + tablesA[dataEntryA->table->name] = dataEntryA->table->primaryKeys; + dataEntriesA[dataEntryA->table->name].push_back( *dataEntryA ); + } + // 
TODO(dvdkon): Handle other entries? } while ( readerB.nextEntry( entryB ) ) { - if ( tablesB.find( entryB.table->name ) == tablesB.end() ) - tablesB[entryB.table->name] = entryB.table->primaryKeys; - entriesB[entryB.table->name].push_back( entryB ); + if ( ChangesetDataEntry *dataEntryB = std::get_if( &entryB ) ) + { + if ( tablesB.find( dataEntryB->table->name ) == tablesB.end() ) + tablesB[dataEntryB->table->name] = dataEntryB->table->primaryKeys; + dataEntriesB[dataEntryB->table->name].push_back( *dataEntryB ); + } + // TODO(dvdkon): Handle other entries? } if ( tablesA != tablesB ) @@ -353,11 +362,11 @@ bool compareDiffsByContent( std::string diffA, std::string diffB ) for ( auto tableIt = tablesA.begin(); tableIt != tablesA.end(); ++tableIt ) { std::string tableName = tableIt->first; - if ( entriesA[tableName].size() != entriesB[tableName].size() ) + if ( dataEntriesA[tableName].size() != dataEntriesB[tableName].size() ) return false; - if ( !testAllEntriesInOtherVector( entriesA[tableName], entriesB[tableName] ) ) + if ( !testAllEntriesInOtherVector( dataEntriesA[tableName], dataEntriesB[tableName] ) ) return false; - if ( !testAllEntriesInOtherVector( entriesB[tableName], entriesA[tableName] ) ) + if ( !testAllEntriesInOtherVector( dataEntriesB[tableName], dataEntriesA[tableName] ) ) return false; } return true; diff --git a/geodiff/tests/geodiff_testutils.hpp b/geodiff/tests/geodiff_testutils.hpp index d47ef034..6fe1038e 100644 --- a/geodiff/tests/geodiff_testutils.hpp +++ b/geodiff/tests/geodiff_testutils.hpp @@ -10,6 +10,7 @@ #include #include +#include "changeset.h" #include "geodiff.h" #include "geodiff_config.hpp" @@ -53,7 +54,7 @@ bool fileContentEquals( const std::string &file1, const std::string &file2 ); bool isFileEmpty( const std::string &filepath ); struct ChangesetTable; -struct ChangesetEntry; +struct ChangesetDataEntry; //! 
Helper function to write a diff file for a couple of tables void writeChangeset( std::string filename, const std::unordered_map &tables, diff --git a/geodiff/tests/test_changeset_reader.cpp b/geodiff/tests/test_changeset_reader.cpp index 0e8969a2..628320fe 100644 --- a/geodiff/tests/test_changeset_reader.cpp +++ b/geodiff/tests/test_changeset_reader.cpp @@ -4,6 +4,8 @@ */ #include "gtest/gtest.h" +#include +#include "changeset.h" #include "geodiff_testutils.hpp" #include "geodiff.h" @@ -26,17 +28,19 @@ TEST( ChangesetReaderTest, test_read_insert ) ChangesetEntry entry; EXPECT_TRUE( reader.nextEntry( entry ) ); - EXPECT_EQ( entry.op, ChangesetEntry::OpInsert ); - EXPECT_EQ( entry.table->name, "simple" ); - EXPECT_EQ( entry.table->primaryKeys.size(), 4 ); - EXPECT_EQ( entry.table->primaryKeys[0], true ); - EXPECT_EQ( entry.table->primaryKeys[1], false ); - EXPECT_EQ( entry.newValues.size(), 4 ); - EXPECT_EQ( entry.newValues[0].type(), Value::TypeInt ); - EXPECT_EQ( entry.newValues[0].getInt(), 4 ); - EXPECT_EQ( entry.newValues[1].type(), Value::TypeBlob ); - EXPECT_EQ( entry.newValues[2].type(), Value::TypeText ); - EXPECT_EQ( entry.newValues[2].getString(), "my new point A" ); + EXPECT_TRUE( std::holds_alternative( entry ) ); + ChangesetDataEntry &dataEntry = std::get( entry ); + EXPECT_EQ( dataEntry.op, ChangesetDataEntry::OpInsert ); + EXPECT_EQ( dataEntry.table->name, "simple" ); + EXPECT_EQ( dataEntry.table->primaryKeys.size(), 4 ); + EXPECT_EQ( dataEntry.table->primaryKeys[0], true ); + EXPECT_EQ( dataEntry.table->primaryKeys[1], false ); + EXPECT_EQ( dataEntry.newValues.size(), 4 ); + EXPECT_EQ( dataEntry.newValues[0].type(), Value::TypeInt ); + EXPECT_EQ( dataEntry.newValues[0].getInt(), 4 ); + EXPECT_EQ( dataEntry.newValues[1].type(), Value::TypeBlob ); + EXPECT_EQ( dataEntry.newValues[2].type(), Value::TypeText ); + EXPECT_EQ( dataEntry.newValues[2].getString(), "my new point A" ); EXPECT_FALSE( reader.nextEntry( entry ) ); EXPECT_FALSE( reader.nextEntry( 
entry ) ); @@ -51,26 +55,29 @@ TEST( ChangesetReaderTest, test_read_update ) ChangesetEntry entry; EXPECT_TRUE( reader.nextEntry( entry ) ); - EXPECT_EQ( entry.op, ChangesetEntry::OpUpdate ); - EXPECT_EQ( entry.table->name, "simple" ); + EXPECT_TRUE( std::holds_alternative( entry ) ); + ChangesetDataEntry &dataEntry = std::get( entry ); - EXPECT_EQ( entry.oldValues.size(), 4 ); - EXPECT_EQ( entry.newValues.size(), 4 ); + EXPECT_EQ( dataEntry.op, ChangesetDataEntry::OpUpdate ); + EXPECT_EQ( dataEntry.table->name, "simple" ); + + EXPECT_EQ( dataEntry.oldValues.size(), 4 ); + EXPECT_EQ( dataEntry.newValues.size(), 4 ); // pkey - unchanged - EXPECT_EQ( entry.oldValues[0].type(), Value::TypeInt ); - EXPECT_EQ( entry.oldValues[0].getInt(), 2 ); - EXPECT_EQ( entry.newValues[0].type(), Value::TypeUndefined ); + EXPECT_EQ( dataEntry.oldValues[0].type(), Value::TypeInt ); + EXPECT_EQ( dataEntry.oldValues[0].getInt(), 2 ); + EXPECT_EQ( dataEntry.newValues[0].type(), Value::TypeUndefined ); // geometry - changed - EXPECT_EQ( entry.oldValues[1].type(), Value::TypeBlob ); - EXPECT_EQ( entry.newValues[1].type(), Value::TypeBlob ); + EXPECT_EQ( dataEntry.oldValues[1].type(), Value::TypeBlob ); + EXPECT_EQ( dataEntry.newValues[1].type(), Value::TypeBlob ); // unchanged - EXPECT_EQ( entry.oldValues[2].type(), Value::TypeUndefined ); - EXPECT_EQ( entry.newValues[2].type(), Value::TypeUndefined ); + EXPECT_EQ( dataEntry.oldValues[2].type(), Value::TypeUndefined ); + EXPECT_EQ( dataEntry.newValues[2].type(), Value::TypeUndefined ); // changed - EXPECT_EQ( entry.oldValues[3].type(), Value::TypeInt ); - EXPECT_EQ( entry.oldValues[3].getInt(), 2 ); - EXPECT_EQ( entry.newValues[3].type(), Value::TypeInt ); - EXPECT_EQ( entry.newValues[3].getInt(), 9999 ); + EXPECT_EQ( dataEntry.oldValues[3].type(), Value::TypeInt ); + EXPECT_EQ( dataEntry.oldValues[3].getInt(), 2 ); + EXPECT_EQ( dataEntry.newValues[3].type(), Value::TypeInt ); + EXPECT_EQ( dataEntry.newValues[3].getInt(), 9999 ); 
EXPECT_FALSE( reader.nextEntry( entry ) ); } @@ -84,17 +91,20 @@ TEST( ChangesetReaderTest, test_read_delete ) ChangesetEntry entry; EXPECT_TRUE( reader.nextEntry( entry ) ); - EXPECT_EQ( entry.op, ChangesetEntry::OpDelete ); - EXPECT_EQ( entry.table->name, "simple" ); - - EXPECT_EQ( entry.oldValues.size(), 4 ); - EXPECT_EQ( entry.oldValues[0].type(), Value::TypeInt ); - EXPECT_EQ( entry.oldValues[0].getInt(), 2 ); - EXPECT_EQ( entry.oldValues[1].type(), Value::TypeBlob ); - EXPECT_EQ( entry.oldValues[2].type(), Value::TypeText ); - EXPECT_EQ( entry.oldValues[2].getString(), "feature2" ); - EXPECT_EQ( entry.oldValues[3].type(), Value::TypeInt ); - EXPECT_EQ( entry.oldValues[3].getInt(), 2 ); + EXPECT_TRUE( std::holds_alternative( entry ) ); + ChangesetDataEntry &dataEntry = std::get( entry ); + + EXPECT_EQ( dataEntry.op, ChangesetDataEntry::OpDelete ); + EXPECT_EQ( dataEntry.table->name, "simple" ); + + EXPECT_EQ( dataEntry.oldValues.size(), 4 ); + EXPECT_EQ( dataEntry.oldValues[0].type(), Value::TypeInt ); + EXPECT_EQ( dataEntry.oldValues[0].getInt(), 2 ); + EXPECT_EQ( dataEntry.oldValues[1].type(), Value::TypeBlob ); + EXPECT_EQ( dataEntry.oldValues[2].type(), Value::TypeText ); + EXPECT_EQ( dataEntry.oldValues[2].getString(), "feature2" ); + EXPECT_EQ( dataEntry.oldValues[3].type(), Value::TypeInt ); + EXPECT_EQ( dataEntry.oldValues[3].getInt(), 2 ); EXPECT_FALSE( reader.nextEntry( entry ) ); EXPECT_FALSE( reader.nextEntry( entry ) ); diff --git a/geodiff/tests/test_changeset_utils.cpp b/geodiff/tests/test_changeset_utils.cpp index 1fd95a14..c6c7e6df 100644 --- a/geodiff/tests/test_changeset_utils.cpp +++ b/geodiff/tests/test_changeset_utils.cpp @@ -48,12 +48,14 @@ TEST( ChangesetUtils, test_invert_insert ) ChangesetEntry entry; EXPECT_TRUE( readerInv.nextEntry( entry ) ); - EXPECT_EQ( entry.op, ChangesetEntry::OpDelete ); - EXPECT_EQ( entry.table->name, "simple" ); - EXPECT_EQ( entry.oldValues.size(), 4 ); - EXPECT_EQ( entry.oldValues[0].getInt(), 4 ); - 
EXPECT_EQ( entry.oldValues[2].getString(), "my new point A" ); - EXPECT_EQ( entry.oldValues[3].getInt(), 1 ); + EXPECT_TRUE( std::holds_alternative( entry ) ); + ChangesetDataEntry &dataEntry = std::get( entry ); + EXPECT_EQ( dataEntry.op, ChangesetDataEntry::OpDelete ); + EXPECT_EQ( dataEntry.table->name, "simple" ); + EXPECT_EQ( dataEntry.oldValues.size(), 4 ); + EXPECT_EQ( dataEntry.oldValues[0].getInt(), 4 ); + EXPECT_EQ( dataEntry.oldValues[2].getString(), "my new point A" ); + EXPECT_EQ( dataEntry.oldValues[3].getInt(), 1 ); EXPECT_FALSE( readerInv.nextEntry( entry ) ); } @@ -69,12 +71,14 @@ TEST( ChangesetUtils, test_invert_delete ) ChangesetEntry entry; EXPECT_TRUE( readerInv.nextEntry( entry ) ); - EXPECT_EQ( entry.op, ChangesetEntry::OpInsert ); - EXPECT_EQ( entry.table->name, "simple" ); - EXPECT_EQ( entry.newValues.size(), 4 ); - EXPECT_EQ( entry.newValues[0].getInt(), 2 ); - EXPECT_EQ( entry.newValues[2].getString(), "feature2" ); - EXPECT_EQ( entry.newValues[3].getInt(), 2 ); + EXPECT_TRUE( std::holds_alternative( entry ) ); + ChangesetDataEntry &dataEntry = std::get( entry ); + EXPECT_EQ( dataEntry.op, ChangesetDataEntry::OpInsert ); + EXPECT_EQ( dataEntry.table->name, "simple" ); + EXPECT_EQ( dataEntry.newValues.size(), 4 ); + EXPECT_EQ( dataEntry.newValues[0].getInt(), 2 ); + EXPECT_EQ( dataEntry.newValues[2].getString(), "feature2" ); + EXPECT_EQ( dataEntry.newValues[3].getInt(), 2 ); EXPECT_FALSE( readerInv.nextEntry( entry ) ); } @@ -90,17 +94,19 @@ TEST( ChangesetUtils, test_invert_update ) ChangesetEntry entry; EXPECT_TRUE( readerInv.nextEntry( entry ) ); - EXPECT_EQ( entry.op, ChangesetEntry::OpUpdate ); - EXPECT_EQ( entry.table->name, "simple" ); - EXPECT_EQ( entry.oldValues.size(), 4 ); - EXPECT_EQ( entry.oldValues[0].type(), Value::TypeInt ); - EXPECT_EQ( entry.oldValues[0].getInt(), 2 ); - EXPECT_EQ( entry.oldValues[2].type(), Value::TypeUndefined ); - EXPECT_EQ( entry.oldValues[3].getInt(), 9999 ); - EXPECT_EQ( entry.newValues.size(), 4 
); - EXPECT_EQ( entry.newValues[0].type(), Value::TypeUndefined ); - EXPECT_EQ( entry.newValues[2].type(), Value::TypeUndefined ); - EXPECT_EQ( entry.newValues[3].getInt(), 2 ); + EXPECT_TRUE( std::holds_alternative( entry ) ); + ChangesetDataEntry &dataEntry = std::get( entry ); + EXPECT_EQ( dataEntry.op, ChangesetDataEntry::OpUpdate ); + EXPECT_EQ( dataEntry.table->name, "simple" ); + EXPECT_EQ( dataEntry.oldValues.size(), 4 ); + EXPECT_EQ( dataEntry.oldValues[0].type(), Value::TypeInt ); + EXPECT_EQ( dataEntry.oldValues[0].getInt(), 2 ); + EXPECT_EQ( dataEntry.oldValues[2].type(), Value::TypeUndefined ); + EXPECT_EQ( dataEntry.oldValues[3].getInt(), 9999 ); + EXPECT_EQ( dataEntry.newValues.size(), 4 ); + EXPECT_EQ( dataEntry.newValues[0].type(), Value::TypeUndefined ); + EXPECT_EQ( dataEntry.newValues[2].type(), Value::TypeUndefined ); + EXPECT_EQ( dataEntry.newValues[3].getInt(), 2 ); EXPECT_FALSE( readerInv.nextEntry( entry ) ); } @@ -197,40 +203,40 @@ TEST( ChangesetUtils, test_concat_changesets_simple_table ) tableFoo.primaryKeys.push_back( false ); // name tableFoo.primaryKeys.push_back( false ); // rating - ChangesetEntry fooInsert123 = ChangesetEntry::make( - &tableFoo, ChangesetEntry::OpInsert, {}, + ChangesetDataEntry fooInsert123 = ChangesetDataEntry::make( + &tableFoo, ChangesetDataEntry::OpInsert, {}, { Value::makeInt( 123 ), Value::makeText( "hello" ), Value::makeInt( 5 ) } ); - ChangesetEntry fooDelete123 = ChangesetEntry::make( - &tableFoo, ChangesetEntry::OpDelete, + ChangesetDataEntry fooDelete123 = ChangesetDataEntry::make( + &tableFoo, ChangesetDataEntry::OpDelete, { Value::makeInt( 123 ), Value::makeText( "hello" ), Value::makeInt( 5 ) }, {} ); - ChangesetEntry fooUpdate123 = ChangesetEntry::make( - &tableFoo, ChangesetEntry::OpUpdate, + ChangesetDataEntry fooUpdate123 = ChangesetDataEntry::make( + &tableFoo, ChangesetDataEntry::OpUpdate, { Value::makeInt( 123 ), Value::makeText( "hello" ), Value::makeInt( 5 ) }, { Value(), Value::makeText( 
"world" ), Value::makeInt( 4 ) } ); - ChangesetEntry fooDelete123_2 = ChangesetEntry::make( - &tableFoo, ChangesetEntry::OpDelete, + ChangesetDataEntry fooDelete123_2 = ChangesetDataEntry::make( + &tableFoo, ChangesetDataEntry::OpDelete, { Value::makeInt( 123 ), Value::makeText( "world" ), Value::makeInt( 4 ) }, {} ); - ChangesetEntry fooUpdate123_2 = ChangesetEntry::make( - &tableFoo, ChangesetEntry::OpUpdate, + ChangesetDataEntry fooUpdate123_2 = ChangesetDataEntry::make( + &tableFoo, ChangesetDataEntry::OpUpdate, { Value::makeInt( 123 ), Value(), Value::makeInt( 4 ) }, { Value(), Value(), Value::makeInt( 1 ) } ); - ChangesetEntry fooUpdate123_inverse = ChangesetEntry::make( - &tableFoo, ChangesetEntry::OpUpdate, + ChangesetDataEntry fooUpdate123_inverse = ChangesetDataEntry::make( + &tableFoo, ChangesetDataEntry::OpUpdate, { Value::makeInt( 123 ), Value::makeText( "world" ), Value::makeInt( 4 ) }, { Value(), Value::makeText( "hello" ), Value::makeInt( 5 ) } ); - ChangesetEntry fooUpdate123_pkey = ChangesetEntry::make( - &tableFoo, ChangesetEntry::OpUpdate, + ChangesetDataEntry fooUpdate123_pkey = ChangesetDataEntry::make( + &tableFoo, ChangesetDataEntry::OpUpdate, { Value::makeInt( 123 ), Value(), Value() }, { Value::makeInt( 124 ), Value(), Value() } ); - ChangesetEntry fooUpdate456 = ChangesetEntry::make( - &tableFoo, ChangesetEntry::OpUpdate, + ChangesetDataEntry fooUpdate456 = ChangesetDataEntry::make( + &tableFoo, ChangesetDataEntry::OpUpdate, { Value::makeInt( 456 ), Value(), Value::makeInt( 1 ) }, { Value(), Value(), Value::makeInt( 2 ) } ); @@ -238,37 +244,37 @@ TEST( ChangesetUtils, test_concat_changesets_simple_table ) testConcatOneTable( "foo-insert-update", tableFoo, { fooInsert123 }, { fooUpdate123 }, { - ChangesetEntry::make( &tableFoo, ChangesetEntry::OpInsert, {}, + ChangesetDataEntry::make( &tableFoo, ChangesetDataEntry::OpInsert, {}, { Value::makeInt( 123 ), Value::makeText( "world" ), Value::makeInt( 4 ) } - ) + ) } ); testConcatOneTable( 
"foo-insert-delete", tableFoo, { fooInsert123 }, { fooDelete123 }, {} ); testConcatOneTable( "foo-update-update", tableFoo, { fooUpdate123 }, { fooUpdate123_2 }, { - ChangesetEntry::make( &tableFoo, ChangesetEntry::OpUpdate, + ChangesetDataEntry::make( &tableFoo, ChangesetDataEntry::OpUpdate, { Value::makeInt( 123 ), Value::makeText( "hello" ), Value::makeInt( 5 ) }, { Value(), Value::makeText( "world" ), Value::makeInt( 1 ) } - ) + ) } ); testConcatOneTable( "foo-update-inv-update", tableFoo, { fooUpdate123 }, { fooUpdate123_inverse }, { } ); testConcatOneTable( "foo-update-delete", tableFoo, { fooUpdate123 }, { fooDelete123_2 }, { - ChangesetEntry::make( &tableFoo, ChangesetEntry::OpDelete, + ChangesetDataEntry::make( &tableFoo, ChangesetDataEntry::OpDelete, { Value::makeInt( 123 ), Value::makeText( "hello" ), Value::makeInt( 5 ) }, {} - ) + ) } ); testConcatOneTable( "foo-delete-insert", tableFoo, { fooDelete123_2 }, { fooInsert123 }, { - ChangesetEntry::make( &tableFoo, ChangesetEntry::OpUpdate, + ChangesetDataEntry::make( &tableFoo, ChangesetDataEntry::OpUpdate, { Value::makeInt( 123 ), Value::makeText( "world" ), Value::makeInt( 4 ) }, { Value(), Value::makeText( "hello" ), Value::makeInt( 5 ) } - ) + ) } ); testConcatOneTable( "foo-delete-inv-insert", tableFoo, { fooDelete123 }, { fooInsert123 }, { } ); @@ -294,12 +300,12 @@ TEST( ChangesetUtils, test_concat_changesets_no_pkey_table ) tableNoPkey.primaryKeys.push_back( false ); tableNoPkey.primaryKeys.push_back( false ); - ChangesetEntry noPkeyInsert1 = ChangesetEntry::make( - &tableNoPkey, ChangesetEntry::OpInsert, {}, + ChangesetDataEntry noPkeyInsert1 = ChangesetDataEntry::make( + &tableNoPkey, ChangesetDataEntry::OpInsert, {}, { Value::makeInt( 1 ), Value::makeText( "hey" ) } ); - ChangesetEntry noPkeyUpdate2 = ChangesetEntry::make( - &tableNoPkey, ChangesetEntry::OpUpdate, + ChangesetDataEntry noPkeyUpdate2 = ChangesetDataEntry::make( + &tableNoPkey, ChangesetDataEntry::OpUpdate, { Value::makeInt( 2 ), 
Value::makeText( "huh" ) }, { Value(), Value::makeText( "ho!" ) } ); @@ -321,16 +327,16 @@ TEST( ChangesetUtils, test_concat_changesets_multiple_tables ) tableBar.primaryKeys.push_back( true ); // fid (pkey) tableBar.primaryKeys.push_back( false ); // name - ChangesetEntry fooInsert123 = ChangesetEntry::make( - &tableFoo, ChangesetEntry::OpInsert, {}, + ChangesetDataEntry fooInsert123 = ChangesetDataEntry::make( + &tableFoo, ChangesetDataEntry::OpInsert, {}, { Value::makeInt( 123 ), Value::makeText( "hello" ), Value::makeInt( 5 ) } ); - ChangesetEntry barInsert123 = ChangesetEntry::make( - &tableBar, ChangesetEntry::OpInsert, {}, + ChangesetDataEntry barInsert123 = ChangesetDataEntry::make( + &tableBar, ChangesetDataEntry::OpInsert, {}, { Value::makeInt( 123 ), Value::makeText( "ha!" ) } ); - ChangesetEntry barUpdate123 = ChangesetEntry::make( - &tableFoo, ChangesetEntry::OpUpdate, + ChangesetDataEntry barUpdate123 = ChangesetDataEntry::make( + &tableFoo, ChangesetDataEntry::OpUpdate, { Value::makeInt( 123 ), Value::makeText( "ha!" 
) }, { Value(), Value::makeText( ":-)" ) } ); @@ -346,13 +352,13 @@ TEST( ChangesetUtils, test_concat_changesets_multiple_tables ) // expected result { std::make_pair( "foo", std::vector( { - ChangesetEntry::make( &tableFoo, ChangesetEntry::OpInsert, {}, + ChangesetDataEntry::make( &tableFoo, ChangesetDataEntry::OpInsert, {}, { Value::makeInt( 123 ), Value::makeText( "hello" ), Value::makeInt( 5 ) } - ) } ) ), + ) } ) ), std::make_pair( "bar", std::vector( { - ChangesetEntry::make( &tableBar, ChangesetEntry::OpInsert, {}, + ChangesetDataEntry::make( &tableBar, ChangesetDataEntry::OpInsert, {}, { Value::makeInt( 123 ), Value::makeText( ":-)" ) } - ) } ) ) + ) } ) ) } ); testConcat( "multi-unrelated-insert-update", @@ -364,14 +370,14 @@ TEST( ChangesetUtils, test_concat_changesets_multiple_tables ) // expected result { std::make_pair( "foo", std::vector( { - ChangesetEntry::make( &tableFoo, ChangesetEntry::OpInsert, {}, + ChangesetDataEntry::make( &tableFoo, ChangesetDataEntry::OpInsert, {}, { Value::makeInt( 123 ), Value::makeText( "hello" ), Value::makeInt( 5 ) } - ) } ) ), + ) } ) ), std::make_pair( "bar", std::vector( { - ChangesetEntry::make( &tableBar, ChangesetEntry::OpUpdate, + ChangesetDataEntry::make( &tableBar, ChangesetDataEntry::OpUpdate, { Value::makeInt( 123 ), Value::makeText( "ha!" 
) }, { Value(), Value::makeText( ":-)" ) } - ) } ) ) + ) } ) ) } ); } diff --git a/geodiff/tests/test_geometry_utils.cpp b/geodiff/tests/test_geometry_utils.cpp index 892e3a52..78239d03 100644 --- a/geodiff/tests/test_geometry_utils.cpp +++ b/geodiff/tests/test_geometry_utils.cpp @@ -4,6 +4,8 @@ */ #include "gtest/gtest.h" +#include +#include "changeset.h" #include "geodiff_testutils.hpp" #include "geodiff.h" @@ -20,12 +22,17 @@ TEST( GeometryUtilsTest, test_wkb_from_geometry ) ChangesetEntry entry; EXPECT_TRUE( reader.nextEntry( entry ) ); - EXPECT_EQ( entry.table->name, "gpkg_contents" ); + EXPECT_TRUE( std::holds_alternative( entry ) ); + ChangesetDataEntry &dataEntry = std::get( entry ); + EXPECT_EQ( dataEntry.table->name, "gpkg_contents" ); EXPECT_TRUE( reader.nextEntry( entry ) ); - EXPECT_EQ( entry.table->name, "simple" ); - EXPECT_EQ( entry.oldValues[1].type(), Value::TypeBlob ); - std::string gpkgWkb = entry.oldValues[1].getString(); + EXPECT_TRUE( std::holds_alternative( entry ) ); + dataEntry = std::get( entry ); + + EXPECT_EQ( dataEntry.table->name, "simple" ); + EXPECT_EQ( dataEntry.oldValues[1].type(), Value::TypeBlob ); + std::string gpkgWkb = dataEntry.oldValues[1].getString(); const char *c_gpkgWkb = gpkgWkb.c_str(); size_t length = gpkgWkb.length(); From 8a284caf2332ab2e1ec6a267ce2f107732240fef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ko=C5=88a=C5=99=C3=ADk?= Date: Fri, 9 Jan 2026 20:33:21 +0100 Subject: [PATCH 02/21] Reuse TableColumnInfo for schema change entries --- geodiff/src/changeset.h | 15 ++++---------- geodiff/src/changesetreader.cpp | 23 ++++++++++++++------- geodiff/src/changesetreader.h | 3 ++- geodiff/src/changesetutils.cpp | 18 ++++++++++------ geodiff/src/changesetwriter.cpp | 31 +++++++++++++++++++--------- geodiff/src/changesetwriter.h | 2 +- geodiff/src/drivers/postgresdriver.h | 1 + geodiff/src/drivers/sqlitedriver.h | 1 + geodiff/src/tableschema.cpp | 1 + geodiff/src/tableschema.h | 2 +- 10 files changed, 60 
insertions(+), 37 deletions(-) diff --git a/geodiff/src/changeset.h b/geodiff/src/changeset.h index fc57a044..92b98a06 100644 --- a/geodiff/src/changeset.h +++ b/geodiff/src/changeset.h @@ -12,6 +12,8 @@ #include #include +#include "tableschema.h" + /** * Representation of a single value stored in a column. @@ -265,20 +267,11 @@ struct ChangesetDataEntry } }; -//! Description of column used by DDL entries -struct ChangesetDdlColumn -{ - std::string name; - std::string type; - bool isNotNull; - bool isUnique; -}; - //! Entry for CREATE TABLE command struct ChangesetCreateTableEntry { std::string tableName; - std::vector columns; + std::vector columns; }; //! Entry for DROP TABLE command @@ -291,7 +284,7 @@ struct ChangesetDropTableEntry struct ChangesetAddColumnEntry { std::string tableName; - ChangesetDdlColumn column; + TableColumnInfo column; }; //! Entry for ALTER TABLE ... DROP COLUMN command diff --git a/geodiff/src/changesetreader.cpp b/geodiff/src/changesetreader.cpp index 20847503..6eaf749e 100644 --- a/geodiff/src/changesetreader.cpp +++ b/geodiff/src/changesetreader.cpp @@ -9,6 +9,8 @@ #include "geodiffutils.hpp" #include "changesetgetvarint.h" #include "portableendian.h" +#include "sqliteutils.h" +#include "tableschema.h" #include #include @@ -223,14 +225,21 @@ ChangesetDataEntry ChangesetReader::readDataEntry( ChangesetEntryType type ) return entry; } -ChangesetDdlColumn ChangesetReader::readDdlColumn() +TableColumnInfo ChangesetReader::readColumnInfo() { - ChangesetDdlColumn column; + TableColumnInfo column; column.name = readNullTerminatedString(); - column.type = readNullTerminatedString(); + column.type.baseType = static_cast( readByte() ); + column.type.dbType = column.type.baseTypeToString( column.type.baseType ); char flags = readByte(); - column.isNotNull = flags & 0x1; - column.isUnique = flags & 0x2; + column.isPrimaryKey = flags & 1; + column.isNotNull = flags & ( 1 << 1 ); + column.isAutoIncrement = flags & ( 1 << 2 ); + column.isGeometry = 
flags & ( 1 << 3 ); + column.geomHasZ = flags & ( 1 << 4 ); + column.geomHasM = flags & ( 1 << 5 ); + column.geomType = readNullTerminatedString(); + column.geomSrsId = readVarint(); return column; } @@ -242,7 +251,7 @@ ChangesetCreateTableEntry ChangesetReader::readCreateTableEntry() entry.columns.resize( columnCount ); for ( size_t i = 0; i < entry.columns.size(); i++ ) { - entry.columns[i] = readDdlColumn(); + entry.columns[i] = readColumnInfo(); } return entry; } @@ -258,7 +267,7 @@ ChangesetAddColumnEntry ChangesetReader::readAddColumnEntry() { ChangesetAddColumnEntry entry; entry.tableName = readNullTerminatedString(); - entry.column = readDdlColumn(); + entry.column = readColumnInfo(); return entry; } diff --git a/geodiff/src/changesetreader.h b/geodiff/src/changesetreader.h index a6c45532..e25805d3 100644 --- a/geodiff/src/changesetreader.h +++ b/geodiff/src/changesetreader.h @@ -10,6 +10,7 @@ #include "geodiff.h" #include "changeset.h" +#include "tableschema.h" class Buffer; @@ -46,7 +47,7 @@ class ChangesetReader void readRowValues( std::vector &values ); void readTableRecord(); ChangesetDataEntry readDataEntry( ChangesetEntryType type ); - ChangesetDdlColumn readDdlColumn(); + TableColumnInfo readColumnInfo(); ChangesetCreateTableEntry readCreateTableEntry(); ChangesetDropTableEntry readDropTableEntry(); ChangesetAddColumnEntry readAddColumnEntry(); diff --git a/geodiff/src/changesetutils.cpp b/geodiff/src/changesetutils.cpp index 996bfce4..5e44e6c7 100644 --- a/geodiff/src/changesetutils.cpp +++ b/geodiff/src/changesetutils.cpp @@ -196,13 +196,19 @@ nlohmann::json changesetDataEntryToJSON( const ChangesetDataEntry &entry ) return res; } -static nlohmann::json changesetColumnToJSON( const ChangesetDdlColumn &column ) +static nlohmann::json columnInfoToJSON( const TableColumnInfo &column ) { nlohmann::json res; res["name"] = column.name; - res["type"] = column.type; + res["type"] = column.type.dbType; + res["isPrimaryKey"] = column.isPrimaryKey; 
res["isNotNull"] = column.isNotNull; - res["isUnique"] = column.isUnique; + res["isAutoIncrement"] = column.isAutoIncrement; + res["isGeometry"] = column.isGeometry; + res["geomType"] = column.geomType; + res["geomSrsId"] = column.geomSrsId; + res["geomHasZ"] = column.geomHasZ; + res["geomHasM"] = column.geomHasM; return res; } @@ -218,9 +224,9 @@ nlohmann::json changesetEntryToJSON( const ChangesetEntry &entry ) res["type"] = "create_table"; res["tableName"] = ctEntry->tableName; res["columns"] = nlohmann::json::array(); - for ( const ChangesetDdlColumn &column : ctEntry->columns ) + for ( const TableColumnInfo &column : ctEntry->columns ) { - res["columns"].push_back( changesetColumnToJSON( column ) ); + res["columns"].push_back( columnInfoToJSON( column ) ); } return res; } @@ -236,7 +242,7 @@ nlohmann::json changesetEntryToJSON( const ChangesetEntry &entry ) nlohmann::json res; res["type"] = "add_column"; res["tableName"] = acEntry->tableName; - res["column"] = changesetColumnToJSON( acEntry->column ); + res["column"] = columnInfoToJSON( acEntry->column ); return res; } else if ( const ChangesetDropColumnEntry *dcEntry = std::get_if( &entry ) ) diff --git a/geodiff/src/changesetwriter.cpp b/geodiff/src/changesetwriter.cpp index 6ed7ae32..def29944 100644 --- a/geodiff/src/changesetwriter.cpp +++ b/geodiff/src/changesetwriter.cpp @@ -42,13 +42,13 @@ void ChangesetWriter::writeEntry( const ChangesetEntry &entry ) { if ( const ChangesetDataEntry *dataEntry = std::get_if( &entry ) ) writeDataEntry( *dataEntry ); - if ( const ChangesetCreateTableEntry *ctEntry = std::get_if( &entry ) ) + else if ( const ChangesetCreateTableEntry *ctEntry = std::get_if( &entry ) ) writeCreateTableEntry( *ctEntry ); - if ( const ChangesetDropTableEntry *dtEntry = std::get_if( &entry ) ) + else if ( const ChangesetDropTableEntry *dtEntry = std::get_if( &entry ) ) writeDropTableEntry( *dtEntry ); - if ( const ChangesetAddColumnEntry *acEntry = std::get_if( &entry ) ) + else if ( const 
ChangesetAddColumnEntry *acEntry = std::get_if( &entry ) ) writeAddColumnEntry( *acEntry ); - if ( const ChangesetDropColumnEntry *dcEntry = std::get_if( &entry ) ) + else if ( const ChangesetDropColumnEntry *dcEntry = std::get_if( &entry ) ) writeDropColumnEntry( *dcEntry ); else throw GeoDiffException( "Tried to write unhandled changeset entry type! " + @@ -120,11 +120,18 @@ void ChangesetWriter::writeRowValues( const std::vector &values ) } } -void ChangesetWriter::writeDdlColumn( const ChangesetDdlColumn &column ) +void ChangesetWriter::writeColumnInfo( const TableColumnInfo &column ) { writeNullTerminatedString( column.name ); - writeNullTerminatedString( column.type ); - writeByte( column.isNotNull | ( column.isUnique << 1 ) ); + writeByte( column.type.baseType ); + writeByte( column.isPrimaryKey + | ( column.isNotNull << 1 ) + | ( column.isAutoIncrement << 2 ) + | ( column.isGeometry << 3 ) + | ( column.geomHasZ << 4 ) + | ( column.geomHasM << 5 ) ); + writeNullTerminatedString( column.geomType ); + writeVarint( column.geomSrsId ); } @@ -143,27 +150,31 @@ void ChangesetWriter::writeDataEntry( const ChangesetDataEntry &entry ) void ChangesetWriter::writeCreateTableEntry( const ChangesetCreateTableEntry &entry ) { + writeByte( static_cast( ChangesetEntryType::OpCreateTable ) ); writeNullTerminatedString( entry.tableName ); writeVarint( entry.columns.size() ); - for ( const ChangesetDdlColumn &column : entry.columns ) + for ( const TableColumnInfo &column : entry.columns ) { - writeDdlColumn( column ); + writeColumnInfo( column ); } } void ChangesetWriter::writeDropTableEntry( const ChangesetDropTableEntry &entry ) { + writeByte( static_cast( ChangesetEntryType::OpDropTable ) ); writeNullTerminatedString( entry.tableName ); } void ChangesetWriter::writeAddColumnEntry( const ChangesetAddColumnEntry &entry ) { + writeByte( static_cast( ChangesetEntryType::OpAddColumn ) ); writeNullTerminatedString( entry.tableName ); - writeDdlColumn( entry.column ); + 
writeColumnInfo( entry.column ); } void ChangesetWriter::writeDropColumnEntry( const ChangesetDropColumnEntry &entry ) { + writeByte( static_cast( ChangesetEntryType::OpDropColumn ) ); writeNullTerminatedString( entry.tableName ); writeNullTerminatedString( entry.columnName ); } diff --git a/geodiff/src/changesetwriter.h b/geodiff/src/changesetwriter.h index 3f5d5676..dfe7f908 100644 --- a/geodiff/src/changesetwriter.h +++ b/geodiff/src/changesetwriter.h @@ -43,7 +43,7 @@ class ChangesetWriter void writeNullTerminatedString( const std::string &str ); void writeRowValues( const std::vector &values ); - void writeDdlColumn( const ChangesetDdlColumn &column ); + void writeColumnInfo( const TableColumnInfo &column ); void writeDataEntry( const ChangesetDataEntry &entry ); void writeCreateTableEntry( const ChangesetCreateTableEntry &entry ); diff --git a/geodiff/src/drivers/postgresdriver.h b/geodiff/src/drivers/postgresdriver.h index d4a42702..12b5f6cf 100644 --- a/geodiff/src/drivers/postgresdriver.h +++ b/geodiff/src/drivers/postgresdriver.h @@ -7,6 +7,7 @@ #define POSTGRESDRIVER_H #include "driver.h" +#include "changeset.h" extern "C" { diff --git a/geodiff/src/drivers/sqlitedriver.h b/geodiff/src/drivers/sqlitedriver.h index 370309ef..c16cabbe 100644 --- a/geodiff/src/drivers/sqlitedriver.h +++ b/geodiff/src/drivers/sqlitedriver.h @@ -10,6 +10,7 @@ #include "driver.h" #include "sqliteutils.h" +#include "changeset.h" /** * Holds state that is useful to keep between entries when applying changeset. 
diff --git a/geodiff/src/tableschema.cpp b/geodiff/src/tableschema.cpp index e67364be..7b4ed218 100644 --- a/geodiff/src/tableschema.cpp +++ b/geodiff/src/tableschema.cpp @@ -8,6 +8,7 @@ #include "geodiffcontext.hpp" #include "geodifflogger.hpp" +#include "geodiffutils.hpp" #include diff --git a/geodiff/src/tableschema.h b/geodiff/src/tableschema.h index 3cb1410a..30a7d356 100644 --- a/geodiff/src/tableschema.h +++ b/geodiff/src/tableschema.h @@ -10,7 +10,7 @@ #include #include -#include "geodiffutils.hpp" +#include "geodiffcontext.hpp" /* Information about column type, converted to base type */ struct TableColumnType From 7b382c60dc56d72ca32c37b29ce50f966d99f3e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ko=C5=88a=C5=99=C3=ADk?= Date: Sun, 3 May 2026 22:35:57 +0200 Subject: [PATCH 03/21] Add support for schema diff in SQLite --- geodiff/CMakeLists.txt | 2 + geodiff/src/drivers/sqlitedriver.cpp | 258 +++++++++++++++++++-------- geodiff/src/drivers/sqlitedriver.h | 2 + geodiff/src/tableschema.h | 6 + geodiff/src/tableschemadiff.cpp | 122 +++++++++++++ geodiff/src/tableschemadiff.hpp | 15 ++ 6 files changed, 330 insertions(+), 75 deletions(-) create mode 100644 geodiff/src/tableschemadiff.cpp create mode 100644 geodiff/src/tableschemadiff.hpp diff --git a/geodiff/CMakeLists.txt b/geodiff/CMakeLists.txt index 4ae76d28..70eb9ff2 100644 --- a/geodiff/CMakeLists.txt +++ b/geodiff/CMakeLists.txt @@ -153,6 +153,8 @@ SET(geodiff_src src/driver.h src/tableschema.cpp src/tableschema.h + src/tableschemadiff.cpp + src/tableschemadiff.hpp src/drivers/sqlitedriver.cpp src/drivers/sqlitedriver.h diff --git a/geodiff/src/drivers/sqlitedriver.cpp b/geodiff/src/drivers/sqlitedriver.cpp index 0c272b83..48165707 100644 --- a/geodiff/src/drivers/sqlitedriver.cpp +++ b/geodiff/src/drivers/sqlitedriver.cpp @@ -12,6 +12,8 @@ #include "geodifflogger.hpp" #include "geodiffutils.hpp" #include "sqliteutils.h" +#include "tableschema.h" +#include "tableschemadiff.hpp" #include #include @@ 
-133,11 +135,19 @@ void SqliteDriver::open( const DriverParametersMap &conn ) throw GeoDiffException( "Missing 'modified' file when opening sqlite driver: " + modified ); } - mDb->open( modified ); + mDb->open( ":memory:" ); - Buffer sqlBuf; - sqlBuf.printf( "ATTACH '%q' AS aux", base.c_str() ); - mDb->exec( sqlBuf ); + { + Buffer sqlBuf; + sqlBuf.printf( "ATTACH '%q' AS base", base.c_str() ); + mDb->exec( sqlBuf ); + } + + { + Buffer sqlBuf; + sqlBuf.printf( "ATTACH '%q' AS modified", modified.c_str() ); + mDb->exec( sqlBuf ); + } } else { @@ -181,13 +191,13 @@ std::string SqliteDriver::databaseName( bool useModified ) { if ( mHasModified ) { - return useModified ? "main" : "aux"; + return useModified ? "modified" : "base"; } else { if ( useModified ) throw GeoDiffException( "'modified' table not open" ); - return "main"; + return "modified"; } } @@ -251,7 +261,7 @@ bool tableExists( std::shared_ptr db, const std::string &tableName, c { Sqlite3Stmt stmtHasGeomColumnsInfo; stmtHasGeomColumnsInfo.prepare( db, "SELECT name FROM \"%w\".sqlite_master WHERE type='table' " - "AND name='%q'", dbName.c_str(), tableName.c_str() ); + "AND name='%q'", dbName.c_str(), tableName.c_str() ); return sqlite3_step( stmtHasGeomColumnsInfo.get() ) == SQLITE_ROW; } @@ -370,6 +380,16 @@ TableSchema SqliteDriver::tableSchema( const std::string &tableName, return tbl; } +DatabaseSchema SqliteDriver::getSchema( bool useModified ) +{ + std::vector tables; + for ( auto &name : listTables( useModified ) ) + { + tables.push_back( tableSchema( name, useModified ) ); + } + return {tables}; +} + /** * printf() with sqlite extensions - see https://www.sqlite.org/printf.html * for extra format options like %q or %Q @@ -390,64 +410,124 @@ static std::string sqlitePrintf( const char *zFormat, ... 
) return res; } +struct TableDiffContext +{ + std::shared_ptr db; + const TableSchema &schemaBase; + const TableSchema &schemaModified; + std::vector commonColumns; + std::vector newColumns; + ChangesetWriter &writer; + bool tableEntryWritten = false; +}; + +static std::string sqlColumnsStr( const TableDiffContext &diffContext, bool reverse ) +{ + const char *tableName = ( reverse ? diffContext.schemaBase.name : diffContext.schemaModified.name ).c_str(); + + std::string colsStr; // Column list equivalent to modified schema + for ( const TableColumnInfo &c : diffContext.schemaModified.columns ) + { + if ( !colsStr.empty() ) + colsStr += ", "; + if ( reverse ) + { + // Check if this column also exists in base and NULL it out if not + bool found = false; + for ( const auto &commonCol : diffContext.commonColumns ) + { + if ( commonCol.name == c.name ) + { + found = true; + break; + } + } + if ( !found ) + { + colsStr += sqlitePrintf( "NULL AS \"%w\"", c.name.c_str() ); + continue; + } + } + colsStr += sqlitePrintf( "\"%w\".\"%w\".\"%w\"", + reverse ? "base" : "modified", tableName, c.name.c_str() ); + } + return colsStr; +} + //! 
Constructs SQL query to get all rows that do not exist in the other table (used for insert and delete) -static std::string sqlFindInserted( const std::string &tableName, const TableSchema &tbl, bool reverse ) +static std::string sqlFindInserted( const TableDiffContext &diffContext, bool reverse ) { - std::string exprPk; - for ( const TableColumnInfo &c : tbl.columns ) + const char *baseTableName = diffContext.schemaBase.name.c_str(); + const char *modifiedTableName = diffContext.schemaModified.name.c_str(); + + std::string exprPk; // Filter expression checking primary key is equal + for ( const TableColumnInfo &c : diffContext.commonColumns ) { if ( c.isPrimaryKey ) { if ( !exprPk.empty() ) exprPk += " AND "; - exprPk += sqlitePrintf( "\"%w\".\"%w\".\"%w\"=\"%w\".\"%w\".\"%w\"", - "main", tableName.c_str(), c.name.c_str(), "aux", tableName.c_str(), c.name.c_str() ); + exprPk += sqlitePrintf( "\"modified\".\"%w\".\"%w\"=\"base\".\"%w\".\"%w\"", + modifiedTableName, c.name.c_str(), baseTableName, c.name.c_str() ); } } - std::string sql = sqlitePrintf( "SELECT * FROM \"%w\".\"%w\" WHERE NOT EXISTS ( SELECT 1 FROM \"%w\".\"%w\" WHERE %s)", - reverse ? "aux" : "main", tableName.c_str(), - reverse ? "main" : "aux", tableName.c_str(), exprPk.c_str() ); + std::string sql = sqlitePrintf( "SELECT %s FROM \"%w\".\"%w\" WHERE NOT EXISTS ( SELECT 1 FROM \"%w\".\"%w\" WHERE %s)", + sqlColumnsStr( diffContext, reverse ).c_str(), + reverse ? "base" : "modified", reverse ? baseTableName : modifiedTableName, + reverse ? "modified" : "base", reverse ? modifiedTableName : baseTableName, exprPk.c_str() ); return sql; } //! 
Constructs SQL query to get all modified rows for a single table -static std::string sqlFindModified( const std::string &tableName, const TableSchema &tbl ) +static std::string sqlFindModified( const TableDiffContext &diffContext ) { + const char *baseTableName = diffContext.schemaBase.name.c_str(); + const char *modifiedTableName = diffContext.schemaModified.name.c_str(); + std::string exprPk; std::string exprOther; - for ( const TableColumnInfo &c : tbl.columns ) + for ( const TableColumnInfo &c : diffContext.commonColumns ) { if ( c.isPrimaryKey ) { if ( !exprPk.empty() ) exprPk += " AND "; - exprPk += sqlitePrintf( "\"%w\".\"%w\".\"%w\"=\"%w\".\"%w\".\"%w\"", - "main", tableName.c_str(), c.name.c_str(), "aux", tableName.c_str(), c.name.c_str() ); + exprPk += sqlitePrintf( "\"modified\".\"%w\".\"%w\"=\"base\".\"%w\".\"%w\"", + modifiedTableName, c.name.c_str(), baseTableName, c.name.c_str() ); } else // not a primary key column { if ( !exprOther.empty() ) exprOther += " OR "; - exprOther += sqlitePrintf( "\"%w\".\"%w\".\"%w\" IS NOT \"%w\".\"%w\".\"%w\"", - "main", tableName.c_str(), c.name.c_str(), "aux", tableName.c_str(), c.name.c_str() ); + exprOther += sqlitePrintf( "\"modified\".\"%w\".\"%w\" IS NOT \"base\".\"%w\".\"%w\"", + modifiedTableName, c.name.c_str(), baseTableName, c.name.c_str() ); } } - std::string sql; + + // Check for non-NULL values in newly-added columns + for ( const TableColumnInfo &c : diffContext.newColumns ) + { + if ( !exprOther.empty() ) + exprOther += " OR "; + + exprOther += sqlitePrintf( "\"modified\".\"%w\".\"%w\" IS NOT NULL", + modifiedTableName, c.name.c_str() ); + } + + std::string colsStr = sqlColumnsStr( diffContext, false ) + ", " + sqlColumnsStr( diffContext, true ); if ( exprOther.empty() ) { - sql = sqlitePrintf( "SELECT * FROM \"%w\".\"%w\", \"%w\".\"%w\" WHERE %s", - "main", tableName.c_str(), "aux", tableName.c_str(), exprPk.c_str() ); + return sqlitePrintf( "SELECT %s FROM \"modified\".\"%w\", \"base\".\"%w\" WHERE 
%s", + colsStr.c_str(), modifiedTableName, baseTableName, exprPk.c_str() ); } else { - sql = sqlitePrintf( "SELECT * FROM \"%w\".\"%w\", \"%w\".\"%w\" WHERE %s AND (%s)", - "main", tableName.c_str(), "aux", tableName.c_str(), exprPk.c_str(), exprOther.c_str() ); + return sqlitePrintf( "SELECT %s FROM \"modified\".\"%w\", \"base\".\"%w\" WHERE %s AND (%s)", + colsStr.c_str(), modifiedTableName, baseTableName, exprPk.c_str(), exprOther.c_str() ); } - - return sql; } @@ -471,25 +551,25 @@ static Value changesetValue( sqlite3_value *v ) return x; } -static void handleInserted( const Context *context, const std::string &tableName, const TableSchema &tbl, bool reverse, std::shared_ptr db, ChangesetWriter &writer, bool &first ) +static void handleInserted( const Context *context, TableDiffContext &diffContext, bool reverse ) { - std::string sqlInserted = sqlFindInserted( tableName, tbl, reverse ); + std::string sqlInserted = sqlFindInserted( diffContext, reverse ); Sqlite3Stmt statementI; - statementI.prepare( db, "%s", sqlInserted.c_str() ); + statementI.prepare( diffContext.db, "%s", sqlInserted.c_str() ); int rc; while ( SQLITE_ROW == ( rc = sqlite3_step( statementI.get() ) ) ) { - if ( first ) + if ( !diffContext.tableEntryWritten ) { - ChangesetTable chTable = schemaToChangesetTable( tableName, tbl ); - writer.beginTable( chTable ); - first = false; + ChangesetTable chTable = schemaToChangesetTable( diffContext.schemaModified.name, diffContext.schemaModified ); + diffContext.writer.beginTable( chTable ); + diffContext.tableEntryWritten = false; } ChangesetDataEntry e; e.op = reverse ? 
ChangesetDataEntry::OpDelete : ChangesetDataEntry::OpInsert; - size_t numColumns = tbl.columns.size(); + size_t numColumns = diffContext.schemaModified.columns.size(); for ( size_t i = 0; i < numColumns; ++i ) { Sqlite3Value v( sqlite3_column_value( statementI.get(), static_cast( i ) ) ); @@ -499,20 +579,20 @@ static void handleInserted( const Context *context, const std::string &tableName e.newValues.push_back( changesetValue( v.value() ) ); } - writer.writeEntry( e ); + diffContext.writer.writeEntry( e ); } if ( rc != SQLITE_DONE ) { - logSqliteError( context, db, "Failed to write information about inserted rows in table " + tableName ); + logSqliteError( context, diffContext.db, "Failed to write information about inserted rows in table " + diffContext.schemaModified.name ); } } -static void handleUpdated( const Context *context, const std::string &tableName, const TableSchema &tbl, std::shared_ptr db, ChangesetWriter &writer, bool &first ) +static void handleUpdated( const Context *context, TableDiffContext &diffContext ) { - std::string sqlModified = sqlFindModified( tableName, tbl ); + std::string sqlModified = sqlFindModified( diffContext ); Sqlite3Stmt statement; - statement.prepare( db, "%s", sqlModified.c_str() ); + statement.prepare( diffContext.db, "%s", sqlModified.c_str() ); int rc; while ( SQLITE_ROW == ( rc = sqlite3_step( statement.get() ) ) ) { @@ -531,12 +611,12 @@ static void handleUpdated( const Context *context, const std::string &tableName, e.op = ChangesetDataEntry::OpUpdate; bool hasUpdates = false; - size_t numColumns = tbl.columns.size(); + size_t numColumns = diffContext.schemaModified.columns.size(); for ( size_t i = 0; i < numColumns; ++i ) { Sqlite3Value v1( sqlite3_column_value( statement.get(), static_cast( i + numColumns ) ) ); Sqlite3Value v2( sqlite3_column_value( statement.get(), static_cast( i ) ) ); - bool pkey = tbl.columns[i].isPrimaryKey; + bool pkey = diffContext.schemaModified.columns[i].isPrimaryKey; bool updated = ( v1 
!= v2 ); if ( updated ) { @@ -544,10 +624,10 @@ static void handleUpdated( const Context *context, const std::string &tableName, // multiple different string representations could be used for a single datetime value, // see "Time Values" section in https://sqlite.org/lang_datefunc.html // Use strftime() to take into account fractional seconds - if ( tbl.columns[i].type == TableColumnType::DATETIME ) + if ( diffContext.schemaModified.columns[i].type == TableColumnType::DATETIME ) { Sqlite3Stmt stmtDatetime; - stmtDatetime.prepare( db, "SELECT STRFTIME('%%Y-%%m-%%d %%H:%%M:%%f', ?1) IS NOT STRFTIME('%%Y-%%m-%%d %%H:%%M:%%f', ?2)" ); + stmtDatetime.prepare( diffContext.db, "SELECT STRFTIME('%%Y-%%m-%%d %%H:%%M:%%f', ?1) IS NOT STRFTIME('%%Y-%%m-%%d %%H:%%M:%%f', ?2)" ); sqlite3_bind_value( stmtDatetime.get(), 1, v1.value() ); sqlite3_bind_value( stmtDatetime.get(), 2, v2.value() ); int res = sqlite3_step( stmtDatetime.get() ); @@ -557,7 +637,7 @@ static void handleUpdated( const Context *context, const std::string &tableName, } else if ( SQLITE_DONE != res ) { - logSqliteError( context, db, "Failed to write information about updated rows in table " + tableName ); + logSqliteError( context, diffContext.db, "Failed to write information about updated rows in table " + diffContext.schemaModified.name ); } } @@ -572,56 +652,84 @@ static void handleUpdated( const Context *context, const std::string &tableName, if ( hasUpdates ) { - if ( first ) + if ( !diffContext.tableEntryWritten ) { - ChangesetTable chTable = schemaToChangesetTable( tableName, tbl ); - writer.beginTable( chTable ); - first = false; + ChangesetTable chTable = schemaToChangesetTable( diffContext.schemaModified.name, diffContext.schemaModified ); + diffContext.writer.beginTable( chTable ); + diffContext.tableEntryWritten = true; } - writer.writeEntry( e ); + diffContext.writer.writeEntry( e ); } } if ( rc != SQLITE_DONE ) { - logSqliteError( context, db, "Failed to write information about inserted rows in 
table " + tableName ); + logSqliteError( context, diffContext.db, "Failed to write information about inserted rows in table " + diffContext.schemaModified.name ); } } void SqliteDriver::createChangeset( ChangesetWriter &writer ) { - std::vector tablesBase = listTables( false ); - std::vector tablesModified = listTables( true ); + DatabaseSchema schemaBase = getSchema( false ); + DatabaseSchema schemaModified = getSchema( true ); - if ( tablesBase != tablesModified ) + auto schemaDiffEntries = diffDatabaseSchema( schemaBase, schemaModified ); + for ( const ChangesetEntry &entry : schemaDiffEntries ) { - throw GeoDiffException( "Table names are not matching between the input databases.\n" - "Base: " + concatNames( tablesBase ) + "\n" + - "Modified: " + concatNames( tablesModified ) ); + writer.writeEntry( entry ); } - for ( const std::string &tableName : tablesBase ) + for ( const TableSchema &tblBase : schemaBase.tables ) { - TableSchema tbl = tableSchema( tableName ); - TableSchema tblNew = tableSchema( tableName, true ); + if ( !tblBase.hasPrimaryKey() ) + continue; // ignore tables without primary key - they can't be compared properly - // test that table schema in the modified is the same - if ( tbl != tblNew ) + // Find corresponding table in modified DB + const TableSchema *tblModified = nullptr; + for ( const TableSchema &tbl : schemaModified.tables ) { - if ( !tbl.compareWithBaseTypes( tblNew ) ) - throw GeoDiffException( "GeoPackage Table schemas are not the same for table: " + tableName ); + if ( tbl.name == tblBase.name ) + { + tblModified = &tbl; + break; + } } + if ( !tblModified ) + continue; // Table was deleted - if ( !tbl.hasPrimaryKey() ) - continue; // ignore tables without primary key - they can't be compared properly + TableDiffContext diffContext = { mDb, tblBase, *tblModified, {}, {}, writer }; - bool first = true; + for ( const TableColumnInfo &baseColumn : tblBase.columns ) + { + for ( const TableColumnInfo &modifiedColumn : 
tblModified->columns ) + { + if ( baseColumn.name == modifiedColumn.name ) + { + diffContext.commonColumns.push_back( modifiedColumn ); + break; + } + } + } - handleInserted( context(), tableName, tbl, false, mDb, writer, first ); // INSERT - handleInserted( context(), tableName, tbl, true, mDb, writer, first ); // DELETE - handleUpdated( context(), tableName, tbl, mDb, writer, first ); // UPDATE - } + for ( const TableColumnInfo &modifiedColumn : tblModified->columns ) + { + bool found = false; + for ( const TableColumnInfo &baseColumn : tblBase.columns ) + { + if ( baseColumn.name == modifiedColumn.name ) + { + found = true; + break; + } + } + if ( !found ) + diffContext.newColumns.push_back( modifiedColumn ); + } + handleInserted( context(), diffContext, false ); // INSERT + handleInserted( context(), diffContext, true ); // DELETE + handleUpdated( context(), diffContext ); // UPDATE + } } static std::string sqlForInsert( const std::string &tableName, const TableSchema &tbl ) @@ -1043,7 +1151,7 @@ static void addGpkgSpatialTable( std::shared_ptr db, const TableSchem Sqlite3Stmt stmt; stmt.prepare( db, "INSERT INTO gpkg_contents (table_name, data_type, identifier, min_x, min_y, max_x, max_y, srs_id) " - "VALUES ('%q', 'features', '%q', %f, %f, %f, %f, %d)", + "VALUES ('%q', 'features', '%q', %f, %f, %f, %f, %d)", tbl.name.c_str(), tbl.name.c_str(), extent.minX, extent.minY, extent.maxX, extent.maxY, srsId ); int res = sqlite3_step( stmt.get() ); if ( res != SQLITE_DONE ) @@ -1071,7 +1179,7 @@ void SqliteDriver::createTables( const std::vector &tables ) // currently we always create geopackage meta tables. Maybe in the future we can skip // that if there is a reason, and have that optional if none of the tables are spatial. 
Sqlite3Stmt stmt1; - stmt1.prepare( mDb, "SELECT InitSpatialMetadata('main');" ); + stmt1.prepare( mDb, "SELECT InitSpatialMetadata('modified');" ); int res = sqlite3_step( stmt1.get() ); if ( res != SQLITE_ROW ) { @@ -1114,7 +1222,7 @@ void SqliteDriver::createTables( const std::vector &tables ) } } - sql = sqlitePrintf( "CREATE TABLE \"%w\".\"%w\" (", "main", tbl.name.c_str() ); + sql = sqlitePrintf( "CREATE TABLE \"%w\".\"%w\" (", "modified", tbl.name.c_str() ); if ( !columns.empty() ) { sql += columns; diff --git a/geodiff/src/drivers/sqlitedriver.h b/geodiff/src/drivers/sqlitedriver.h index c16cabbe..cca70f2d 100644 --- a/geodiff/src/drivers/sqlitedriver.h +++ b/geodiff/src/drivers/sqlitedriver.h @@ -11,6 +11,7 @@ #include "driver.h" #include "sqliteutils.h" #include "changeset.h" +#include "tableschema.h" /** * Holds state that is useful to keep between entries when applying changeset. @@ -51,6 +52,7 @@ class SqliteDriver : public Driver void create( const DriverParametersMap &conn, bool overwrite = false ) override; std::vector listTables( bool useModified = false ) override; TableSchema tableSchema( const std::string &tableName, bool useModified = false ) override; + DatabaseSchema getSchema( bool useModified = false ); void createChangeset( ChangesetWriter &writer ) override; void applyChangeset( ChangesetReader &reader ) override; void createTables( const std::vector &tables ) override; diff --git a/geodiff/src/tableschema.h b/geodiff/src/tableschema.h index 30a7d356..96fe49ed 100644 --- a/geodiff/src/tableschema.h +++ b/geodiff/src/tableschema.h @@ -225,6 +225,12 @@ struct TableSchema } }; +/** Information about all tables in the database. */ +struct DatabaseSchema +{ + std::vector tables; +}; + //! 
Converts column name to base type and returns struct with both names TableColumnType columnType( const Context *context, const std::string &columnType, const std::string &driverName, bool isGeometry = false ); diff --git a/geodiff/src/tableschemadiff.cpp b/geodiff/src/tableschemadiff.cpp new file mode 100644 index 00000000..6d0d3025 --- /dev/null +++ b/geodiff/src/tableschemadiff.cpp @@ -0,0 +1,122 @@ +/* + GEODIFF - MIT License + Copyright (C) 2026 David Koňařík +*/ + +#include "tableschemadiff.hpp" +#include "changeset.h" +#include "geodiffutils.hpp" +#include "tableschema.h" +#include +#include +#include + +template +static std::vector names( const std::vector &items ) +{ + std::vector names; + names.reserve( items.size() ); + for ( const auto &item : items ) + { + names.push_back( item.name ); + } + return names; +} + +template std::vector names( const std::vector &items ); + +template +static std::unordered_map byName( const std::vector &items ) +{ + std::unordered_map map; + for ( const T &item : items ) + { + map[item.name] = &item; + } + return map; +} + +std::vector diffTableSchema( const TableSchema &base, const TableSchema &modified ) +{ + if ( base.crs != modified.crs ) + throw GeoDiffException( "Tried to compare tables with different CRSs (named" + + base.name + " and " + modified.name + ")" ); + + std::vector entries; + + const std::unordered_map baseColumns = byName( base.columns ); + const std::unordered_map modifiedColumns = byName( modified.columns ); + const std::vector baseColNames = names( base.columns ); + const std::vector modifiedColNames = names( modified.columns ); + + std::vector deletedColNames; + std::set_difference( baseColNames.begin(), baseColNames.end(), + modifiedColNames.begin(), modifiedColNames.end(), + std::back_inserter( deletedColNames ) ); + for ( const std::string &colName : deletedColNames ) + { + entries.push_back( ChangesetDropColumnEntry{base.name, colName} ); + } + + std::vector newColNames; + std::set_difference( 
modifiedColNames.begin(), modifiedColNames.end(), + baseColNames.begin(), baseColNames.end(), + std::back_inserter( newColNames ) ); + for ( const std::string &colName : newColNames ) + { + entries.push_back( ChangesetAddColumnEntry{base.name, *modifiedColumns.at( colName )} ); + } + + std::vector oldColNames; + std::set_intersection( modifiedColNames.begin(), modifiedColNames.end(), + baseColNames.begin(), baseColNames.end(), + std::back_inserter( oldColNames ) ); + for ( const std::string &colName : oldColNames ) + { + if ( *baseColumns.at( colName ) != *modifiedColumns.at( colName ) ) + throw GeoDiffException( "Columns differ: " + + base.name + "." + colName + " and " + modified.name + "." + colName + ")" ); + } + + return entries; +} + +std::vector diffDatabaseSchema( const DatabaseSchema &base, const DatabaseSchema &modified ) +{ + std::vector entries; + + const std::unordered_map baseTables = byName( base.tables ); + const std::unordered_map modifiedTables = byName( modified.tables ); + const std::vector baseTableNames = names( base.tables ); + const std::vector modifiedTableNames = names( modified.tables ); + + std::vector deletedTableNames; + std::set_difference( baseTableNames.begin(), baseTableNames.end(), + modifiedTableNames.begin(), modifiedTableNames.end(), + std::back_inserter( deletedTableNames ) ); + for ( const std::string &name : deletedTableNames ) + { + entries.push_back( ChangesetDropTableEntry{name} ); + } + + std::vector newTableNames; + std::set_difference( modifiedTableNames.begin(), modifiedTableNames.end(), + baseTableNames.begin(), baseTableNames.end(), + std::back_inserter( newTableNames ) ); + for ( const std::string &name : newTableNames ) + { + entries.push_back( ChangesetCreateTableEntry{name, modifiedTables.at( name )->columns} ); + } + + std::vector oldTableNames; + std::set_intersection( modifiedTableNames.begin(), modifiedTableNames.end(), + baseTableNames.begin(), baseTableNames.end(), + std::back_inserter( oldTableNames ) ); 
+ for ( const std::string &name : oldTableNames ) + { + std::vector tableEntries = diffTableSchema( *baseTables.at( name ), *modifiedTables.at( name ) ); + entries.insert( entries.end(), tableEntries.begin(), tableEntries.end() ); + } + + return entries; +} diff --git a/geodiff/src/tableschemadiff.hpp b/geodiff/src/tableschemadiff.hpp new file mode 100644 index 00000000..e1912a4e --- /dev/null +++ b/geodiff/src/tableschemadiff.hpp @@ -0,0 +1,15 @@ +/* + GEODIFF - MIT License + Copyright (C) 2026 David Koňařík +*/ + +#ifndef TABLESCHEMADIFF_H +#define TABLESCHEMADIFF_H + +#include "changeset.h" +#include "tableschema.h" + +std::vector diffTableSchema( const TableSchema &base, const TableSchema &modified ); +std::vector diffDatabaseSchema( const DatabaseSchema &base, const DatabaseSchema &modified ); + +#endif // TABLESCHEMADIFF_H From 855e15cfd4098b9c0120e49711249817493a7c92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ko=C5=88a=C5=99=C3=ADk?= Date: Mon, 4 May 2026 10:47:41 +0200 Subject: [PATCH 04/21] Make schema changes invertible --- geodiff/src/changeset.h | 3 +- geodiff/src/changesetreader.cpp | 8 +- geodiff/src/changesetutils.cpp | 81 ++++++++++++--- geodiff/src/changesetwriter.cpp | 7 +- geodiff/src/drivers/sqlitedriver.cpp | 150 ++++++++++++++++++++++----- geodiff/src/drivers/sqliteutils.cpp | 2 +- geodiff/src/tableschemadiff.cpp | 9 +- 7 files changed, 208 insertions(+), 52 deletions(-) diff --git a/geodiff/src/changeset.h b/geodiff/src/changeset.h index 92b98a06..c381e42d 100644 --- a/geodiff/src/changeset.h +++ b/geodiff/src/changeset.h @@ -278,6 +278,7 @@ struct ChangesetCreateTableEntry struct ChangesetDropTableEntry { std::string tableName; + std::vector columns; }; //! Entry for ALTER TABLE ... 
ADD COLUMN command @@ -291,7 +292,7 @@ struct ChangesetAddColumnEntry struct ChangesetDropColumnEntry { std::string tableName; - std::string columnName; + TableColumnInfo column; }; struct ChangesetEntry : public std::variant < diff --git a/geodiff/src/changesetreader.cpp b/geodiff/src/changesetreader.cpp index 6eaf749e..b74f97f2 100644 --- a/geodiff/src/changesetreader.cpp +++ b/geodiff/src/changesetreader.cpp @@ -260,6 +260,12 @@ ChangesetDropTableEntry ChangesetReader::readDropTableEntry() { ChangesetDropTableEntry entry; entry.tableName = readNullTerminatedString(); + int columnCount = readVarint(); + entry.columns.resize( columnCount ); + for ( size_t i = 0; i < entry.columns.size(); i++ ) + { + entry.columns[i] = readColumnInfo(); + } return entry; } @@ -275,7 +281,7 @@ ChangesetDropColumnEntry ChangesetReader::readDropColumnEntry() { ChangesetDropColumnEntry entry; entry.tableName = readNullTerminatedString(); - entry.columnName = readNullTerminatedString(); + entry.column = readColumnInfo(); return entry; } diff --git a/geodiff/src/changesetutils.cpp b/geodiff/src/changesetutils.cpp index 5e44e6c7..f309cffc 100644 --- a/geodiff/src/changesetutils.cpp +++ b/geodiff/src/changesetutils.cpp @@ -11,6 +11,8 @@ #include "changesetreader.h" #include "changesetwriter.h" #include "tableschema.h" +#include +#include ChangesetTable schemaToChangesetTable( const std::string &tableName, const TableSchema &tbl ) @@ -22,55 +24,59 @@ ChangesetTable schemaToChangesetTable( const std::string &tableName, const Table return chTable; } -void invertChangeset( ChangesetReader &reader, ChangesetWriter &writer ) +// Returns inverted changeset entries in reverse order +std::tuple>, std::vector> invertChangesetReverse( ChangesetReader &reader ) { std::string currentTableName; - std::vector currentPkeys; + std::unordered_map> tables; + std::vector invertedEntries; ChangesetEntry entry; while ( reader.nextEntry( entry ) ) { if ( ChangesetDataEntry *dataEntry = std::get_if( &entry ) ) { 
- assert( dataEntry->table ); - if ( dataEntry->table->name != currentTableName ) + if ( !dataEntry->table ) + throw GeoDiffException( "ChangesetDataEntry without table data read!" ); + if ( !tables.count( dataEntry->table->name ) ) { - writer.beginTable( *dataEntry->table ); - currentTableName = dataEntry->table->name; - currentPkeys = dataEntry->table->primaryKeys; + tables[dataEntry->table->name] = std::make_unique( *dataEntry->table ); } if ( dataEntry->op == ChangesetDataEntry::OpInsert ) { ChangesetDataEntry out; out.op = ChangesetDataEntry::OpDelete; + out.table = tables[dataEntry->table->name].get(); out.oldValues = dataEntry->newValues; - writer.writeEntry( out ); + invertedEntries.push_back( out ); } else if ( dataEntry->op == ChangesetDataEntry::OpDelete ) { ChangesetDataEntry out; out.op = ChangesetDataEntry::OpInsert; + out.table = tables[dataEntry->table->name].get(); out.newValues = dataEntry->oldValues; - writer.writeEntry( out ); + invertedEntries.push_back( out ); } else if ( dataEntry->op == ChangesetDataEntry::OpUpdate ) { ChangesetDataEntry out; out.op = ChangesetDataEntry::OpUpdate; + out.table = tables[dataEntry->table->name].get(); out.newValues = dataEntry->oldValues; out.oldValues = dataEntry->newValues; // if a column is a part of pkey and has not been changed, // the original entry has "old" value the pkey value and "new" // value is undefined - let's reverse "old" and "new" in that case. 
- for ( size_t i = 0; i < currentPkeys.size(); ++i ) + for ( size_t i = 0; i < dataEntry->table->primaryKeys.size(); ++i ) { - if ( currentPkeys[i] && out.oldValues[i].type() == Value::TypeUndefined ) + if ( dataEntry->table->primaryKeys[i] && out.oldValues[i].type() == Value::TypeUndefined ) { out.oldValues[i] = out.newValues[i]; out.newValues[i].setUndefined(); } } - writer.writeEntry( out ); + invertedEntries.push_back( out ); } else { @@ -81,21 +87,57 @@ void invertChangeset( ChangesetReader &reader, ChangesetWriter &writer ) { ChangesetDropTableEntry out; out.tableName = ctEntry->tableName; - writer.writeEntry( out ); + out.columns = ctEntry->columns; + invertedEntries.push_back( out ); } else if ( ChangesetAddColumnEntry *acEntry = std::get_if( &entry ) ) { ChangesetDropColumnEntry out; out.tableName = acEntry->tableName; - out.columnName = acEntry->column.name; - writer.writeEntry( out ); + out.column = acEntry->column; + invertedEntries.push_back( out ); + } + else if ( ChangesetDropTableEntry *dtEntry = std::get_if( &entry ) ) + { + ChangesetCreateTableEntry out; + out.tableName = dtEntry->tableName; + out.columns = dtEntry->columns; + invertedEntries.push_back( out ); + } + else if ( ChangesetDropColumnEntry *dcEntry = std::get_if( &entry ) ) + { + ChangesetAddColumnEntry out; + out.tableName = dcEntry->tableName; + out.column = dcEntry->column; + invertedEntries.push_back( out ); } else { - // We can't invert DROP TABLE/COLUMN, because we don't know what's being dropped throw GeoDiffException( "Cannot invert changeset entry variant " + std::to_string( entry.index() ) ); } } + return {std::move( tables ), invertedEntries}; +} + +void invertChangeset( ChangesetReader &reader, ChangesetWriter &writer ) +{ + auto result = invertChangesetReverse( reader ); + std::vector &invertedReverse = std::get<1>( result ); + ChangesetTable *currentTable = nullptr; + for ( size_t i = 1; i <= invertedReverse.size(); i++ ) + { + const auto &entry = 
invertedReverse[invertedReverse.size() - i]; + if ( const ChangesetDataEntry *dataEntry = std::get_if( &entry ) ) + { + if ( dataEntry->table != currentTable ) + { + writer.beginTable( *dataEntry->table ); + currentTable = dataEntry->table; + } + } + + writer.writeEntry( entry ); + } } nlohmann::json valueToJSON( const Value &value ) @@ -235,6 +277,11 @@ nlohmann::json changesetEntryToJSON( const ChangesetEntry &entry ) nlohmann::json res; res["type"] = "drop_table"; res["tableName"] = dtEntry->tableName; + res["columns"] = nlohmann::json::array(); + for ( const TableColumnInfo &column : dtEntry->columns ) + { + res["columns"].push_back( columnInfoToJSON( column ) ); + } return res; } else if ( const ChangesetAddColumnEntry *acEntry = std::get_if( &entry ) ) @@ -250,7 +297,7 @@ nlohmann::json changesetEntryToJSON( const ChangesetEntry &entry ) nlohmann::json res; res["type"] = "drop_column"; res["tableName"] = dcEntry->tableName; - res["columnName"] = dcEntry->columnName; + res["column"] = columnInfoToJSON( dcEntry->column ); return res; } else diff --git a/geodiff/src/changesetwriter.cpp b/geodiff/src/changesetwriter.cpp index def29944..392da299 100644 --- a/geodiff/src/changesetwriter.cpp +++ b/geodiff/src/changesetwriter.cpp @@ -163,6 +163,11 @@ void ChangesetWriter::writeDropTableEntry( const ChangesetDropTableEntry &entry { writeByte( static_cast( ChangesetEntryType::OpDropTable ) ); writeNullTerminatedString( entry.tableName ); + writeVarint( entry.columns.size() ); + for ( const TableColumnInfo &column : entry.columns ) + { + writeColumnInfo( column ); + } } void ChangesetWriter::writeAddColumnEntry( const ChangesetAddColumnEntry &entry ) @@ -176,5 +181,5 @@ void ChangesetWriter::writeDropColumnEntry( const ChangesetDropColumnEntry &entr { writeByte( static_cast( ChangesetEntryType::OpDropColumn ) ); writeNullTerminatedString( entry.tableName ); - writeNullTerminatedString( entry.columnName ); + writeColumnInfo( entry.column ); } diff --git 
a/geodiff/src/drivers/sqlitedriver.cpp b/geodiff/src/drivers/sqlitedriver.cpp index 48165707..b9f54b33 100644 --- a/geodiff/src/drivers/sqlitedriver.cpp +++ b/geodiff/src/drivers/sqlitedriver.cpp @@ -5,6 +5,7 @@ #include "sqlitedriver.h" +#include "changeset.h" #include "changesetreader.h" #include "changesetwriter.h" #include "changesetutils.h" @@ -110,6 +111,8 @@ SqliteDriver::SqliteDriver( const Context *context ) { } +// Opens 'base' DB (with implicit schema called 'main') and optionally +// 'modified' DB (with explicit schema 'modified') void SqliteDriver::open( const DriverParametersMap &conn ) { DriverParametersMap::const_iterator connBaseIt = conn.find( "base" ); @@ -126,6 +129,8 @@ void SqliteDriver::open( const DriverParametersMap &conn ) } mDb = std::make_shared(); + mDb->open( base ); + if ( mHasModified ) { std::string modified = connModifiedIt->second; @@ -135,24 +140,12 @@ void SqliteDriver::open( const DriverParametersMap &conn ) throw GeoDiffException( "Missing 'modified' file when opening sqlite driver: " + modified ); } - mDb->open( ":memory:" ); - - { - Buffer sqlBuf; - sqlBuf.printf( "ATTACH '%q' AS base", base.c_str() ); - mDb->exec( sqlBuf ); - } - { Buffer sqlBuf; sqlBuf.printf( "ATTACH '%q' AS modified", modified.c_str() ); mDb->exec( sqlBuf ); } } - else - { - mDb->open( base ); - } // GeoPackage triggers require few functions like ST_IsEmpty() to be registered // in order to be able to apply changesets @@ -191,13 +184,13 @@ std::string SqliteDriver::databaseName( bool useModified ) { if ( mHasModified ) { - return useModified ? "modified" : "base"; + return useModified ? "modified" : "main"; } else { if ( useModified ) throw GeoDiffException( "'modified' table not open" ); - return "modified"; + return "main"; } } @@ -449,7 +442,7 @@ static std::string sqlColumnsStr( const TableDiffContext &diffContext, bool reve } } colsStr += sqlitePrintf( "\"%w\".\"%w\".\"%w\"", - reverse ? "base" : "modified", tableName, c.name.c_str() ); + reverse ? 
"main" : "modified", tableName, c.name.c_str() ); } return colsStr; } @@ -467,15 +460,15 @@ static std::string sqlFindInserted( const TableDiffContext &diffContext, bool re { if ( !exprPk.empty() ) exprPk += " AND "; - exprPk += sqlitePrintf( "\"modified\".\"%w\".\"%w\"=\"base\".\"%w\".\"%w\"", + exprPk += sqlitePrintf( "\"modified\".\"%w\".\"%w\"=\"main\".\"%w\".\"%w\"", modifiedTableName, c.name.c_str(), baseTableName, c.name.c_str() ); } } std::string sql = sqlitePrintf( "SELECT %s FROM \"%w\".\"%w\" WHERE NOT EXISTS ( SELECT 1 FROM \"%w\".\"%w\" WHERE %s)", sqlColumnsStr( diffContext, reverse ).c_str(), - reverse ? "base" : "modified", reverse ? baseTableName : modifiedTableName, - reverse ? "modified" : "base", reverse ? modifiedTableName : baseTableName, exprPk.c_str() ); + reverse ? "main" : "modified", reverse ? baseTableName : modifiedTableName, + reverse ? "modified" : "main", reverse ? modifiedTableName : baseTableName, exprPk.c_str() ); return sql; } @@ -493,7 +486,7 @@ static std::string sqlFindModified( const TableDiffContext &diffContext ) { if ( !exprPk.empty() ) exprPk += " AND "; - exprPk += sqlitePrintf( "\"modified\".\"%w\".\"%w\"=\"base\".\"%w\".\"%w\"", + exprPk += sqlitePrintf( "\"modified\".\"%w\".\"%w\"=\"main\".\"%w\".\"%w\"", modifiedTableName, c.name.c_str(), baseTableName, c.name.c_str() ); } else // not a primary key column @@ -501,7 +494,7 @@ static std::string sqlFindModified( const TableDiffContext &diffContext ) if ( !exprOther.empty() ) exprOther += " OR "; - exprOther += sqlitePrintf( "\"modified\".\"%w\".\"%w\" IS NOT \"base\".\"%w\".\"%w\"", + exprOther += sqlitePrintf( "\"modified\".\"%w\".\"%w\" IS NOT \"main\".\"%w\".\"%w\"", modifiedTableName, c.name.c_str(), baseTableName, c.name.c_str() ); } } @@ -520,12 +513,12 @@ static std::string sqlFindModified( const TableDiffContext &diffContext ) if ( exprOther.empty() ) { - return sqlitePrintf( "SELECT %s FROM \"modified\".\"%w\", \"base\".\"%w\" WHERE %s", + return sqlitePrintf( 
"SELECT %s FROM \"modified\".\"%w\", \"main\".\"%w\" WHERE %s", colsStr.c_str(), modifiedTableName, baseTableName, exprPk.c_str() ); } else { - return sqlitePrintf( "SELECT %s FROM \"modified\".\"%w\", \"base\".\"%w\" WHERE %s AND (%s)", + return sqlitePrintf( "SELECT %s FROM \"modified\".\"%w\", \"main\".\"%w\" WHERE %s AND (%s)", colsStr.c_str(), modifiedTableName, baseTableName, exprPk.c_str(), exprOther.c_str() ); } } @@ -563,7 +556,7 @@ static void handleInserted( const Context *context, TableDiffContext &diffContex { ChangesetTable chTable = schemaToChangesetTable( diffContext.schemaModified.name, diffContext.schemaModified ); diffContext.writer.beginTable( chTable ); - diffContext.tableEntryWritten = false; + diffContext.tableEntryWritten = true; } ChangesetDataEntry e; @@ -668,6 +661,109 @@ static void handleUpdated( const Context *context, TableDiffContext &diffContext } } +static const TableSchema &findTableSchema( const DatabaseSchema &schema, const std::string &tableName ) +{ + // Find the table schema + const TableSchema *table = nullptr; + for ( const TableSchema &tbl : schema.tables ) + { + if ( tbl.name == tableName ) + { + table = &tbl; + break; + } + } + if ( !table ) + throw GeoDiffException( "Missing schema for table " + tableName ); + return *table; +} + +// To allow diff inversion to work, we first delete all rows when dropping a +// table, and NULL out all rows when dropping a column. 
+static void writeDataChangesForSchemaChange( std::shared_ptr db, const DatabaseSchema &schemaBase, ChangesetWriter &writer, const ChangesetEntry &entry ) +{ + if ( const ChangesetDropColumnEntry *dcEntry = std::get_if( &entry ) ) + { + const TableSchema &table = findTableSchema( schemaBase, dcEntry->tableName ); + + std::string pkeyColStr; + for ( const TableColumnInfo &c : table.columns ) + { + if ( c.isPrimaryKey ) + { + if ( !pkeyColStr.empty() ) + pkeyColStr += ", "; + pkeyColStr += sqlitePrintf( "\"%w\"", c.name.c_str() ); + } + } + if ( pkeyColStr.empty() ) + throw GeoDiffException( "Table " + table.name + " has no primary key" ); + + Sqlite3Stmt stmt; + stmt.prepare( db, "SELECT %s, \"%w\" FROM \"main\".\"%w\" WHERE \"%w\" IS NOT NULL", + pkeyColStr.c_str(), dcEntry->column.name.c_str(), dcEntry->tableName.c_str(), dcEntry->column.name.c_str() ); + + writer.beginTable( schemaToChangesetTable( table.name, table ) ); + int rc; + while ( SQLITE_ROW == ( rc = sqlite3_step( stmt.get() ) ) ) + { + ChangesetDataEntry e; + e.op = ChangesetDataEntry::OpUpdate; + + size_t idxInResult = 0; + for ( size_t i = 0; i < table.columns.size(); ++i ) + { + bool isPkey = table.columns[i].isPrimaryKey; + bool isDroppedCol = ( i == table.columns.size() - 1 ); + + if ( isPkey || isDroppedCol ) + { + Sqlite3Value v( sqlite3_column_value( stmt.get(), static_cast( idxInResult ) ) ); + e.oldValues.push_back( changesetValue( v.value() ) ); + idxInResult++; + } + else + e.oldValues.push_back( Value() ); + + if ( isDroppedCol ) + { + Value nullVal; + nullVal.setNull(); + e.newValues.push_back( nullVal ); + } + else + e.newValues.push_back( Value() ); + } + + writer.writeEntry( e ); + } + } + else if ( const ChangesetDropTableEntry *dtEntry = std::get_if( &entry ) ) + { + const TableSchema &table = findTableSchema( schemaBase, dtEntry->tableName ); + + Sqlite3Stmt stmt; + stmt.prepare( db, "SELECT * FROM \"main\".\"%w\"", dtEntry->tableName.c_str() ); + + writer.beginTable( 
schemaToChangesetTable( table.name, table ) ); + int rc; + while ( SQLITE_ROW == ( rc = sqlite3_step( stmt.get() ) ) ) + { + ChangesetDataEntry e; + e.op = ChangesetDataEntry::OpDelete; + + size_t numColumns = table.columns.size(); + for ( size_t i = 0; i < numColumns; ++i ) + { + Sqlite3Value v( sqlite3_column_value( stmt.get(), static_cast( i ) ) ); + e.oldValues.push_back( changesetValue( v.value() ) ); + } + + writer.writeEntry( e ); + } + } +} + void SqliteDriver::createChangeset( ChangesetWriter &writer ) { DatabaseSchema schemaBase = getSchema( false ); @@ -676,6 +772,7 @@ void SqliteDriver::createChangeset( ChangesetWriter &writer ) auto schemaDiffEntries = diffDatabaseSchema( schemaBase, schemaModified ); for ( const ChangesetEntry &entry : schemaDiffEntries ) { + writeDataChangesForSchemaChange( mDb, schemaBase, writer, entry ); writer.writeEntry( entry ); } @@ -1178,13 +1275,12 @@ void SqliteDriver::createTables( const std::vector &tables ) { // currently we always create geopackage meta tables. Maybe in the future we can skip // that if there is a reason, and have that optional if none of the tables are spatial. 
+ Sqlite3Stmt stmt1; - stmt1.prepare( mDb, "SELECT InitSpatialMetadata('modified');" ); + stmt1.prepare( mDb, "SELECT InitSpatialMetadata('main');" ); int res = sqlite3_step( stmt1.get() ); if ( res != SQLITE_ROW ) - { throwSqliteError( mDb->get(), "Failure initializing spatial metadata" ); - } for ( const TableSchema &tbl : tables ) { @@ -1222,7 +1318,7 @@ void SqliteDriver::createTables( const std::vector &tables ) } } - sql = sqlitePrintf( "CREATE TABLE \"%w\".\"%w\" (", "modified", tbl.name.c_str() ); + sql = sqlitePrintf( "CREATE TABLE main.\"%w\" (", tbl.name.c_str() ); if ( !columns.empty() ) { sql += columns; diff --git a/geodiff/src/drivers/sqliteutils.cpp b/geodiff/src/drivers/sqliteutils.cpp index ebfe9c0d..7dd7b0fa 100644 --- a/geodiff/src/drivers/sqliteutils.cpp +++ b/geodiff/src/drivers/sqliteutils.cpp @@ -434,7 +434,7 @@ void sqliteTables( const Context *context, std::vector sqliteColumnNames( const Context *context, std::shared_ptr db, - const std::string &zDb, /* Database ("main" or "aux") to query */ + const std::string &zDb, /* Database ("main" or "modified") to query */ const std::string &tableName /* Name of table to return details of */ ) { diff --git a/geodiff/src/tableschemadiff.cpp b/geodiff/src/tableschemadiff.cpp index 6d0d3025..e033653f 100644 --- a/geodiff/src/tableschemadiff.cpp +++ b/geodiff/src/tableschemadiff.cpp @@ -7,7 +7,6 @@ #include "changeset.h" #include "geodiffutils.hpp" #include "tableschema.h" -#include #include #include @@ -55,7 +54,7 @@ std::vector diffTableSchema( const TableSchema &base, const Tabl std::back_inserter( deletedColNames ) ); for ( const std::string &colName : deletedColNames ) { - entries.push_back( ChangesetDropColumnEntry{base.name, colName} ); + entries.push_back( ChangesetDropColumnEntry{base.name, *baseColumns.at( colName )} ); } std::vector newColNames; @@ -73,7 +72,9 @@ std::vector diffTableSchema( const TableSchema &base, const Tabl std::back_inserter( oldColNames ) ); for ( const std::string 
&colName : oldColNames ) { - if ( *baseColumns.at( colName ) != *modifiedColumns.at( colName ) ) + // Compare column type by base type enum rather than the exact db-specific + // string, to avoid regression with DB pairs that use compatible types. + if ( !baseColumns.at(colName)->compareWithBaseTypes( *modifiedColumns.at(colName) ) ) throw GeoDiffException( "Columns differ: " + base.name + "." + colName + " and " + modified.name + "." + colName + ")" ); } @@ -96,7 +97,7 @@ std::vector diffDatabaseSchema( const DatabaseSchema &base, cons std::back_inserter( deletedTableNames ) ); for ( const std::string &name : deletedTableNames ) { - entries.push_back( ChangesetDropTableEntry{name} ); + entries.push_back( ChangesetDropTableEntry{name, baseTables.at( name )->columns} ); } std::vector newTableNames; From f654bc4abf30c705981009559a069353313c7a1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ko=C5=88a=C5=99=C3=ADk?= Date: Wed, 6 May 2026 22:21:05 +0200 Subject: [PATCH 05/21] Switch changeset entries to keep tables in shared_ptr Previously when reading entries, there was only one instance of ChangesetTable kept inside the reader, which would get overwritten on a new entry. This is fast when processing entries sequentially, but complicates/slows down the not-uncommon case of keeping entries around for longer. 
--- geodiff/src/changeset.h | 6 +- geodiff/src/changesetconcat.cpp | 9 +-- geodiff/src/changesetreader.cpp | 17 ++--- geodiff/src/changesetreader.h | 2 +- geodiff/src/changesetutils.cpp | 25 +++---- geodiff/src/drivers/postgresdriver.cpp | 5 -- geodiff/src/drivers/sqlitedriver.cpp | 8 --- geodiff/src/geodiff.cpp | 4 +- geodiff/src/tableschemadiff.cpp | 2 +- geodiff/tests/test_changeset_utils.cpp | 94 +++++++++++++------------- 10 files changed, 77 insertions(+), 95 deletions(-) diff --git a/geodiff/src/changeset.h b/geodiff/src/changeset.h index c381e42d..3367063d 100644 --- a/geodiff/src/changeset.h +++ b/geodiff/src/changeset.h @@ -248,15 +248,15 @@ struct ChangesetDataEntry * Optional pointer to the source table information as stored in changeset. * * When the changeset entry has been read by ChangesetReader, the table always will be set to a valid - * instance. Do not delete the instance - it is owned by ChangesetReader. + * instance. * * When the changeset entry is being passed to ChangesetWriter, the table pointer is ignored * and it does not need to be set (writer has an explicit beginTable() call to set table). */ - ChangesetTable *table = nullptr; + std::shared_ptr table; //! 
a quick way for tests to create a changeset entry - static ChangesetDataEntry make( ChangesetTable *t, OperationType o, const std::vector &oldV, const std::vector &newV ) + static ChangesetDataEntry make( std::shared_ptr t, OperationType o, const std::vector &oldV, const std::vector &newV ) { ChangesetDataEntry e; e.op = o; diff --git a/geodiff/src/changesetconcat.cpp b/geodiff/src/changesetconcat.cpp index 1b1156a6..a4ab9315 100644 --- a/geodiff/src/changesetconcat.cpp +++ b/geodiff/src/changesetconcat.cpp @@ -62,7 +62,7 @@ typedef std::unordered_set table; + std::shared_ptr table; TableEntriesSet entries; }; @@ -201,6 +201,7 @@ void concatChangesets( const std::string &outputChangeset ) { // hashtable: table name -> ( fid -> changeset entry ) + // TODO(dvdkon): What does this do with multiple different schemata in one diff (due to DDL entries)? std::unordered_map result; for ( const std::string &inputFilename : filenames ) @@ -220,9 +221,9 @@ void concatChangesets( if ( tableIt == result.end() ) { TableChanges &t = result[ entry.table->name ]; // adds new entry - t.table.reset( new ChangesetTable( *entry.table ) ); + t.table = entry.table; ChangesetDataEntry *e = new ChangesetDataEntry( entry ); - e->table = t.table.get(); + e->table = t.table; t.entries.insert( e ); } else @@ -233,7 +234,7 @@ void concatChangesets( { // row with this pkey is not in our list yet ChangesetDataEntry *e = new ChangesetDataEntry( entry ); - e->table = t.table.get(); + e->table = t.table; t.entries.insert( e ); } else diff --git a/geodiff/src/changesetreader.cpp b/geodiff/src/changesetreader.cpp index b74f97f2..30022973 100644 --- a/geodiff/src/changesetreader.cpp +++ b/geodiff/src/changesetreader.cpp @@ -92,7 +92,7 @@ bool ChangesetReader::isEmpty() const void ChangesetReader::rewind() { mOffset = 0; - mCurrentTable = ChangesetTable(); + mCurrentTable = {}; } char ChangesetReader::readByte() @@ -130,12 +130,12 @@ std::string ChangesetReader::readNullTerminatedString() void 
ChangesetReader::readRowValues( std::vector &values ) { // let's ensure we have the right size of array - if ( values.size() != mCurrentTable.columnCount() ) + if ( values.size() != mCurrentTable->columnCount() ) { - values.resize( mCurrentTable.columnCount() ); + values.resize( mCurrentTable->columnCount() ); } - for ( size_t i = 0; i < mCurrentTable.columnCount(); ++i ) + for ( size_t i = 0; i < mCurrentTable->columnCount(); ++i ) { int type = readByte(); if ( type == Value::TypeInt ) // 0x01 @@ -197,14 +197,15 @@ void ChangesetReader::readTableRecord() if ( nCol < 0 || nCol > 65536 ) throwReaderError( "readByte: unexpected number of columns" ); - mCurrentTable.primaryKeys.clear(); + mCurrentTable = std::make_shared(); + mCurrentTable->primaryKeys.clear(); for ( int i = 0; i < nCol; ++i ) { - mCurrentTable.primaryKeys.push_back( readByte() ); + mCurrentTable->primaryKeys.push_back( readByte() ); } - mCurrentTable.name = readNullTerminatedString(); + mCurrentTable->name = readNullTerminatedString(); } ChangesetDataEntry ChangesetReader::readDataEntry( ChangesetEntryType type ) @@ -221,7 +222,7 @@ ChangesetDataEntry ChangesetReader::readDataEntry( ChangesetEntryType type ) entry.newValues.erase( entry.newValues.begin(), entry.newValues.end() ); entry.op = static_cast( type ); - entry.table = &mCurrentTable; + entry.table = mCurrentTable; return entry; } diff --git a/geodiff/src/changesetreader.h b/geodiff/src/changesetreader.h index e25805d3..6ae6de21 100644 --- a/geodiff/src/changesetreader.h +++ b/geodiff/src/changesetreader.h @@ -59,7 +59,7 @@ class ChangesetReader std::unique_ptr mBuffer; - ChangesetTable mCurrentTable; // currently processed table + std::shared_ptr mCurrentTable; // currently processed table }; diff --git a/geodiff/src/changesetutils.cpp b/geodiff/src/changesetutils.cpp index f309cffc..2347dca1 100644 --- a/geodiff/src/changesetutils.cpp +++ b/geodiff/src/changesetutils.cpp @@ -25,28 +25,20 @@ ChangesetTable schemaToChangesetTable( const 
std::string &tableName, const Table } // Returns inverted changeset entries in reverse order -std::tuple>, std::vector> invertChangesetReverse( ChangesetReader &reader ) +std::vector invertChangesetReverse( ChangesetReader &reader ) { std::string currentTableName; - std::unordered_map> tables; std::vector invertedEntries; ChangesetEntry entry; while ( reader.nextEntry( entry ) ) { if ( ChangesetDataEntry *dataEntry = std::get_if( &entry ) ) { - if ( !dataEntry->table ) - throw GeoDiffException( "ChangesetDataEntry without table data read!" ); - if ( !tables.count( dataEntry->table->name ) ) - { - tables[dataEntry->table->name] = std::make_unique( *dataEntry->table ); - } - if ( dataEntry->op == ChangesetDataEntry::OpInsert ) { ChangesetDataEntry out; out.op = ChangesetDataEntry::OpDelete; - out.table = tables[dataEntry->table->name].get(); + out.table = dataEntry->table; out.oldValues = dataEntry->newValues; invertedEntries.push_back( out ); } @@ -54,7 +46,7 @@ std::tuple>, std { ChangesetDataEntry out; out.op = ChangesetDataEntry::OpInsert; - out.table = tables[dataEntry->table->name].get(); + out.table = dataEntry->table; out.newValues = dataEntry->oldValues; invertedEntries.push_back( out ); } @@ -62,7 +54,7 @@ std::tuple>, std { ChangesetDataEntry out; out.op = ChangesetDataEntry::OpUpdate; - out.table = tables[dataEntry->table->name].get(); + out.table = dataEntry->table; out.newValues = dataEntry->oldValues; out.oldValues = dataEntry->newValues; // if a column is a part of pkey and has not been changed, @@ -116,23 +108,22 @@ std::tuple>, std throw GeoDiffException( "Cannot invert changeset entry variant " + std::to_string( entry.index() ) ); } } - return {std::move( tables ), invertedEntries}; + return invertedEntries; } void invertChangeset( ChangesetReader &reader, ChangesetWriter &writer ) { - auto result = invertChangesetReverse( reader ); - std::vector &invertedReverse = std::get<1>( result ); + std::vector invertedReverse = invertChangesetReverse( 
reader ); ChangesetTable *currentTable = nullptr; for ( size_t i = 1; i <= invertedReverse.size(); i++ ) { const auto &entry = invertedReverse[invertedReverse.size() - i]; if ( const ChangesetDataEntry *dataEntry = std::get_if( &entry ) ) { - if ( dataEntry->table != currentTable ) + if ( dataEntry->table.get() != currentTable ) { writer.beginTable( *dataEntry->table ); - currentTable = dataEntry->table; + currentTable = dataEntry->table.get(); } } diff --git a/geodiff/src/drivers/postgresdriver.cpp b/geodiff/src/drivers/postgresdriver.cpp index d0077b80..4d6f032d 100644 --- a/geodiff/src/drivers/postgresdriver.cpp +++ b/geodiff/src/drivers/postgresdriver.cpp @@ -904,7 +904,6 @@ void PostgresDriver::applyChangeset( ChangesetReader &reader ) std::vector conflictingEntries; ChangesetEntry entry; PostgresChangeApplyState state; - std::unordered_map> tableCopies; while ( reader.nextEntry( entry ) ) { if ( ChangesetDataEntry *dataEntry = std::get_if( &entry ) ) @@ -916,10 +915,6 @@ void PostgresDriver::applyChangeset( ChangesetReader &reader ) case ChangeApplyResult::Skipped: break; case ChangeApplyResult::ConstraintConflict: - if ( tableCopies.count( dataEntry->table->name ) == 0 ) - // cppcheck-suppress stlFindInsert - tableCopies[dataEntry->table->name] = std::unique_ptr( new ChangesetTable( *dataEntry->table ) ); - dataEntry->table = tableCopies[dataEntry->table->name].get(); conflictingEntries.push_back( *dataEntry ); break; case ChangeApplyResult::NoChange: diff --git a/geodiff/src/drivers/sqlitedriver.cpp b/geodiff/src/drivers/sqlitedriver.cpp index b9f54b33..46ebf4ea 100644 --- a/geodiff/src/drivers/sqlitedriver.cpp +++ b/geodiff/src/drivers/sqlitedriver.cpp @@ -1109,7 +1109,6 @@ void SqliteDriver::applyChangeset( ChangesetReader &reader ) std::vector conflictingEntries; ChangesetEntry entry; SqliteChangeApplyState state; - std::unordered_map> tableCopies; while ( reader.nextEntry( entry ) ) { if ( ChangesetDataEntry *dataEntry = std::get_if( &entry ) ) @@ 
-1122,13 +1121,6 @@ void SqliteDriver::applyChangeset( ChangesetReader &reader ) break; // Applied correctly, continue onward. case ChangeApplyResult::ConstraintConflict: // Ordering conflict found, handle later. - // Effectively copying the entry isn't simple, since ChangesetReader is - // happy to change entry.table under our feet. We need to copy the - // table object, ideally only keeping one per table. - if ( tableCopies.count( dataEntry->table->name ) == 0 ) - // cppcheck-suppress stlFindInsert - tableCopies[dataEntry->table->name] = std::unique_ptr( new ChangesetTable( *dataEntry->table ) ); - dataEntry->table = tableCopies[dataEntry->table->name].get(); conflictingEntries.push_back( *dataEntry ); break; case ChangeApplyResult::NoChange: diff --git a/geodiff/src/geodiff.cpp b/geodiff/src/geodiff.cpp index ac5def05..2e52e280 100644 --- a/geodiff/src/geodiff.cpp +++ b/geodiff/src/geodiff.cpp @@ -1274,8 +1274,8 @@ int GEODIFF_CE_operation( GEODIFF_ContextH /*contextHandle*/, GEODIFF_ChangesetE GEODIFF_ChangesetTableH GEODIFF_CE_table( GEODIFF_ContextH /*contextHandle*/, GEODIFF_ChangesetEntryH entryHandle ) { - ChangesetTable *table = static_cast( entryHandle )->table; - return table; + std::shared_ptr table = static_cast( entryHandle )->table; + return table.get(); } int GEODIFF_CE_countValues( GEODIFF_ContextH /*contextHandle*/, GEODIFF_ChangesetEntryH entryHandle ) diff --git a/geodiff/src/tableschemadiff.cpp b/geodiff/src/tableschemadiff.cpp index e033653f..5082c146 100644 --- a/geodiff/src/tableschemadiff.cpp +++ b/geodiff/src/tableschemadiff.cpp @@ -73,7 +73,7 @@ std::vector diffTableSchema( const TableSchema &base, const Tabl for ( const std::string &colName : oldColNames ) { // Compare column type by base type enum rather than the exact db-specific - // string, to avoid regression with DB pairs that use compatible types. + // string to avoid regression with DB pairs that use compatible types. 
if ( !baseColumns.at(colName)->compareWithBaseTypes( *modifiedColumns.at(colName) ) ) throw GeoDiffException( "Columns differ: " + base.name + "." + colName + " and " + modified.name + "." + colName + ")" ); diff --git a/geodiff/tests/test_changeset_utils.cpp b/geodiff/tests/test_changeset_utils.cpp index c6c7e6df..589396b9 100644 --- a/geodiff/tests/test_changeset_utils.cpp +++ b/geodiff/tests/test_changeset_utils.cpp @@ -4,6 +4,8 @@ */ #include "gtest/gtest.h" +#include +#include "changeset.h" #include "geodiff_testutils.hpp" #include "geodiff.h" @@ -181,62 +183,62 @@ void testConcat( std::string testName, void testConcatOneTable( std::string testName, - const ChangesetTable &table, + const std::shared_ptr table, std::vector entries1, std::vector entries2, std::vector entriesExpected ) { testConcat( testName, - { std::make_pair( table.name, table ) }, - { std::make_pair( table.name, entries1 ) }, - { std::make_pair( table.name, entries2 ) }, - { std::make_pair( table.name, entriesExpected ) } ); + { std::make_pair( table->name, *table ) }, + { std::make_pair( table->name, entries1 ) }, + { std::make_pair( table->name, entries2 ) }, + { std::make_pair( table->name, entriesExpected ) } ); } TEST( ChangesetUtils, test_concat_changesets_simple_table ) { // basic table with one pkey column - ChangesetTable tableFoo; - tableFoo.name = "foo"; - tableFoo.primaryKeys.push_back( true ); // fid (pkey) - tableFoo.primaryKeys.push_back( false ); // name - tableFoo.primaryKeys.push_back( false ); // rating + std::shared_ptr tableFoo = std::make_shared(); + tableFoo->name = "foo"; + tableFoo->primaryKeys.push_back( true ); // fid (pkey) + tableFoo->primaryKeys.push_back( false ); // name + tableFoo->primaryKeys.push_back( false ); // rating ChangesetDataEntry fooInsert123 = ChangesetDataEntry::make( - &tableFoo, ChangesetDataEntry::OpInsert, {}, + tableFoo, ChangesetDataEntry::OpInsert, {}, { Value::makeInt( 123 ), Value::makeText( "hello" ), Value::makeInt( 5 ) } ); 
ChangesetDataEntry fooDelete123 = ChangesetDataEntry::make( - &tableFoo, ChangesetDataEntry::OpDelete, + tableFoo, ChangesetDataEntry::OpDelete, { Value::makeInt( 123 ), Value::makeText( "hello" ), Value::makeInt( 5 ) }, {} ); ChangesetDataEntry fooUpdate123 = ChangesetDataEntry::make( - &tableFoo, ChangesetDataEntry::OpUpdate, + tableFoo, ChangesetDataEntry::OpUpdate, { Value::makeInt( 123 ), Value::makeText( "hello" ), Value::makeInt( 5 ) }, { Value(), Value::makeText( "world" ), Value::makeInt( 4 ) } ); ChangesetDataEntry fooDelete123_2 = ChangesetDataEntry::make( - &tableFoo, ChangesetDataEntry::OpDelete, + tableFoo, ChangesetDataEntry::OpDelete, { Value::makeInt( 123 ), Value::makeText( "world" ), Value::makeInt( 4 ) }, {} ); ChangesetDataEntry fooUpdate123_2 = ChangesetDataEntry::make( - &tableFoo, ChangesetDataEntry::OpUpdate, + tableFoo, ChangesetDataEntry::OpUpdate, { Value::makeInt( 123 ), Value(), Value::makeInt( 4 ) }, { Value(), Value(), Value::makeInt( 1 ) } ); ChangesetDataEntry fooUpdate123_inverse = ChangesetDataEntry::make( - &tableFoo, ChangesetDataEntry::OpUpdate, + tableFoo, ChangesetDataEntry::OpUpdate, { Value::makeInt( 123 ), Value::makeText( "world" ), Value::makeInt( 4 ) }, { Value(), Value::makeText( "hello" ), Value::makeInt( 5 ) } ); ChangesetDataEntry fooUpdate123_pkey = ChangesetDataEntry::make( - &tableFoo, ChangesetDataEntry::OpUpdate, + tableFoo, ChangesetDataEntry::OpUpdate, { Value::makeInt( 123 ), Value(), Value() }, { Value::makeInt( 124 ), Value(), Value() } ); ChangesetDataEntry fooUpdate456 = ChangesetDataEntry::make( - &tableFoo, ChangesetDataEntry::OpUpdate, + tableFoo, ChangesetDataEntry::OpUpdate, { Value::makeInt( 456 ), Value(), Value::makeInt( 1 ) }, { Value(), Value(), Value::makeInt( 2 ) } ); @@ -244,7 +246,7 @@ TEST( ChangesetUtils, test_concat_changesets_simple_table ) testConcatOneTable( "foo-insert-update", tableFoo, { fooInsert123 }, { fooUpdate123 }, { - ChangesetDataEntry::make( &tableFoo, 
ChangesetDataEntry::OpInsert, {}, + ChangesetDataEntry::make( tableFoo, ChangesetDataEntry::OpInsert, {}, { Value::makeInt( 123 ), Value::makeText( "world" ), Value::makeInt( 4 ) } ) } ); @@ -253,7 +255,7 @@ TEST( ChangesetUtils, test_concat_changesets_simple_table ) testConcatOneTable( "foo-update-update", tableFoo, { fooUpdate123 }, { fooUpdate123_2 }, { - ChangesetDataEntry::make( &tableFoo, ChangesetDataEntry::OpUpdate, + ChangesetDataEntry::make( tableFoo, ChangesetDataEntry::OpUpdate, { Value::makeInt( 123 ), Value::makeText( "hello" ), Value::makeInt( 5 ) }, { Value(), Value::makeText( "world" ), Value::makeInt( 1 ) } ) @@ -263,7 +265,7 @@ TEST( ChangesetUtils, test_concat_changesets_simple_table ) testConcatOneTable( "foo-update-delete", tableFoo, { fooUpdate123 }, { fooDelete123_2 }, { - ChangesetDataEntry::make( &tableFoo, ChangesetDataEntry::OpDelete, + ChangesetDataEntry::make( tableFoo, ChangesetDataEntry::OpDelete, { Value::makeInt( 123 ), Value::makeText( "hello" ), Value::makeInt( 5 ) }, {} ) @@ -271,7 +273,7 @@ TEST( ChangesetUtils, test_concat_changesets_simple_table ) testConcatOneTable( "foo-delete-insert", tableFoo, { fooDelete123_2 }, { fooInsert123 }, { - ChangesetDataEntry::make( &tableFoo, ChangesetDataEntry::OpUpdate, + ChangesetDataEntry::make( tableFoo, ChangesetDataEntry::OpUpdate, { Value::makeInt( 123 ), Value::makeText( "world" ), Value::makeInt( 4 ) }, { Value(), Value::makeText( "hello" ), Value::makeInt( 5 ) } ) @@ -295,17 +297,17 @@ TEST( ChangesetUtils, test_concat_changesets_simple_table ) TEST( ChangesetUtils, test_concat_changesets_no_pkey_table ) { // a table with no pkey - ChangesetTable tableNoPkey; - tableNoPkey.name = "table_no_pkey"; - tableNoPkey.primaryKeys.push_back( false ); - tableNoPkey.primaryKeys.push_back( false ); + std::shared_ptr tableNoPkey = std::make_shared(); + tableNoPkey->name = "table_no_pkey"; + tableNoPkey->primaryKeys.push_back( false ); + tableNoPkey->primaryKeys.push_back( false ); 
ChangesetDataEntry noPkeyInsert1 = ChangesetDataEntry::make( - &tableNoPkey, ChangesetDataEntry::OpInsert, {}, + tableNoPkey, ChangesetDataEntry::OpInsert, {}, { Value::makeInt( 1 ), Value::makeText( "hey" ) } ); ChangesetDataEntry noPkeyUpdate2 = ChangesetDataEntry::make( - &tableNoPkey, ChangesetDataEntry::OpUpdate, + tableNoPkey, ChangesetDataEntry::OpUpdate, { Value::makeInt( 2 ), Value::makeText( "huh" ) }, { Value(), Value::makeText( "ho!" ) } ); @@ -316,32 +318,32 @@ TEST( ChangesetUtils, test_concat_changesets_no_pkey_table ) TEST( ChangesetUtils, test_concat_changesets_multiple_tables ) { - ChangesetTable tableFoo; - tableFoo.name = "foo"; - tableFoo.primaryKeys.push_back( true ); // fid (pkey) - tableFoo.primaryKeys.push_back( false ); // name - tableFoo.primaryKeys.push_back( false ); // rating + std::shared_ptr tableFoo = std::make_shared(); + tableFoo->name = "foo"; + tableFoo->primaryKeys.push_back( true ); // fid (pkey) + tableFoo->primaryKeys.push_back( false ); // name + tableFoo->primaryKeys.push_back( false ); // rating - ChangesetTable tableBar; - tableBar.name = "bar"; - tableBar.primaryKeys.push_back( true ); // fid (pkey) - tableBar.primaryKeys.push_back( false ); // name + std::shared_ptr tableBar = std::make_shared(); + tableBar->name = "bar"; + tableBar->primaryKeys.push_back( true ); // fid (pkey) + tableBar->primaryKeys.push_back( false ); // name ChangesetDataEntry fooInsert123 = ChangesetDataEntry::make( - &tableFoo, ChangesetDataEntry::OpInsert, {}, + tableFoo, ChangesetDataEntry::OpInsert, {}, { Value::makeInt( 123 ), Value::makeText( "hello" ), Value::makeInt( 5 ) } ); ChangesetDataEntry barInsert123 = ChangesetDataEntry::make( - &tableBar, ChangesetDataEntry::OpInsert, {}, + tableBar, ChangesetDataEntry::OpInsert, {}, { Value::makeInt( 123 ), Value::makeText( "ha!" 
) } ); ChangesetDataEntry barUpdate123 = ChangesetDataEntry::make( - &tableFoo, ChangesetDataEntry::OpUpdate, + tableFoo, ChangesetDataEntry::OpUpdate, { Value::makeInt( 123 ), Value::makeText( "ha!" ) }, { Value(), Value::makeText( ":-)" ) } ); testConcat( "multi-related-insert-update", - { std::make_pair( "foo", tableFoo ), std::make_pair( "bar", tableBar ) }, + { std::make_pair( "foo", *tableFoo ), std::make_pair( "bar", *tableBar ) }, // changeset 1 { std::make_pair( "foo", std::vector( { fooInsert123 } ) ), @@ -352,17 +354,17 @@ TEST( ChangesetUtils, test_concat_changesets_multiple_tables ) // expected result { std::make_pair( "foo", std::vector( { - ChangesetDataEntry::make( &tableFoo, ChangesetDataEntry::OpInsert, {}, + ChangesetDataEntry::make( tableFoo, ChangesetDataEntry::OpInsert, {}, { Value::makeInt( 123 ), Value::makeText( "hello" ), Value::makeInt( 5 ) } ) } ) ), std::make_pair( "bar", std::vector( { - ChangesetDataEntry::make( &tableBar, ChangesetDataEntry::OpInsert, {}, + ChangesetDataEntry::make( tableBar, ChangesetDataEntry::OpInsert, {}, { Value::makeInt( 123 ), Value::makeText( ":-)" ) } ) } ) ) } ); testConcat( "multi-unrelated-insert-update", - { std::make_pair( "foo", tableFoo ), std::make_pair( "bar", tableBar ) }, + { std::make_pair( "foo", *tableFoo ), std::make_pair( "bar", *tableBar ) }, // changeset 1 { std::make_pair( "foo", std::vector( { fooInsert123 } ) ) }, // changeset 2 @@ -370,11 +372,11 @@ TEST( ChangesetUtils, test_concat_changesets_multiple_tables ) // expected result { std::make_pair( "foo", std::vector( { - ChangesetDataEntry::make( &tableFoo, ChangesetDataEntry::OpInsert, {}, + ChangesetDataEntry::make( tableFoo, ChangesetDataEntry::OpInsert, {}, { Value::makeInt( 123 ), Value::makeText( "hello" ), Value::makeInt( 5 ) } ) } ) ), std::make_pair( "bar", std::vector( { - ChangesetDataEntry::make( &tableBar, ChangesetDataEntry::OpUpdate, + ChangesetDataEntry::make( tableBar, ChangesetDataEntry::OpUpdate, { Value::makeInt( 123 
), Value::makeText( "ha!" ) }, { Value(), Value::makeText( ":-)" ) } ) } ) ) From cb8cb5d9fd8db2a7812710550c86111e32e2a1ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ko=C5=88a=C5=99=C3=ADk?= Date: Sun, 10 May 2026 19:18:39 +0200 Subject: [PATCH 06/21] Add code for applying schema changes in SQLite --- geodiff/src/drivers/sqlitedriver.cpp | 460 +++++++++++++++++---------- geodiff/src/drivers/sqlitedriver.h | 5 +- 2 files changed, 298 insertions(+), 167 deletions(-) diff --git a/geodiff/src/drivers/sqlitedriver.cpp b/geodiff/src/drivers/sqlitedriver.cpp index 46ebf4ea..71782b39 100644 --- a/geodiff/src/drivers/sqlitedriver.cpp +++ b/geodiff/src/drivers/sqlitedriver.cpp @@ -9,6 +9,7 @@ #include "changesetreader.h" #include "changesetwriter.h" #include "changesetutils.h" +#include "driver.h" #include "geodiffcontext.hpp" #include "geodifflogger.hpp" #include "geodiffutils.hpp" @@ -21,12 +22,12 @@ #include -void SqliteDriver::logApplyConflict( const std::string &type, const ChangesetDataEntry &entry, bool isDbErr ) const +void SqliteDriver::logApplyConflict( const std::string &type, const ChangesetEntry &entry, bool isDbErr ) const { std::string msg = "CONFLICT: " + type; if ( isDbErr ) msg += " (" + std::string( sqlite3_errmsg( mDb->get() ) ) + ")"; - msg += ":\n" + changesetDataEntryToJSON( entry ).dump( 2 ); + msg += ":\n" + changesetEntryToJSON( entry ).dump( 2 ); context()->logger().warn( msg ); } @@ -967,7 +968,7 @@ ChangeApplyResult SqliteDriver::applyDataChange( SqliteChangeApplyState &state, if ( context()->isTableSkipped( tableName ) ) // skip table if necessary return ChangeApplyResult::Skipped; - if ( state.tableState.count( tableName ) == 0 ) + if ( state.tableState.count( entry.table.get() ) == 0 ) { TableSchema schema = tableSchema( tableName ); @@ -983,14 +984,14 @@ ChangeApplyResult SqliteDriver::applyDataChange( SqliteChangeApplyState &state, throw GeoDiffException( "Mismatch of primary keys in table: " + tableName ); } - 
SqliteChangeApplyState::TableState &tbl = state.tableState[tableName]; + SqliteChangeApplyState::TableState &tbl = state.tableState[entry.table.get()]; tbl.schema = schema; tbl.stmtInsert.prepare( mDb, sqlForInsert( tableName, schema ) ); tbl.stmtUpdate.prepare( mDb, sqlForUpdate( tableName, schema ) ); tbl.stmtDelete.prepare( mDb, sqlForDelete( tableName, schema ) ); } - SqliteChangeApplyState::TableState &tbl = state.tableState[tableName]; + SqliteChangeApplyState::TableState &tbl = state.tableState[entry.table.get()]; if ( entry.op == SQLITE_INSERT ) { @@ -1068,6 +1069,242 @@ ChangeApplyResult SqliteDriver::applyDataChange( SqliteChangeApplyState &state, return ChangeApplyResult::Applied; } +static void addGpkgCrsDefinition( std::shared_ptr db, const CrsDefinition &crs ) +{ + // gpkg_spatial_ref_sys + // srs_name TEXT NOT NULL, srs_id INTEGER NOT NULL PRIMARY KEY, + // organization TEXT NOT NULL, organization_coordsys_id INTEGER NOT NULL, + // definition TEXT NOT NULL, description TEXT + + Sqlite3Stmt stmtCheck; + stmtCheck.prepare( db, "select count(*) from gpkg_spatial_ref_sys where srs_id = %d;", crs.srsId ); + int res = sqlite3_step( stmtCheck.get() ); + if ( res != SQLITE_ROW ) + { + throwSqliteError( db->get(), "Failed to access gpkg_spatial_ref_sys table" ); + } + + if ( sqlite3_column_int( stmtCheck.get(), 0 ) ) + return; // already there + + if ( crs.wkt.size() == 0 ) + throw GeoDiffException( "Tried to add new CRS without WKT definition" ); + + Sqlite3Stmt stmt; + stmt.prepare( db, "INSERT INTO gpkg_spatial_ref_sys VALUES ('%q:%d', %d, '%q', %d, '%q', '')", + crs.authName.c_str(), crs.authCode, crs.srsId, crs.authName.c_str(), crs.authCode, + crs.wkt.c_str() ); + res = sqlite3_step( stmt.get() ); + if ( res != SQLITE_DONE ) + { + throwSqliteError( db->get(), "Failed to insert CRS to gpkg_spatial_ref_sys table" ); + } +} + +static void addGpkgSpatialTable( std::shared_ptr db, const TableSchema &tbl, const Extent &extent ) +{ + size_t i = 
tbl.geometryColumn(); + if ( i == SIZE_MAX ) + throw GeoDiffException( "Adding non-spatial tables is not supported: " + tbl.name ); + + const TableColumnInfo &col = tbl.columns[i]; + std::string geomColumn = col.name; + std::string geomType = col.geomType; + int srsId = col.geomSrsId; + bool hasZ = col.geomHasZ; + bool hasM = col.geomHasM; + + // gpkg_contents + // table_name TEXT NOT NULL PRIMARY KEY, data_type TEXT NOT NULL, + // identifier TEXT, description TEXT DEFAULT '', + // last_change DATETIME NOT NULL DEFAULT (...), + // min_x DOUBLE, min_y DOUBLE, max_x DOUBLE, max_y DOUBLE, + // srs_id INTEGER + + Sqlite3Stmt stmt; + stmt.prepare( db, "INSERT INTO gpkg_contents (table_name, data_type, identifier, min_x, min_y, max_x, max_y, srs_id) " + "VALUES ('%q', 'features', '%q', %f, %f, %f, %f, %d)", + tbl.name.c_str(), tbl.name.c_str(), extent.minX, extent.minY, extent.maxX, extent.maxY, srsId ); + int res = sqlite3_step( stmt.get() ); + if ( res != SQLITE_DONE ) + { + throwSqliteError( db->get(), "Failed to insert row to gpkg_contents table" ); + } + + // gpkg_geometry_columns + // table_name TEXT NOT NULL, column_name TEXT NOT NULL, + // geometry_type_name TEXT NOT NULL, srs_id INTEGER NOT NULL, + // z TINYINT NOT NULL,m TINYINT NOT NULL + + Sqlite3Stmt stmtGeomCol; + stmtGeomCol.prepare( db, "INSERT INTO gpkg_geometry_columns VALUES ('%q', '%q', '%q', %d, %d, %d)", + tbl.name.c_str(), geomColumn.c_str(), geomType.c_str(), srsId, hasZ, hasM ); + res = sqlite3_step( stmtGeomCol.get() ); + if ( res != SQLITE_DONE ) + { + throwSqliteError( db->get(), "Failed to insert row to gpkg_geometry_columns table" ); + } +} + +static void createTable( std::shared_ptr db, const TableSchema &tbl ) +{ + if ( tbl.geometryColumn() != SIZE_MAX ) + { + addGpkgCrsDefinition( db, tbl.crs ); + addGpkgSpatialTable( db, tbl, Extent() ); // TODO: is it OK to set zeros? 
+ } + + std::string sql, pkeyCols, columns; + for ( const TableColumnInfo &c : tbl.columns ) + { + if ( !columns.empty() ) + columns += ", "; + + columns += sqlitePrintf( "\"%w\" %s", c.name.c_str(), c.type.dbType.c_str() ); + + if ( c.isNotNull ) + columns += " NOT NULL"; + + // we have also c.isAutoIncrement, but the SQLite AUTOINCREMENT keyword only applies + // to primary keys, and according to the docs, ordinary tables with INTEGER PRIMARY KEY column + // (which becomes alias to ROWID) does auto-increment, and AUTOINCREMENT just prevents + // reuse of ROWIDs from previously deleted rows. + // See https://sqlite.org/autoinc.html + + if ( c.isPrimaryKey ) + { + if ( !pkeyCols.empty() ) + pkeyCols += ", "; + pkeyCols += sqlitePrintf( "\"%w\"", c.name.c_str() ); + } + } + + sql = sqlitePrintf( "CREATE TABLE \"%w\" (", tbl.name.c_str() ); + if ( !columns.empty() ) + { + sql += columns; + } + if ( !pkeyCols.empty() ) + { + sql += ", PRIMARY KEY (" + pkeyCols + ")"; + } + sql += ");"; + + Sqlite3Stmt stmt; + stmt.prepare( db, sql ); + if ( sqlite3_step( stmt.get() ) != SQLITE_DONE ) + { + throwSqliteError( db->get(), "Failure creating table: " + tbl.name ); + } +} + +void SqliteDriver::applySchemaChange( const ChangesetEntry &entry ) +{ + if ( const ChangesetCreateTableEntry *ctEntry = std::get_if( &entry ) ) + { + // TODO: Also save full CRS definition inside diff? It's pretty large and + // we'd need it for all tables with geometry columns & geometry columns + // themselves. 
+ CrsDefinition tableCrs; + for ( const TableColumnInfo &col : ctEntry->columns ) + { + if ( col.isGeometry ) + tableCrs.srsId = col.geomSrsId; + } + + Sqlite3SavepointTransaction transaction( context(), mDb ); + try + { + createTable( mDb, { ctEntry->tableName, ctEntry->columns, tableCrs } ); + } + catch ( const GeoDiffException &ex ) + { + // TODO: Make sure this only catches sqlite errors on CREATE TABLE + logApplyConflict( "create_table_failed", entry, true ); + throw; + } + transaction.commitChanges(); + } + else if ( const ChangesetDropTableEntry *dtEntry = std::get_if( &entry ) ) + { + // Check there's no data in table (zero rows) + { + Sqlite3Stmt stmt; + stmt.prepare( mDb, "SELECT COUNT(*) FROM \"%w\"", dtEntry->tableName.c_str() ); + if ( sqlite3_step( stmt.get() ) != SQLITE_ROW ) + throwSqliteError( mDb->get(), "Getting row count in " + dtEntry->tableName ); + if ( sqlite3_column_int( stmt.get(), 0 ) != 0 ) + { + logApplyConflict( "drop_table_not_empty", entry ); + throw GeoDiffException( "Tried to drop non-empty table " + dtEntry->tableName ); + } + } + + Sqlite3Stmt stmt; + stmt.prepare( mDb, "DROP TABLE \"%w\"", dtEntry->tableName.c_str() ); + if ( sqlite3_step( stmt.get() ) != SQLITE_DONE ) + { + logApplyConflict( "drop_table_failed", entry, true ); + throwSqliteError( mDb->get(), "Failure deleting table: " + dtEntry->tableName ); + } + } + else if ( const ChangesetAddColumnEntry *acEntry = std::get_if( &entry ) ) + { + if ( acEntry->column.isGeometry ) + // Would need changing gpkg metadata + throw GeoDiffException( "Adding geometry columns is not supported" ); + if ( acEntry->column.isPrimaryKey ) + throw GeoDiffException( "Adding column to primary key is not supported" ); + + std::string sql = sqlitePrintf( "ALTER TABLE \"%w\" ADD COLUMN \"%w\" %s", + acEntry->tableName.c_str(), acEntry->column.name.c_str(), acEntry->column.type.dbType.c_str() ); + + if ( acEntry->column.isNotNull ) + sql += " NOT NULL"; + Sqlite3Stmt stmt; + stmt.prepare( mDb,
"%s", sql.c_str() ); + if ( sqlite3_step( stmt.get() ) != SQLITE_DONE ) + { + logApplyConflict( "add_column_failed", entry, true ); + throwSqliteError( mDb->get(), "Failure adding column: " + acEntry->tableName + "." + acEntry->column.name ); + } + } + else if ( const ChangesetDropColumnEntry *dcEntry = std::get_if( &entry ) ) + { + if ( dcEntry->column.isGeometry ) + throw GeoDiffException( "Dropping geometry columns is not supported" ); + if ( dcEntry->column.isPrimaryKey ) + throw GeoDiffException( "Dropping column from primary key is not supported" ); + + // Check there's no data in the column (all NULLs) + { + Sqlite3Stmt stmt; + stmt.prepare( mDb, "SELECT COUNT(*) FROM \"%w\" WHERE \"%w\" IS NOT NULL", + dcEntry->tableName.c_str(), dcEntry->column.name.c_str() ); + if ( sqlite3_step( stmt.get() ) != SQLITE_ROW ) + throwSqliteError( mDb->get(), "Getting row count in " + dcEntry->tableName + "." + dcEntry->column.name ); + if ( sqlite3_column_int( stmt.get(), 0 ) != 0 ) + { + logApplyConflict( "drop_column_not_empty", entry ); + throw GeoDiffException( "Tried to drop non-empty column " + dcEntry->tableName + "." + dcEntry->column.name ); + } + } + + Sqlite3Stmt stmt; + stmt.prepare( mDb, "ALTER TABLE \"%w\" DROP COLUMN \"%w\"", + dcEntry->tableName.c_str(), dcEntry->column.name.c_str() ); + if ( sqlite3_step( stmt.get() ) != SQLITE_DONE ) + { + logApplyConflict( "drop_column_failed", entry, true ); + throwSqliteError( mDb->get(), "Failure deleting column: " + dcEntry->tableName + "." + dcEntry->column.name ); + } + } + else + { + throw GeoDiffException( "Unhandled entry type (should have been schema change) " + + std::to_string( entry.index() ) ); + } +} void SqliteDriver::applyChangeset( ChangesetReader &reader ) { @@ -1105,66 +1342,82 @@ void SqliteDriver::applyChangeset( ChangesetReader &reader ) statement.close(); } + // Applying some entries may fail due to constraints, since they require the + // entries to be in some specific, unknown order.
To work around this, we + // retry applying the conflicting entries until either we apply them all or we + // get stuck. + // + // We can only reorder data entries, not schema-changing DDL entries, so we + // gather conflicting data entries in a list until either we run out of + // entries or read a schema-change entry. + int unrecoverableConflictCount = 0; std::vector conflictingEntries; ChangesetEntry entry; SqliteChangeApplyState state; - while ( reader.nextEntry( entry ) ) + while ( true ) { - if ( ChangesetDataEntry *dataEntry = std::get_if( &entry ) ) + bool haveEntry = reader.nextEntry( entry ); + if ( !haveEntry || !std::holds_alternative( entry ) ) { - ChangeApplyResult res = applyDataChange( state, *dataEntry ); - switch ( res ) + // We can't reorder entries beyond this point (see above), retry applying + // conflicting ones. + std::vector newConflictingEntries; + while ( conflictingEntries.size() > 0 ) { - case ChangeApplyResult::Applied: - case ChangeApplyResult::Skipped: - break; // Applied correctly, continue onward. - case ChangeApplyResult::ConstraintConflict: - // Ordering conflict found, handle later. - conflictingEntries.push_back( *dataEntry ); - break; - case ChangeApplyResult::NoChange: - unrecoverableConflictCount++; // Other issue, will throw at the end. - break; + for ( const ChangesetDataEntry &centry : conflictingEntries ) + { + ChangeApplyResult res = applyDataChange( state, centry ); + switch ( res ) + { + case ChangeApplyResult::Applied: + case ChangeApplyResult::Skipped: + break; // Applied correctly, don't put it in the new list. + case ChangeApplyResult::ConstraintConflict: + newConflictingEntries.push_back( centry ); // Still conflicting, keep in list. + break; + case ChangeApplyResult::NoChange: + unrecoverableConflictCount++; // Other issue, will throw at the end. + break; + } + } + + // If we haven't been able to apply any of the conflicting entries this + // loop, then these conflicts can't be resolved by reordering entries. 
+ if ( newConflictingEntries.size() == conflictingEntries.size() ) + { + for ( const ChangesetDataEntry &centry : conflictingEntries ) + logApplyConflict( "unresolvable_conflict", centry ); + throw GeoDiffConflictsException( "Could not resolve dependencies in constraint conflicts." ); + } + conflictingEntries = newConflictingEntries; + newConflictingEntries.clear(); } } - // TODO(dvdkon): Handle DDL entries - } + if ( !haveEntry ) + break; - // Applying some entries may fail due to constraints, since they require the - // entries to be in some specific, unknown order. To work around this, we - // retry applying the conflicting entries until either we apply them all or we - // get stuck. - std::vector newConflictingEntries; - while ( conflictingEntries.size() > 0 ) - { - for ( const ChangesetDataEntry &centry : conflictingEntries ) + if ( const ChangesetDataEntry *dataEntry = std::get_if( &entry ) ) { - ChangeApplyResult res = applyDataChange( state, centry ); + ChangeApplyResult res = applyDataChange( state, *dataEntry ); switch ( res ) { case ChangeApplyResult::Applied: case ChangeApplyResult::Skipped: - break; // Applied correctly, don't put it in the new list. + break; // Applied correctly, continue onward. case ChangeApplyResult::ConstraintConflict: - newConflictingEntries.push_back( centry ); // Still conflicting, keep in list. + // Ordering conflict found, handle later. + conflictingEntries.push_back( *dataEntry ); break; case ChangeApplyResult::NoChange: unrecoverableConflictCount++; // Other issue, will throw at the end. break; } } - - // If we haven't been able to apply any of the conflicting entries this - // loop, then these conflicts can't be resolved by reordering entries. - if ( newConflictingEntries.size() == conflictingEntries.size() ) + else { - for ( const ChangesetDataEntry &centry : conflictingEntries ) - logApplyConflict( "unresolvable_conflict", centry ); - throw GeoDiffConflictsException( "Could not resolve dependencies in constraint conflicts." 
); + applySchemaChange( entry ); } - conflictingEntries = newConflictingEntries; - newConflictingEntries.clear(); } // recreate triggers @@ -1188,81 +1441,6 @@ void SqliteDriver::applyChangeset( ChangesetReader &reader ) } } - -static void addGpkgCrsDefinition( std::shared_ptr db, const CrsDefinition &crs ) -{ - // gpkg_spatial_ref_sys - // srs_name TEXT NOT NULL, srs_id INTEGER NOT NULL PRIMARY KEY, - // organization TEXT NOT NULL, organization_coordsys_id INTEGER NOT NULL, - // definition TEXT NOT NULL, description TEXT - - Sqlite3Stmt stmtCheck; - stmtCheck.prepare( db, "select count(*) from gpkg_spatial_ref_sys where srs_id = %d;", crs.srsId ); - int res = sqlite3_step( stmtCheck.get() ); - if ( res != SQLITE_ROW ) - { - throwSqliteError( db->get(), "Failed to access gpkg_spatial_ref_sys table" ); - } - - if ( sqlite3_column_int( stmtCheck.get(), 0 ) ) - return; // already there - - Sqlite3Stmt stmt; - stmt.prepare( db, "INSERT INTO gpkg_spatial_ref_sys VALUES ('%q:%d', %d, '%q', %d, '%q', '')", - crs.authName.c_str(), crs.authCode, crs.srsId, crs.authName.c_str(), crs.authCode, - crs.wkt.c_str() ); - res = sqlite3_step( stmt.get() ); - if ( res != SQLITE_DONE ) - { - throwSqliteError( db->get(), "Failed to insert CRS to gpkg_spatial_ref_sys table" ); - } -} - -static void addGpkgSpatialTable( std::shared_ptr db, const TableSchema &tbl, const Extent &extent ) -{ - size_t i = tbl.geometryColumn(); - if ( i == SIZE_MAX ) - throw GeoDiffException( "Adding non-spatial tables is not supported: " + tbl.name ); - - const TableColumnInfo &col = tbl.columns[i]; - std::string geomColumn = col.name; - std::string geomType = col.geomType; - int srsId = col.geomSrsId; - bool hasZ = col.geomHasZ; - bool hasM = col.geomHasM; - - // gpkg_contents - // table_name TEXT NOT NULL PRIMARY KEY, data_type TEXT NOT NULL, - // identifier TEXT, description TEXT DEFAULT '', - // last_change DATETIME NOT NULL DEFAULT (...), - // min_x DOUBLE, min_y DOUBLE, max_x DOUBLE, max_y DOUBLE, - // 
srs_id INTEGER - - Sqlite3Stmt stmt; - stmt.prepare( db, "INSERT INTO gpkg_contents (table_name, data_type, identifier, min_x, min_y, max_x, max_y, srs_id) " - "VALUES ('%q', 'features', '%q', %f, %f, %f, %f, %d)", - tbl.name.c_str(), tbl.name.c_str(), extent.minX, extent.minY, extent.maxX, extent.maxY, srsId ); - int res = sqlite3_step( stmt.get() ); - if ( res != SQLITE_DONE ) - { - throwSqliteError( db->get(), "Failed to insert row to gpkg_contents table" ); - } - - // gpkg_geometry_columns - // table_name TEXT NOT NULL, column_name TEXT NOT NULL, - // geometry_type_name TEXT NOT NULL, srs_id INTEGER NOT NULL, - // z TINYINT NOT NULL,m TINYINT NOT NULL - - Sqlite3Stmt stmtGeomCol; - stmtGeomCol.prepare( db, "INSERT INTO gpkg_geometry_columns VALUES ('%q', '%q', '%q', %d, %d, %d)", - tbl.name.c_str(), geomColumn.c_str(), geomType.c_str(), srsId, hasZ, hasM ); - res = sqlite3_step( stmtGeomCol.get() ); - if ( res != SQLITE_DONE ) - { - throwSqliteError( db->get(), "Failed to insert row to gpkg_geometry_columns table" ); - } -} - void SqliteDriver::createTables( const std::vector &tables ) { // currently we always create geopackage meta tables. Maybe in the future we can skip @@ -1278,55 +1456,7 @@ void SqliteDriver::createTables( const std::vector &tables ) { if ( startsWith( tbl.name, "gpkg_" ) ) continue; - - if ( tbl.geometryColumn() != SIZE_MAX ) - { - addGpkgCrsDefinition( mDb, tbl.crs ); - addGpkgSpatialTable( mDb, tbl, Extent() ); // TODO: is it OK to set zeros? 
- } - - std::string sql, pkeyCols, columns; - for ( const TableColumnInfo &c : tbl.columns ) - { - if ( !columns.empty() ) - columns += ", "; - - columns += sqlitePrintf( "\"%w\" %s", c.name.c_str(), c.type.dbType.c_str() ); - - if ( c.isNotNull ) - columns += " NOT NULL"; - - // we have also c.isAutoIncrement, but the SQLite AUTOINCREMENT keyword only applies - // to primary keys, and according to the docs, ordinary tables with INTEGER PRIMARY KEY column - // (which becomes alias to ROWID) does auto-increment, and AUTOINCREMENT just prevents - // reuse of ROWIDs from previously deleted rows. - // See https://sqlite.org/autoinc.html - - if ( c.isPrimaryKey ) - { - if ( !pkeyCols.empty() ) - pkeyCols += ", "; - pkeyCols += sqlitePrintf( "\"%w\"", c.name.c_str() ); - } - } - - sql = sqlitePrintf( "CREATE TABLE main.\"%w\" (", tbl.name.c_str() ); - if ( !columns.empty() ) - { - sql += columns; - } - if ( !pkeyCols.empty() ) - { - sql += ", PRIMARY KEY (" + pkeyCols + ")"; - } - sql += ");"; - - Sqlite3Stmt stmt; - stmt.prepare( mDb, sql ); - if ( sqlite3_step( stmt.get() ) != SQLITE_DONE ) - { - throwSqliteError( mDb->get(), "Failure creating table: " + tbl.name ); - } + createTable( mDb, tbl ); } } diff --git a/geodiff/src/drivers/sqlitedriver.h b/geodiff/src/drivers/sqlitedriver.h index cca70f2d..228c2c59 100644 --- a/geodiff/src/drivers/sqlitedriver.h +++ b/geodiff/src/drivers/sqlitedriver.h @@ -27,7 +27,7 @@ class SqliteChangeApplyState Sqlite3Stmt stmtDelete; }; - std::unordered_map tableState; + std::unordered_map tableState; }; @@ -59,8 +59,9 @@ class SqliteDriver : public Driver void dumpData( ChangesetWriter &writer, bool useModified = false ) override; private: - void logApplyConflict( const std::string &type, const ChangesetDataEntry &entry, bool isDbErr = false ) const; + void logApplyConflict( const std::string &type, const ChangesetEntry &entry, bool isDbErr = false ) const; ChangeApplyResult applyDataChange( SqliteChangeApplyState &state, const 
ChangesetDataEntry &entry ); + void applySchemaChange( const ChangesetEntry &entry ); std::string databaseName( bool useModified = false ); std::shared_ptr mDb; From 1052db3938588f45c545e4b318f113f099d32aac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ko=C5=88a=C5=99=C3=ADk?= Date: Thu, 14 May 2026 23:40:11 +0200 Subject: [PATCH 07/21] Update rebase to work with schema changes Assisted-by: Claude Sonnet 4.6 --- geodiff/src/drivers/sqlitedriver.cpp | 44 +-- geodiff/src/geodiff.cpp | 20 +- geodiff/src/geodiffrebase.cpp | 409 ++++++++++++++++++--------- geodiff/src/geodiffrebase.hpp | 2 + geodiff/src/tableschema.cpp | 7 + geodiff/src/tableschema.h | 3 + geodiff/src/tableschemadiff.cpp | 58 +++- geodiff/src/tableschemadiff.hpp | 2 + 8 files changed, 393 insertions(+), 152 deletions(-) diff --git a/geodiff/src/drivers/sqlitedriver.cpp b/geodiff/src/drivers/sqlitedriver.cpp index 71782b39..02a112b9 100644 --- a/geodiff/src/drivers/sqlitedriver.cpp +++ b/geodiff/src/drivers/sqlitedriver.cpp @@ -19,6 +19,7 @@ #include #include +#include #include @@ -662,30 +663,16 @@ static void handleUpdated( const Context *context, TableDiffContext &diffContext } } -static const TableSchema &findTableSchema( const DatabaseSchema &schema, const std::string &tableName ) -{ - // Find the table schema - const TableSchema *table = nullptr; - for ( const TableSchema &tbl : schema.tables ) - { - if ( tbl.name == tableName ) - { - table = &tbl; - break; - } - } - if ( !table ) - throw GeoDiffException( "Missing schema for table " + tableName ); - return *table; -} - // To allow diff inversion to work, we first delete all rows when dropping a // table, and NULL out all rows when dropping a column. 
-static void writeDataChangesForSchemaChange( std::shared_ptr db, const DatabaseSchema &schemaBase, ChangesetWriter &writer, const ChangesetEntry &entry ) +static void writeDataChangesForSchemaChange( std::shared_ptr db, const std::unordered_map &currentSchemata, ChangesetWriter &writer, const ChangesetEntry &entry ) { if ( const ChangesetDropColumnEntry *dcEntry = std::get_if( &entry ) ) { - const TableSchema &table = findTableSchema( schemaBase, dcEntry->tableName ); + auto it = currentSchemata.find( dcEntry->tableName ); + if ( it == currentSchemata.end() ) + throw GeoDiffException( "Missing schema for table " + dcEntry->tableName ); + const TableSchema &table = it->second; std::string pkeyColStr; for ( const TableColumnInfo &c : table.columns ) @@ -741,7 +728,10 @@ static void writeDataChangesForSchemaChange( std::shared_ptr db, cons } else if ( const ChangesetDropTableEntry *dtEntry = std::get_if( &entry ) ) { - const TableSchema &table = findTableSchema( schemaBase, dtEntry->tableName ); + auto it = currentSchemata.find( dtEntry->tableName ); + if ( it == currentSchemata.end() ) + throw GeoDiffException( "Missing schema for table " + dtEntry->tableName ); + const TableSchema &table = it->second; Sqlite3Stmt stmt; stmt.prepare( db, "SELECT * FROM \"main\".\"%w\"", dtEntry->tableName.c_str() ); @@ -770,11 +760,23 @@ void SqliteDriver::createChangeset( ChangesetWriter &writer ) DatabaseSchema schemaBase = getSchema( false ); DatabaseSchema schemaModified = getSchema( true ); + // We keep table schemata that have exactly the written out schema-change + // entries applied. They're necessary to know the intermediate database state + // for any data changes (e.g. row deletions before table drop). 
+ std::unordered_map currentSchemata; + for ( const TableSchema &tbl : schemaBase.tables ) + currentSchemata[tbl.name] = tbl; + auto schemaDiffEntries = diffDatabaseSchema( schemaBase, schemaModified ); for ( const ChangesetEntry &entry : schemaDiffEntries ) { - writeDataChangesForSchemaChange( mDb, schemaBase, writer, entry ); + writeDataChangesForSchemaChange( mDb, currentSchemata, writer, entry ); writer.writeEntry( entry ); + + if ( const ChangesetAddColumnEntry *acEntry = std::get_if( &entry ) ) + simulateColumnChange( currentSchemata[acEntry->tableName], entry ); + else if ( const ChangesetDropColumnEntry *dcEntry = std::get_if( &entry ) ) + simulateColumnChange( currentSchemata[dcEntry->tableName], entry ); } for ( const TableSchema &tblBase : schemaBase.tables ) diff --git a/geodiff/src/geodiff.cpp b/geodiff/src/geodiff.cpp index 2e52e280..df798d0a 100644 --- a/geodiff/src/geodiff.cpp +++ b/geodiff/src/geodiff.cpp @@ -496,7 +496,7 @@ int GEODIFF_createRebasedChangeset( static void createRebasedChangesetEx( Context *context, const char *driverName, - const char * /* driverExtraInfo */, + const char *driverExtraInfo, const char *base, const char *base2modified, const char *base2their, @@ -508,11 +508,23 @@ static void createRebasedChangesetEx( throw GeoDiffException( "NULL arguments to GEODIFF_createRebasedChangesetEx" ); } - // TODO: use driverName + driverExtraInfo + base when creating rebased - // changeset (e.g. 
to check whether a newly created ID is actually free) + // Open the base DB to get its schema + DatabaseSchema baseSchema; + { + DriverParametersMap conn; + conn["base"] = std::string( base ); + if ( driverExtraInfo && *driverExtraInfo ) + conn["conninfo"] = std::string( driverExtraInfo ); + std::unique_ptr driver( Driver::createDriver( context, std::string( driverName ) ) ); + if ( !driver ) + throw GeoDiffException( "Unable to use driver: " + std::string( driverName ) ); + driver->open( conn ); + for ( const std::string &tableName : driver->listTables() ) + baseSchema.tables.push_back( driver->tableSchema( tableName ) ); + } std::vector conflicts; - rebase( context, base2their, rebased, base2modified, conflicts ); + rebase( context, baseSchema, base2their, rebased, base2modified, conflicts ); // output conflicts if ( conflicts.empty() ) diff --git a/geodiff/src/geodiffrebase.cpp b/geodiff/src/geodiffrebase.cpp index 564f0adc..3f778276 100644 --- a/geodiff/src/geodiffrebase.cpp +++ b/geodiff/src/geodiffrebase.cpp @@ -12,6 +12,9 @@ #include "changesetreader.h" #include "changesetwriter.h" +#include "changesetutils.h" +#include "tableschema.h" +#include "tableschemadiff.hpp" #include #include @@ -52,7 +55,7 @@ struct TableRebaseInfo { std::set inserted; //!< pkeys that were inserted std::set deleted; //!< pkeys that were deleted - std::map > updated; //!< new column values for each recorded row (identified by pkey) + std::map> updated; //!< new column values (by name) for each updated row (identified by pkey) void dump( std::ostringstream &ret ) { @@ -75,6 +78,7 @@ struct TableRebaseInfo struct DatabaseRebaseInfo { std::map tables; //!< mapping for each table (key = table name) + DatabaseSchema theirSchema; void dump( const Context *context ) { @@ -94,7 +98,8 @@ struct DatabaseRebaseInfo }; -//! structure that keeps track of how we modify primary keys of the rebased changeset +//! 
structure that keeps track of how we modify primary keys and column indices +// of the rebased changeset. struct RebaseMapping { @@ -204,39 +209,80 @@ int _get_primary_key( const ChangesetDataEntry &entry ) int _parse_old_changeset( const Context *context, + const DatabaseSchema &baseSchema, ChangesetReader &reader_BASE_THEIRS, DatabaseRebaseInfo &dbInfo ) { + dbInfo.theirSchema = baseSchema; + ChangesetEntry entry; while ( reader_BASE_THEIRS.nextEntry( entry ) ) { - if ( !std::holds_alternative( entry ) ) - continue; - ChangesetDataEntry &dataEntry = std::get( entry ); + if ( std::holds_alternative( entry ) ) + { + ChangesetDataEntry &dataEntry = std::get( entry ); - std::string tableName = dataEntry.table->name; + std::string tableName = dataEntry.table->name; - // skip table if necessary - if ( context->isTableSkipped( tableName ) ) - { - continue; - } + // skip table if necessary + if ( context->isTableSkipped( tableName ) ) + { + continue; + } - int pk = _get_primary_key( dataEntry ); + int pk = _get_primary_key( dataEntry ); - TableRebaseInfo &tableInfo = dbInfo.tables[tableName]; + TableRebaseInfo &tableInfo = dbInfo.tables[tableName]; - if ( dataEntry.op == ChangesetDataEntry::OpInsert ) + if ( dataEntry.op == ChangesetDataEntry::OpInsert ) + { + tableInfo.inserted.insert( pk ); + } + if ( dataEntry.op == ChangesetDataEntry::OpDelete ) + { + tableInfo.deleted.insert( pk ); + } + if ( dataEntry.op == ChangesetDataEntry::OpUpdate ) + { + const TableSchema *ts = dbInfo.theirSchema.tableByName( tableName ); + if ( !ts ) + throw GeoDiffException( "Update entry for table not in schema: " + tableName ); + std::map namedVals; + for ( size_t i = 0; i < dataEntry.newValues.size() && i < ts->columns.size(); i++ ) + { + if ( dataEntry.newValues[i].type() != Value::TypeUndefined ) + namedVals[ts->columns[i].name] = dataEntry.newValues[i]; + } + tableInfo.updated[pk] = std::move( namedVals ); + } + } + else if ( const ChangesetCreateTableEntry *ctEntry = std::get_if( 
&entry ) ) + { + if ( context->isTableSkipped( ctEntry->tableName ) ) + continue; + simulateSchemaChange( dbInfo.theirSchema, entry ); + } + else if ( const ChangesetDropTableEntry *dtEntry = std::get_if( &entry ) ) { - tableInfo.inserted.insert( pk ); + if ( context->isTableSkipped( dtEntry->tableName ) ) + continue; + simulateSchemaChange( dbInfo.theirSchema, entry ); } - if ( dataEntry.op == ChangesetDataEntry::OpDelete ) + else if ( const ChangesetAddColumnEntry *acEntry = std::get_if( &entry ) ) { - tableInfo.deleted.insert( pk ); + if ( context->isTableSkipped( acEntry->tableName ) ) + continue; + simulateSchemaChange( dbInfo.theirSchema, entry ); } - if ( dataEntry.op == ChangesetDataEntry::OpUpdate ) + else if ( const ChangesetDropColumnEntry *dcEntry = std::get_if( &entry ) ) { - tableInfo.updated[pk] = dataEntry.newValues; + if ( context->isTableSkipped( dcEntry->tableName ) ) + continue; + simulateSchemaChange( dbInfo.theirSchema, entry ); + } + else + { + throw GeoDiffException( "Unhandled entry type in rebase: " + std::to_string( entry.index() ) ); } } @@ -365,12 +411,12 @@ int _find_mapping_for_new_changeset( } -bool _handle_insert( const ChangesetDataEntry &entry, const RebaseMapping &mapping, ChangesetDataEntry &outEntry ) +bool _handle_insert( const ChangesetDataEntry &entry, const RebaseMapping &mapping, + const std::map &colMap, + ChangesetDataEntry &outEntry ) { - size_t numColumns = entry.table->columnCount(); - outEntry.op = ChangesetDataEntry::OpInsert; - outEntry.newValues.resize( numColumns ); + outEntry.newValues.resize( outEntry.table->columnCount() ); // resolve primary key and patched primary key int pk = _get_primary_key( entry ); @@ -378,31 +424,28 @@ bool _handle_insert( const ChangesetDataEntry &entry, const RebaseMapping &mappi if ( mapping.hasOldPkey( entry.table->name, pk ) ) { - // conflict 2 concurrent updates... + // conflict 2 concurrent inserts... 
newPk = mapping.getNewPkey( entry.table->name, pk ); } - for ( size_t i = 0; i < numColumns; i++ ) + for ( const auto &[inIdx, outIdx] : colMap ) { - if ( entry.table->primaryKeys[i] ) - { - outEntry.newValues[i].setInt( newPk ); - } + if ( outEntry.table->primaryKeys[outIdx] ) + outEntry.newValues[outIdx].setInt( newPk ); else - { - outEntry.newValues[i] = entry.newValues[i]; - } + outEntry.newValues[outIdx] = entry.newValues[inIdx]; } return true; } bool _handle_delete( const ChangesetDataEntry &entry, const RebaseMapping &mapping, - const TableRebaseInfo &tableInfo, ChangesetDataEntry &outEntry ) + const TableRebaseInfo &tableInfo, + const std::map &colMap, + const TableSchema &inTableSchema, + ChangesetDataEntry &outEntry ) { - size_t numColumns = entry.table->columnCount(); - outEntry.op = ChangesetDataEntry::OpDelete; - outEntry.oldValues.resize( numColumns ); + outEntry.oldValues.resize( outEntry.table->columnCount() ); // resolve primary key and patched primary key int pk = _get_primary_key( entry ); @@ -410,43 +453,39 @@ bool _handle_delete( const ChangesetDataEntry &entry, const RebaseMapping &mappi if ( mapping.hasOldPkey( entry.table->name, pk ) ) { - // conflict 2 concurrent updates... - newPk = mapping.getNewPkey( entry.table->name, pk ); - // conflict 2 concurrent deletes... 
+ newPk = mapping.getNewPkey( entry.table->name, pk ); if ( newPk == RebaseMapping::INVALID_FID ) return false; } // find the previously new values (will be used as the old values in the rebased version) - std::vector patchedVals; + const std::map *patchedMap = nullptr; auto a = tableInfo.updated.find( pk ); - if ( a == tableInfo.updated.end() ) - patchedVals.resize( static_cast( numColumns ) ); - else - patchedVals = a->second; + if ( a != tableInfo.updated.end() ) + patchedMap = &a->second; - for ( size_t i = 0; i < numColumns; i++ ) + for ( const auto &[inIdx, outIdx] : colMap ) { - if ( entry.table->primaryKeys[i] ) + if ( outEntry.table->primaryKeys[outIdx] ) { - outEntry.oldValues[i].setInt( newPk ); + outEntry.oldValues[outIdx].setInt( newPk ); } else { // if the value was patched in the previous commit, use that one as base - Value value; - const Value &patchedVal = patchedVals[i]; - if ( patchedVal.type() != Value::TypeUndefined ) + Value patchedVal; + if ( patchedMap ) { - value = patchedVal; + auto it = patchedMap->find( inTableSchema.columns[inIdx].name ); + if ( it != patchedMap->end() ) + patchedVal = it->second; } + if ( patchedVal.type() != Value::TypeUndefined ) + outEntry.oldValues[outIdx] = patchedVal; else - { // otherwise the value is same for both patched and this, so use base value - value = entry.oldValues[i]; - } - outEntry.oldValues[i] = value; + outEntry.oldValues[outIdx] = entry.oldValues[inIdx]; } } return true; @@ -466,14 +505,15 @@ void _addConflictItem( ConflictFeature &conflictFeature, int i, } bool _handle_update( const ChangesetDataEntry &entry, const RebaseMapping &mapping, - const TableRebaseInfo &tableInfo, ChangesetDataEntry &outEntry, + const TableRebaseInfo &tableInfo, + const std::map &colMap, + const TableSchema &inTableSchema, + ChangesetDataEntry &outEntry, std::vector &conflicts ) { - size_t numColumns = entry.table->columnCount(); - outEntry.op = ChangesetDataEntry::OpUpdate; - outEntry.oldValues.resize( numColumns ); 
- outEntry.newValues.resize( numColumns ); + outEntry.oldValues.resize( outEntry.table->columnCount() ); + outEntry.newValues.resize( outEntry.table->columnCount() ); // get values from patched (new) master int pk = _get_primary_key( entry ); @@ -485,132 +525,248 @@ bool _handle_update( const ChangesetDataEntry &entry, const RebaseMapping &mappi } // find the previously new values (will be used as the old values in the rebased version) - std::vector patchedVals; + const std::map *patchedMap = nullptr; auto a = tableInfo.updated.find( pk ); - if ( a == tableInfo.updated.end() ) - patchedVals.resize( static_cast( numColumns ) ); - else - patchedVals = a->second; + if ( a != tableInfo.updated.end() ) + patchedMap = &a->second; ConflictFeature conflictFeature( pk, entry.table->name ); bool entryHasChanges = false; - for ( size_t i = 0; i < numColumns; i++ ) + for ( const auto &[inIdx, outIdx] : colMap ) { - Value patchedVal = patchedVals[i]; - if ( patchedVal.type() != Value::TypeUndefined && entry.newValues[i].type() != Value::TypeUndefined ) + Value patchedVal; + if ( patchedMap ) { - if ( patchedVal == entry.newValues[i] ) + auto it = patchedMap->find( inTableSchema.columns[inIdx].name ); + if ( it != patchedMap->end() ) + patchedVal = it->second; + } + + if ( patchedVal.type() != Value::TypeUndefined && entry.newValues[inIdx].type() != Value::TypeUndefined ) + { + if ( patchedVal == entry.newValues[inIdx] ) { // both "old" and "new" changeset modify the column's value to the same value - that // means that in our rebased changeset there's no further change and there's no conflict - outEntry.oldValues[i].setUndefined(); - outEntry.newValues[i].setUndefined(); + outEntry.oldValues[outIdx].setUndefined(); + outEntry.newValues[outIdx].setUndefined(); } else { // we have edit conflict here: both "old" changeset and the "new" changeset modify the same // column of the same row. 
Rebased changeset will get the "old" value updated to the new (patched) // value of the older changeset - outEntry.oldValues[i] = patchedVal; - outEntry.newValues[i] = entry.newValues[i]; + outEntry.oldValues[outIdx] = patchedVal; + outEntry.newValues[outIdx] = entry.newValues[inIdx]; entryHasChanges = true; - _addConflictItem( conflictFeature, ( int ) i, entry.oldValues[i], patchedVal, entry.newValues[i] ); + _addConflictItem( conflictFeature, outIdx, entry.oldValues[inIdx], patchedVal, entry.newValues[inIdx] ); } } else { // the "new" changeset stays as is without modifications - outEntry.oldValues[i] = entry.oldValues[i]; - outEntry.newValues[i] = entry.newValues[i]; + outEntry.oldValues[outIdx] = entry.oldValues[inIdx]; + outEntry.newValues[outIdx] = entry.newValues[inIdx]; // if a column is pkey, it would have "new" value undefined in the entry and that's not an actual change - if ( entry.newValues[i].type() != Value::TypeUndefined ) + if ( entry.newValues[inIdx].type() != Value::TypeUndefined ) entryHasChanges = true; } } if ( conflictFeature.isValid() ) - { conflicts.push_back( conflictFeature ); - } return entryHasChanges; } //! throws GeoDiffException on error void _prepare_new_changeset( const Context *context, ChangesetReader &reader, const std::string &changesetNew, - const RebaseMapping &mapping, const DatabaseRebaseInfo &dbInfo, + RebaseMapping &mapping, const DatabaseRebaseInfo &dbInfo, + const DatabaseSchema &baseSchema, std::vector &conflicts ) { - ChangesetEntry entry; - std::map tableDefinitions; + // The base DB schema with our changes from already processed entries applied + // on top. + DatabaseSchema currentSchema = baseSchema; + // The base DB schema with their changes and then our changes from already + // processed entries applied on top. + DatabaseSchema outputSchema = dbInfo.theirSchema; + // table schema -> (old column index -> new column index) + // Column being absent means its index didn't change. 
+ std::map> columnIndexMap; + std::map > tableChanges; + // Cached output ChangesetTable for the current table. + std::shared_ptr outChangesetTable; + + ChangesetEntry entry; while ( reader.nextEntry( entry ) ) { - if ( !std::holds_alternative( entry ) ) - continue; - ChangesetDataEntry &dataEntry = std::get( entry ); + if ( std::holds_alternative( entry ) ) + { + ChangesetDataEntry &dataEntry = std::get( entry ); + std::string tableName = dataEntry.table->name; - std::string tableName = dataEntry.table->name; + // skip table if necessary + if ( context->isTableSkipped( tableName ) ) + continue; - // skip table if necessary - if ( context->isTableSkipped( tableName ) ) - { - continue; - } + TableSchema *tableSchema = currentSchema.tableByName( tableName ); - // Inserts table into the definitions, if it doesn't already contain it - tableDefinitions.insert( {tableName, *dataEntry.table} ); + if ( !tableSchema ) + throw GeoDiffException( "Tried rebasing data entry for table not in schema: " + tableName ); - auto tablesIt = dbInfo.tables.find( tableName ); - if ( tablesIt == dbInfo.tables.end() ) - { - // we have change in different table that was modified in theirs modifications - // just copy plain the change to the output buffer - tableChanges[tableName].push_back( entry ); - continue; - } + // Get the output table schema (theirs + our schema changes so far). + TableSchema *outTableSchema = outputSchema.tableByName( tableName ); + if ( !outTableSchema ) + // Table was dropped by theirs. + continue; - bool writeEntry = false; - ChangesetDataEntry outEntry; + // Compute column mapping (input index -> output index) on first encounter. 
+ if ( columnIndexMap.find( dataEntry.table.get() ) == columnIndexMap.end() ) + { + std::map colMap; + for ( size_t i = 0; i < tableSchema->columns.size(); i++ ) + { + const std::string &colName = tableSchema->columns[i].name; + for ( size_t j = 0; j < outTableSchema->columns.size(); j++ ) + { + if ( outTableSchema->columns[j].name == colName ) + { + colMap[static_cast( i )] = static_cast( j ); + break; + } + } + } + columnIndexMap[dataEntry.table.get()] = std::move( colMap ); + } - // commits to same table -> now save the change to changeset - switch ( dataEntry.op ) - { - case ChangesetDataEntry::OpUpdate: - writeEntry = _handle_update( dataEntry, mapping, tablesIt->second, outEntry, conflicts ); - break; + const std::map &colMap = columnIndexMap[dataEntry.table.get()]; - case ChangesetDataEntry::OpInsert: - writeEntry = _handle_insert( dataEntry, mapping, outEntry ); - break; + // Rebuild cached output ChangesetTable when the table name changes. + if ( !outChangesetTable || outChangesetTable->name != tableName ) + outChangesetTable = std::make_shared( schemaToChangesetTable( tableName, *outTableSchema ) ); + + auto tablesIt = dbInfo.tables.find( tableName ); + if ( tablesIt == dbInfo.tables.end() ) + { + // Table not touched by theirs data-wise - copy through as-is. 
+ tableChanges[tableName].push_back( entry ); + continue; + } - case ChangesetDataEntry::OpDelete: - writeEntry = _handle_delete( dataEntry, mapping, tablesIt->second, outEntry ); - break; + bool writeEntry = false; + ChangesetDataEntry outEntry; + outEntry.table = outChangesetTable; + + // commits to same table -> now save the change to changeset + switch ( dataEntry.op ) + { + case ChangesetDataEntry::OpUpdate: + writeEntry = _handle_update( dataEntry, mapping, tablesIt->second, colMap, *tableSchema, outEntry, conflicts ); + break; + + case ChangesetDataEntry::OpInsert: + writeEntry = _handle_insert( dataEntry, mapping, colMap, outEntry ); + break; + + case ChangesetDataEntry::OpDelete: + writeEntry = _handle_delete( dataEntry, mapping, tablesIt->second, colMap, *tableSchema, outEntry ); + break; + } + + if ( writeEntry ) + tableChanges[tableName].push_back( outEntry ); } + else + { + simulateSchemaChange( currentSchema, entry ); + outChangesetTable = nullptr; // Invalidate cached schema, columns may change + + // Check whether the same change is already contained in theirs. If not, + // add it to the output. 
+ bool isDuplicate = false; + std::string schemaEntryTableName; + if ( const ChangesetCreateTableEntry *ctEntry = std::get_if( &entry ) ) + { + schemaEntryTableName = ctEntry->tableName; + TableSchema *existing = outputSchema.tableByName( ctEntry->tableName ); + if ( existing ) + { + if ( existing->columns != ctEntry->columns ) + throw GeoDiffException( "Conflict: table " + ctEntry->tableName + + " was created by both changesets with different columns" ); + isDuplicate = true; + } + } + else if ( const ChangesetDropTableEntry *dtEntry = std::get_if( &entry ) ) + { + schemaEntryTableName = dtEntry->tableName; + isDuplicate = outputSchema.tableByName( dtEntry->tableName ) == nullptr; + } + else if ( const ChangesetAddColumnEntry *acEntry = std::get_if( &entry ) ) + { + schemaEntryTableName = acEntry->tableName; + TableSchema *table = outputSchema.tableByName( acEntry->tableName ); + if ( table ) + { + auto it = std::find_if( table->columns.begin(), table->columns.end(), + [&]( const TableColumnInfo & c ) { return c.name == acEntry->column.name; } ); + if ( it != table->columns.end() ) + { + if ( *it != acEntry->column ) + throw GeoDiffException( "During rebase, column " + acEntry->tableName + "." + acEntry->column.name + + " was added by both changesets with different definitions" ); + isDuplicate = true; + } + } + else + throw GeoDiffException( " During rebase tried to add column " + acEntry->tableName + "." 
+ acEntry->column.name + " to non-existent table" ); + } + else if ( const ChangesetDropColumnEntry *dcEntry = std::get_if( &entry ) ) + { + schemaEntryTableName = dcEntry->tableName; + TableSchema *table = outputSchema.tableByName( dcEntry->tableName ); + if ( table ) + { + auto it = std::find_if( table->columns.begin(), table->columns.end(), + [&]( const TableColumnInfo & c ) { return c.name == dcEntry->column.name; } ); + isDuplicate = it == table->columns.end(); + } + else + throw GeoDiffException( " During rebase tried to drop column " + dcEntry->tableName + "." + dcEntry->column.name + " from non-existent table" ); + } - if ( writeEntry ) - tableChanges[tableName].push_back( outEntry ); + if ( !isDuplicate ) + { + simulateSchemaChange( outputSchema, entry ); + tableChanges[schemaEntryTableName].push_back( entry ); + } + } } ChangesetWriter writer; writer.open( changesetNew ); - for ( auto it : tableDefinitions ) + for ( auto &it : tableChanges ) { - auto chit = tableChanges.find( it.first ); - if ( chit == tableChanges.end() ) - continue; - - const std::vector &changes = chit->second; + const std::vector &changes = it.second; if ( changes.empty() ) continue; - writer.beginTable( it.second ); + ChangesetTable *defWritten = nullptr; for ( const ChangesetEntry &writeEntry : changes ) { + if ( auto dataEntry = std::get_if( &writeEntry ) ) + { + if ( defWritten != dataEntry->table.get() ) + { + writer.beginTable( *dataEntry->table ); + defWritten = dataEntry->table.get(); + } + } writer.writeEntry( writeEntry ); } } @@ -618,6 +774,7 @@ void _prepare_new_changeset( const Context *context, void rebase( const Context *context, + const DatabaseSchema &baseSchema, const std::string &changeset_BASE_THEIRS, const std::string &changeset_THEIRS_MODIFIED, const std::string &changeset_BASE_MODIFIED, @@ -652,7 +809,7 @@ void rebase( // 1. 
go through the original changeset and extract data that will be needed in the second step DatabaseRebaseInfo dbInfo; - int rc = _parse_old_changeset( context, reader_BASE_THEIRS, dbInfo ); + int rc = _parse_old_changeset( context, baseSchema, reader_BASE_THEIRS, dbInfo ); if ( rc != GEODIFF_SUCCESS ) throw GeoDiffException( "Could not parse changeset_BASE_THEIRS: " + changeset_BASE_THEIRS ); @@ -665,5 +822,5 @@ void rebase( reader_BASE_MODIFIED.rewind(); // 3. go through the changeset to be rebased again and write it with changes determined in step 2 - _prepare_new_changeset( context, reader_BASE_MODIFIED, changeset_THEIRS_MODIFIED, mapping, dbInfo, conflicts ); + _prepare_new_changeset( context, reader_BASE_MODIFIED, changeset_THEIRS_MODIFIED, mapping, dbInfo, baseSchema, conflicts ); } diff --git a/geodiff/src/geodiffrebase.hpp b/geodiff/src/geodiffrebase.hpp index 2c960c41..25a539ca 100644 --- a/geodiff/src/geodiffrebase.hpp +++ b/geodiff/src/geodiffrebase.hpp @@ -9,11 +9,13 @@ #include #include #include "geodiffutils.hpp" +#include "tableschema.h" class Logger; //! throws GeoDiffException on error void rebase( const Context *context, + const DatabaseSchema &baseSchema, //in const std::string &changeset_BASE_THEIRS, //in const std::string &changeset_THEIRS_MODIFIED, // out const std::string &changeset_BASE_MODIFIED, //in diff --git a/geodiff/src/tableschema.cpp b/geodiff/src/tableschema.cpp index 7b4ed218..4998f782 100644 --- a/geodiff/src/tableschema.cpp +++ b/geodiff/src/tableschema.cpp @@ -225,6 +225,13 @@ void baseToPostgres( TableSchema &tbl ) } } +TableSchema *DatabaseSchema::tableByName( const std::string &name ) +{ + auto it = std::find_if( tables.begin(), tables.end(), + [&name]( const TableSchema & t ) { return t.name == name; } ); + return it != tables.end() ? 
&*it : nullptr; +} + void tableSchemaConvert( const std::string &driverDstName, TableSchema &tbl ) { if ( driverDstName == Driver::SQLITEDRIVERNAME ) diff --git a/geodiff/src/tableschema.h b/geodiff/src/tableschema.h index 96fe49ed..4292aec1 100644 --- a/geodiff/src/tableschema.h +++ b/geodiff/src/tableschema.h @@ -229,6 +229,9 @@ struct TableSchema struct DatabaseSchema { std::vector tables; + + //! Returns pointer to the table with the given name, or nullptr if not found + TableSchema *tableByName( const std::string &name ); }; //! Converts column name to base type and returns struct with both names diff --git a/geodiff/src/tableschemadiff.cpp b/geodiff/src/tableschemadiff.cpp index 5082c146..8419e2d7 100644 --- a/geodiff/src/tableschemadiff.cpp +++ b/geodiff/src/tableschemadiff.cpp @@ -7,6 +7,7 @@ #include "changeset.h" #include "geodiffutils.hpp" #include "tableschema.h" +#include #include #include @@ -35,6 +36,61 @@ static std::unordered_map byName( const std::vector & return map; } +void simulateColumnChange( TableSchema &schema, const ChangesetEntry &entry ) +{ + if ( const ChangesetAddColumnEntry *acEntry = std::get_if( &entry ) ) + { + auto it = std::find_if( schema.columns.begin(), schema.columns.end(), + [&]( const TableColumnInfo & c ) { return c.name == acEntry->column.name; } ); + if ( it != schema.columns.end() ) + throw GeoDiffException( "Tried simulating addition of already-existing column " + acEntry->column.name ); + schema.columns.push_back( acEntry->column ); + } + else if ( const ChangesetDropColumnEntry *dcEntry = std::get_if( &entry ) ) + { + auto it = std::find_if( schema.columns.begin(), schema.columns.end(), + [&]( const TableColumnInfo & c ) { return c.name == dcEntry->column.name; } ); + if ( it == schema.columns.end() ) + throw GeoDiffException( "Tried simulating deletion of non-existent column " + dcEntry->column.name ); + schema.columns.erase( it ); + } +} + +void simulateSchemaChange( DatabaseSchema &schema, const ChangesetEntry 
&entry ) +{ + if ( const ChangesetCreateTableEntry *ctEntry = std::get_if( &entry ) ) + { + if ( schema.tableByName( ctEntry->tableName ) ) + throw GeoDiffException( "Tried simulating creation of already-existing table " + ctEntry->tableName ); + TableSchema ts; + ts.name = ctEntry->tableName; + ts.columns = ctEntry->columns; + schema.tables.push_back( ts ); + } + else if ( const ChangesetDropTableEntry *dtEntry = std::get_if( &entry ) ) + { + auto it = std::find_if( schema.tables.begin(), schema.tables.end(), + [&]( const TableSchema & t ) { return t.name == dtEntry->tableName; } ); + if ( it == schema.tables.end() ) + throw GeoDiffException( "Tried simulating deletion of non-existent table " + dtEntry->tableName ); + schema.tables.erase( it ); + } + else if ( const ChangesetAddColumnEntry *acEntry = std::get_if( &entry ) ) + { + TableSchema *table = schema.tableByName( acEntry->tableName ); + if ( !table ) + throw GeoDiffException( "Tried to add column " + acEntry->column.name + " to non-existent table " + acEntry->tableName ); + simulateColumnChange( *table, entry ); + } + else if ( const ChangesetDropColumnEntry *dcEntry = std::get_if( &entry ) ) + { + TableSchema *table = schema.tableByName( dcEntry->tableName ); + if ( !table ) + throw GeoDiffException( "Tried to delete column " + dcEntry->column.name + " from non-existent table " + dcEntry->tableName ); + simulateColumnChange( *table, entry ); + } +} + std::vector diffTableSchema( const TableSchema &base, const TableSchema &modified ) { if ( base.crs != modified.crs ) @@ -74,7 +130,7 @@ std::vector diffTableSchema( const TableSchema &base, const Tabl { // Compare column type by base type enum rather than the exact db-specific // string to avoid regression with DB pairs that use compatible types. 
- if ( !baseColumns.at(colName)->compareWithBaseTypes( *modifiedColumns.at(colName) ) ) + if ( !baseColumns.at( colName )->compareWithBaseTypes( *modifiedColumns.at( colName ) ) ) throw GeoDiffException( "Columns differ: " + base.name + "." + colName + " and " + modified.name + "." + colName + ")" ); } diff --git a/geodiff/src/tableschemadiff.hpp b/geodiff/src/tableschemadiff.hpp index e1912a4e..d5be967a 100644 --- a/geodiff/src/tableschemadiff.hpp +++ b/geodiff/src/tableschemadiff.hpp @@ -11,5 +11,7 @@ std::vector diffTableSchema( const TableSchema &base, const TableSchema &modified ); std::vector diffDatabaseSchema( const DatabaseSchema &base, const DatabaseSchema &modified ); +void simulateColumnChange( TableSchema &schema, const ChangesetEntry &entry ); +void simulateSchemaChange( DatabaseSchema &schema, const ChangesetEntry &entry ); #endif // TABLESCHEMADIFF_H From bc2fc9c23d50962c18f7fbbe56d2308012647d0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ko=C5=88a=C5=99=C3=ADk?= Date: Fri, 15 May 2026 10:48:20 +0200 Subject: [PATCH 08/21] Change modified schema tests to pass We now explicitly support these schema changes. 
--- geodiff/tests/geodiff_testutils.cpp | 5 +++ geodiff/tests/geodiff_testutils.hpp | 1 + geodiff/tests/test_modified_scheme.cpp | 42 +++++++++++------- .../changesets/added_attribute.diff | Bin 0 -> 130 bytes .../changesets/added_table.diff | Bin 0 -> 242 bytes .../changesets/delete_attribute.diff | Bin 0 -> 130 bytes .../changesets/delete_table.diff | Bin 0 -> 242 bytes .../changesets/rename_attribute.diff | Bin 0 -> 202 bytes .../changesets/rename_table.diff | Bin 0 -> 481 bytes 9 files changed, 33 insertions(+), 15 deletions(-) create mode 100644 geodiff/tests/testdata/modified_scheme/changesets/added_attribute.diff create mode 100644 geodiff/tests/testdata/modified_scheme/changesets/added_table.diff create mode 100644 geodiff/tests/testdata/modified_scheme/changesets/delete_attribute.diff create mode 100644 geodiff/tests/testdata/modified_scheme/changesets/delete_table.diff create mode 100644 geodiff/tests/testdata/modified_scheme/changesets/rename_attribute.diff create mode 100644 geodiff/tests/testdata/modified_scheme/changesets/rename_table.diff diff --git a/geodiff/tests/geodiff_testutils.cpp b/geodiff/tests/geodiff_testutils.cpp index bab76935..edc6fa6c 100644 --- a/geodiff/tests/geodiff_testutils.cpp +++ b/geodiff/tests/geodiff_testutils.cpp @@ -64,6 +64,11 @@ std::string pathjoin( const std::string &dir, const std::string &dir2, const std return res; } +std::string pathjoin( const std::string &dir, const std::string &dir2, const std::string &dir3, const std::string &filename ) +{ + return pathjoin( pathjoin( dir, dir2, dir3 ), filename ); +} + std::string testdir() { return TEST_DATA_DIR; diff --git a/geodiff/tests/geodiff_testutils.hpp b/geodiff/tests/geodiff_testutils.hpp index 6fe1038e..54d14542 100644 --- a/geodiff/tests/geodiff_testutils.hpp +++ b/geodiff/tests/geodiff_testutils.hpp @@ -24,6 +24,7 @@ std::string testdir(); std::string pathjoin( const std::string &dir, const std::string &filename ); std::string pathjoin( const std::string &dir, 
const std::string &dir2, const std::string &filename ); +std::string pathjoin( const std::string &dir, const std::string &dir2, const std::string &dir3, const std::string &filename ); void makedir( const std::string &dir ); void init_test(); diff --git a/geodiff/tests/test_modified_scheme.cpp b/geodiff/tests/test_modified_scheme.cpp index e857e0d6..05801c95 100644 --- a/geodiff/tests/test_modified_scheme.cpp +++ b/geodiff/tests/test_modified_scheme.cpp @@ -16,35 +16,41 @@ TEST( ModifiedSchemeSqlite3Test, add_attribute ) std::string base = pathjoin( testdir(), "base.gpkg" ); std::string modified = pathjoin( testdir(), "modified_scheme", "added_attribute.gpkg" ); - std::string changeset = pathjoin( tmpdir(), testname, "changeset.bin" ); + std::string changeset = pathjoin( tmpdir(), testname, "changeset.diff" ); + std::string expected = pathjoin( testdir(), "modified_scheme", "changesets", "added_attribute.diff" ); - ASSERT_EQ( GEODIFF_createChangeset( testContext(), base.c_str(), modified.c_str(), changeset.c_str() ), GEODIFF_ERROR ); + ASSERT_EQ( GEODIFF_createChangeset( testContext(), base.c_str(), modified.c_str(), changeset.c_str() ), GEODIFF_SUCCESS ); + EXPECT_TRUE( fileContentEquals( changeset, expected ) ); } TEST( ModifiedSchemeSqlite3Test, add_table ) { - std::cout << "geopackage add table to table" << std::endl; + std::cout << "geopackage add table to database" << std::endl; std::string testname = "add_table"; makedir( pathjoin( tmpdir(), testname ) ); std::string base = pathjoin( testdir(), "base.gpkg" ); std::string modified = pathjoin( testdir(), "modified_scheme", "added_table.gpkg" ); - std::string changeset = pathjoin( tmpdir(), testname, "changeset.bin" ); + std::string changeset = pathjoin( tmpdir(), testname, "changeset.diff" ); + std::string expected = pathjoin( testdir(), "modified_scheme", "changesets", "added_table.diff" ); - ASSERT_EQ( GEODIFF_createChangeset( testContext(), base.c_str(), modified.c_str(), changeset.c_str() ), GEODIFF_ERROR 
); + ASSERT_EQ( GEODIFF_createChangeset( testContext(), base.c_str(), modified.c_str(), changeset.c_str() ), GEODIFF_SUCCESS ); + EXPECT_TRUE( fileContentEquals( changeset, expected ) ); } TEST( ModifiedSchemeSqlite3Test, delete_attribute ) { - std::cout << "geopackage add attribute to table" << std::endl; + std::cout << "geopackage delete attribute from table" << std::endl; std::string testname = "delete_attribute"; makedir( pathjoin( tmpdir(), testname ) ); std::string base = pathjoin( testdir(), "modified_scheme", "added_attribute.gpkg" ); std::string modified = pathjoin( testdir(), "base.gpkg" ); - std::string changeset = pathjoin( tmpdir(), testname, "changeset.bin" ); + std::string changeset = pathjoin( tmpdir(), testname, "changeset.diff" ); + std::string expected = pathjoin( testdir(), "modified_scheme", "changesets", "delete_attribute.diff" ); - ASSERT_EQ( GEODIFF_createChangeset( testContext(), base.c_str(), modified.c_str(), changeset.c_str() ), GEODIFF_ERROR ); + ASSERT_EQ( GEODIFF_createChangeset( testContext(), base.c_str(), modified.c_str(), changeset.c_str() ), GEODIFF_SUCCESS ); + EXPECT_TRUE( fileContentEquals( changeset, expected ) ); } TEST( ModifiedSchemeSqlite3Test, delete_table ) @@ -55,22 +61,26 @@ TEST( ModifiedSchemeSqlite3Test, delete_table ) std::string base = pathjoin( testdir(), "modified_scheme", "added_table.gpkg" ); std::string modified = pathjoin( testdir(), "base.gpkg" ); - std::string changeset = pathjoin( tmpdir(), testname, "changeset.bin" ); + std::string changeset = pathjoin( tmpdir(), testname, "changeset.diff" ); + std::string expected = pathjoin( testdir(), "modified_scheme", "changesets", "delete_table.diff" ); - ASSERT_EQ( GEODIFF_createChangeset( testContext(), base.c_str(), modified.c_str(), changeset.c_str() ), GEODIFF_ERROR ); + ASSERT_EQ( GEODIFF_createChangeset( testContext(), base.c_str(), modified.c_str(), changeset.c_str() ), GEODIFF_SUCCESS ); + EXPECT_TRUE( fileContentEquals( changeset, expected ) ); } TEST( 
ModifiedSchemeSqlite3Test, rename_table ) { std::cout << "geopackage table count is same, but tables have different name" << std::endl; - std::string testname = "delete_table"; + std::string testname = "rename_table"; makedir( pathjoin( tmpdir(), testname ) ); std::string base = pathjoin( testdir(), "modified_scheme", "added_table.gpkg" ); std::string modified = pathjoin( testdir(), "modified_scheme", "added_table2.gpkg" ); - std::string changeset = pathjoin( tmpdir(), testname, "changeset.bin" ); + std::string changeset = pathjoin( tmpdir(), testname, "changeset.diff" ); + std::string expected = pathjoin( testdir(), "modified_scheme", "changesets", "rename_table.diff" ); - ASSERT_EQ( GEODIFF_createChangeset( testContext(), base.c_str(), modified.c_str(), changeset.c_str() ), GEODIFF_ERROR ); + ASSERT_EQ( GEODIFF_createChangeset( testContext(), base.c_str(), modified.c_str(), changeset.c_str() ), GEODIFF_SUCCESS ); + EXPECT_TRUE( fileContentEquals( changeset, expected ) ); } TEST( ModifiedSchemeSqlite3Test, rename_attribute ) @@ -81,9 +91,11 @@ TEST( ModifiedSchemeSqlite3Test, rename_attribute ) std::string base = pathjoin( testdir(), "modified_scheme", "added_attribute.gpkg" ); std::string modified = pathjoin( testdir(), "modified_scheme", "added_attribute2.gpkg" ); - std::string changeset = pathjoin( tmpdir(), testname, "changeset.bin" ); + std::string changeset = pathjoin( tmpdir(), testname, "changeset.diff" ); + std::string expected = pathjoin( testdir(), "modified_scheme", "changesets", "rename_attribute.diff" ); - ASSERT_EQ( GEODIFF_createChangeset( testContext(), base.c_str(), modified.c_str(), changeset.c_str() ), GEODIFF_ERROR ); + ASSERT_EQ( GEODIFF_createChangeset( testContext(), base.c_str(), modified.c_str(), changeset.c_str() ), GEODIFF_SUCCESS ); + EXPECT_TRUE( fileContentEquals( changeset, expected ) ); } int main( int argc, char **argv ) diff --git a/geodiff/tests/testdata/modified_scheme/changesets/added_attribute.diff 
b/geodiff/tests/testdata/modified_scheme/changesets/added_attribute.diff new file mode 100644 index 0000000000000000000000000000000000000000..d3ca00329f579d63d121179efe180177f3b76abc GIT binary patch literal 130 zcmYc;&de>yNo7b(Nl8tKPs>crNnv1QVE7LMA*?_i0|Qi*I7ki*Kr}0e1s6cbF`>&b KA>^3R<(L5?DiehO literal 0 HcmV?d00001 diff --git a/geodiff/tests/testdata/modified_scheme/changesets/added_table.diff b/geodiff/tests/testdata/modified_scheme/changesets/added_table.diff new file mode 100644 index 0000000000000000000000000000000000000000..94093e4376f5c223f54104715d2178e4bee9a47c GIT binary patch literal 242 zcmYdHOi4*ii7!b^%1LElPRmSTU}R_b4+H6``MIejMU@P!90C45k?#I}3=7k8E7LMl zb5a<90H*GL2s0xC1F}&<3_t-eU}R}zbq`=*ekQ=c@Ii5QeWuRa{TjhNere{n_Zt~4 zj5M^kY|phXvsIJxsXZezPzy*SD-icci7BQ%eziY+!RJqrHy+x$BvMhbPA7&W3erDE;)SR4r0P#~{#Q*>R literal 0 HcmV?d00001 diff --git a/geodiff/tests/testdata/modified_scheme/changesets/delete_attribute.diff b/geodiff/tests/testdata/modified_scheme/changesets/delete_attribute.diff new file mode 100644 index 0000000000000000000000000000000000000000..f9e07b4561e2e3b5b75ad6f0e5295193bc185deb GIT binary patch literal 130 zcmWGxWn=(?;>_HFoKyyJ1~3mO0HTrE46Fz_CL}o!8(EGSNe;wjV0DI?otTo6ni8Lu NnVORVGWkCY002-%6J-DZ literal 0 HcmV?d00001 diff --git a/geodiff/tests/testdata/modified_scheme/changesets/delete_table.diff b/geodiff/tests/testdata/modified_scheme/changesets/delete_table.diff new file mode 100644 index 0000000000000000000000000000000000000000..79e3557b0f33f3ce686afd365f0d14efe9f54fd2 GIT binary patch literal 242 zcmWGxW@KPUOi4*ii7!b^%1LG5WB>|)0V7Kzt9t+g^D_Ykh7XFf>oaxU?$-$J@k=wm zz2C@aVWgqOWqYoDnXQ_fPwg3*fm%QsS%J7mN=z~B@vHsm3qF5}yz$T;rukatl*9-B zF59CUG9jb4k!9)Q{V>DO^)s_(q~_%0J0d%iIW04VfsvizKMbU&=I5rC6jd^?as>GM YM7sO?F)U2WtxU^I%}D`y2d3^n09ra>oB#j- literal 0 HcmV?d00001 diff --git a/geodiff/tests/testdata/modified_scheme/changesets/rename_attribute.diff b/geodiff/tests/testdata/modified_scheme/changesets/rename_attribute.diff new file mode 100644 index 
0000000000000000000000000000000000000000..f864165bc12391de1912c7e78a8a123c2cc64552 GIT binary patch literal 202 zcmWGxWn=(?;>_HFoKyyJ1~3mO0HTrE46Fz_CL}o!8(EGSNe;wjV0DI?otTo6ni8Lu pnVORVGWkCYBtzx%Qp@8LOG=827=Zd|)0V7Kzt9t+g^D_Ykh7XFf>oaxU?$-$J@k=wm zz2C@aVWgqOWqYoDnXQ_fPwg3*fm%QsS%J7mN=z~B@vHsm3qF5}yz$T;rukatl*9-B zF59CUG9jb4k!9)Q{V>DO^)s_(q~_%0J0d%iIW04VfsvizKMbU&=I5rC6jd^?as>GM zM7sO?F)U2WtxU^I%}D`y2d3_SVo_>dVlLdj_zd##^m7dk3G(!F2bz_bmQtKnjA~X0 zDD+Ux6G9F@Mp*dW4K)r*^M1EKZmwJ61eaU;D^5<)yymvo{;`ASztX*T?HQS%VYjPJ zW-edDtNjQ3w_di3xnmF02-DEsbn{h{$&>vsjSjj60W7u;_RH@7`#bL2b$gfveZSTQ KHWgm7X9WN(gvjau literal 0 HcmV?d00001 From 216e5bb039d48c43cb82103f35c6c2e9ac49724b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ko=C5=88a=C5=99=C3=ADk?= Date: Fri, 15 May 2026 23:41:15 +0200 Subject: [PATCH 09/21] Fix bugs in schema changes in SQLite --- geodiff/src/driver.h | 8 ++ geodiff/src/drivers/postgresdriver.cpp | 35 +++++++ geodiff/src/drivers/postgresdriver.h | 1 + geodiff/src/drivers/postgresutils.h | 6 ++ geodiff/src/drivers/sqlitedriver.cpp | 137 +++++++++++++++++-------- geodiff/src/drivers/sqlitedriver.h | 2 + geodiff/src/tableschemadiff.cpp | 2 +- 7 files changed, 149 insertions(+), 42 deletions(-) diff --git a/geodiff/src/driver.h b/geodiff/src/driver.h index fd1e1f20..92f8c9f0 100644 --- a/geodiff/src/driver.h +++ b/geodiff/src/driver.h @@ -11,6 +11,7 @@ #include #include +#include "changeset.h" #include "geodiff.h" #include "tableschema.h" @@ -126,6 +127,13 @@ class Driver */ virtual void dumpData( ChangesetWriter &writer, bool useModified = false ) = 0; + /** + * Executes SQL statement on 'base' database. Returns list of rows, which + * are lists of values in columns, in the database's native string + * representation. 
+ */ + virtual std::vector> executeSql( std::string sql ) = 0; + static const std::string SQLITEDRIVERNAME; static const std::string POSTGRESDRIVERNAME; diff --git a/geodiff/src/drivers/postgresdriver.cpp b/geodiff/src/drivers/postgresdriver.cpp index 4d6f032d..e98b8e0c 100644 --- a/geodiff/src/drivers/postgresdriver.cpp +++ b/geodiff/src/drivers/postgresdriver.cpp @@ -1087,3 +1087,38 @@ void PostgresDriver::dumpData( ChangesetWriter &writer, bool useModified ) } } } + +class SearchPathScope +{ + public: + SearchPathScope( PGconn *conn, std::string prependSchema ) + : mConn( conn ) + { + PostgresResult res = execSql( mConn, "SHOW search_path" ); + mOldSearchPath = res.value( 0, 0 ); + std::string newSearchPath = prependSchema + "," + mOldSearchPath; + execSql( mConn, "SET search_path TO " + newSearchPath ); + } + ~SearchPathScope() + { + execSql( mConn, "SET search_path TO " + mOldSearchPath ); + } + private: + std::string mOldSearchPath; + PGconn *mConn; +}; + +std::vector> PostgresDriver::executeSql( std::string sql ) +{ + SearchPathScope spScope( mConn, mBaseSchema ); + PostgresResult res = execSql( mConn, sql ); + std::vector> rows; + rows.resize( res.rowCount() ); + for ( size_t r = 0; r < rows.size(); ++r ) + { + rows[r].resize( res.columnCount() ); + for ( size_t i = 0; i < rows[r].size(); ++i ) + rows[r][i] = res.value( r, i ); + } + return rows; +} diff --git a/geodiff/src/drivers/postgresdriver.h b/geodiff/src/drivers/postgresdriver.h index 12b5f6cf..ac5126e6 100644 --- a/geodiff/src/drivers/postgresdriver.h +++ b/geodiff/src/drivers/postgresdriver.h @@ -44,6 +44,7 @@ class PostgresDriver : public Driver void applyChangeset( ChangesetReader &reader ) override; void createTables( const std::vector &tables ) override; void dumpData( ChangesetWriter &writer, bool useModified = false ) override; + std::vector> executeSql( std::string sql ) override; private: void logApplyConflict( const std::string &type, const ChangesetDataEntry &entry ) const; diff --git 
a/geodiff/src/drivers/postgresutils.h b/geodiff/src/drivers/postgresutils.h index 2d6d2b30..e44a7025 100644 --- a/geodiff/src/drivers/postgresutils.h +++ b/geodiff/src/drivers/postgresutils.h @@ -64,6 +64,12 @@ class PostgresResult return ::PQntuples( mResult ); } + int columnCount() const + { + assert( mResult ); + return ::PQnfields( mResult ); + } + std::string affectedRows() const { assert( mResult ); diff --git a/geodiff/src/drivers/sqlitedriver.cpp b/geodiff/src/drivers/sqlitedriver.cpp index 02a112b9..38f9f799 100644 --- a/geodiff/src/drivers/sqlitedriver.cpp +++ b/geodiff/src/drivers/sqlitedriver.cpp @@ -779,29 +779,34 @@ void SqliteDriver::createChangeset( ChangesetWriter &writer ) simulateColumnChange( currentSchemata[dcEntry->tableName], entry ); } - for ( const TableSchema &tblBase : schemaBase.tables ) + for ( const TableSchema &tblModified : schemaModified.tables ) { - if ( !tblBase.hasPrimaryKey() ) + if ( !tblModified.hasPrimaryKey() ) continue; // ignore tables without primary key - they can't be compared properly - // Find corresponding table in modified DB - const TableSchema *tblModified = nullptr; - for ( const TableSchema &tbl : schemaModified.tables ) + // Find corresponding table in base DB + const TableSchema *tblBase = nullptr; + for ( const TableSchema &tbl : schemaBase.tables ) { - if ( tbl.name == tblBase.name ) + if ( tbl.name == tblModified.name ) { - tblModified = &tbl; + tblBase = &tbl; break; } } - if ( !tblModified ) - continue; // Table was deleted - TableDiffContext diffContext = { mDb, tblBase, *tblModified, {}, {}, writer }; + if ( !tblBase ) + { + // Table was newly added, just dump data using INSERTs + dumpTableData( writer, tblModified, true ); + continue; + } + + TableDiffContext diffContext = { mDb, *tblBase, tblModified, {}, {}, writer }; - for ( const TableColumnInfo &baseColumn : tblBase.columns ) + for ( const TableColumnInfo &baseColumn : tblBase->columns ) { - for ( const TableColumnInfo &modifiedColumn : 
tblModified->columns ) + for ( const TableColumnInfo &modifiedColumn : tblModified.columns ) { if ( baseColumn.name == modifiedColumn.name ) { @@ -811,10 +816,10 @@ void SqliteDriver::createChangeset( ChangesetWriter &writer ) } } - for ( const TableColumnInfo &modifiedColumn : tblModified->columns ) + for ( const TableColumnInfo &modifiedColumn : tblModified.columns ) { bool found = false; - for ( const TableColumnInfo &baseColumn : tblBase.columns ) + for ( const TableColumnInfo &baseColumn : tblBase->columns ) { if ( baseColumn.name == modifiedColumn.name ) { @@ -1200,6 +1205,27 @@ static void createTable( std::shared_ptr db, const TableSchema &tbl ) } } +static void removeGpkgSpatialTable( std::shared_ptr db, std::string tableName ) +{ + { + Sqlite3Stmt stmt; + stmt.prepare( db, "DELETE FROM gpkg_contents WHERE table_name = '%q'", + tableName.c_str() ); + int res = sqlite3_step( stmt.get() ); + if ( res != SQLITE_DONE ) + throwSqliteError( db->get(), "Failed to delete table from gpkg_contents table" ); + } + + { + Sqlite3Stmt stmt; + stmt.prepare( db, "DELETE FROM gpkg_geometry_columns WHERE table_name = '%q'", + tableName.c_str() ); + int res = sqlite3_step( stmt.get() ); + if ( res != SQLITE_DONE ) + throwSqliteError( db->get(), "Failed to delete table from gpkg_geometry_columns table" ); + } +} + void SqliteDriver::applySchemaChange( const ChangesetEntry &entry ) { if ( const ChangesetCreateTableEntry *ctEntry = std::get_if( &entry ) ) @@ -1249,6 +1275,7 @@ void SqliteDriver::applySchemaChange( const ChangesetEntry &entry ) logApplyConflict( "drop_table_failed", entry, true ); throwSqliteError( mDb->get(), "Failure deleting table: " + dtEntry->tableName ); } + removeGpkgSpatialTable( mDb, dtEntry->tableName ); } else if ( const ChangesetAddColumnEntry *acEntry = std::get_if( &entry ) ) { @@ -1462,42 +1489,70 @@ void SqliteDriver::createTables( const std::vector &tables ) } } +void SqliteDriver::dumpTableData( ChangesetWriter &writer, TableSchema tbl, bool 
useModified ) +{ + std::string dbName = databaseName( useModified ); + if ( !tbl.hasPrimaryKey() ) + return; // ignore tables without primary key - they can't be compared properly + + bool first = true; + Sqlite3Stmt statementI; + statementI.prepare( mDb, "SELECT * FROM \"%w\".\"%w\"", dbName.c_str(), tbl.name.c_str() ); + int rc; + while ( SQLITE_ROW == ( rc = sqlite3_step( statementI.get() ) ) ) + { + if ( first ) + { + writer.beginTable( schemaToChangesetTable( tbl.name, tbl ) ); + first = false; + } + + ChangesetDataEntry e; + e.op = ChangesetDataEntry::OpInsert; + size_t numColumns = tbl.columns.size(); + for ( size_t i = 0; i < numColumns; ++i ) + { + Sqlite3Value v( sqlite3_column_value( statementI.get(), static_cast( i ) ) ); + e.newValues.push_back( changesetValue( v.value() ) ); + } + writer.writeEntry( e ); + } + if ( rc != SQLITE_DONE ) + { + logSqliteError( context(), mDb, "Failure dumping changeset" ); + } +} void SqliteDriver::dumpData( ChangesetWriter &writer, bool useModified ) { - std::string dbName = databaseName( useModified ); std::vector tables = listTables(); for ( const std::string &tableName : tables ) { TableSchema tbl = tableSchema( tableName, useModified ); - if ( !tbl.hasPrimaryKey() ) - continue; // ignore tables without primary key - they can't be compared properly - - bool first = true; - Sqlite3Stmt statementI; - statementI.prepare( mDb, "SELECT * FROM \"%w\".\"%w\"", dbName.c_str(), tableName.c_str() ); - int rc; - while ( SQLITE_ROW == ( rc = sqlite3_step( statementI.get() ) ) ) - { - if ( first ) - { - writer.beginTable( schemaToChangesetTable( tableName, tbl ) ); - first = false; - } + dumpTableData( writer, tbl, useModified ); + } +} - ChangesetDataEntry e; - e.op = ChangesetDataEntry::OpInsert; - size_t numColumns = tbl.columns.size(); - for ( size_t i = 0; i < numColumns; ++i ) - { - Sqlite3Value v( sqlite3_column_value( statementI.get(), static_cast( i ) ) ); - e.newValues.push_back( changesetValue( v.value() ) ); - } - 
writer.writeEntry( e ); - } - if ( rc != SQLITE_DONE ) +std::vector> SqliteDriver::executeSql( std::string sql ) +{ + Sqlite3Stmt stmt; + stmt.prepare( mDb, "%s", sql.c_str() ); + std::vector> rows; + int rc; + while ( ( rc = sqlite3_step( stmt.get() ) ) == SQLITE_ROW ) + { + std::vector values; + values.resize( sqlite3_column_count( stmt.get() ) ); + for ( size_t i = 0; i < values.size(); ++i ) { - logSqliteError( context(), mDb, "Failure dumping changeset" ); + const unsigned char *text = sqlite3_column_text( stmt.get(), static_cast( i ) ); + values.push_back( reinterpret_cast( text ) ); } + rows.push_back( values ); + } + if ( rc != SQLITE_DONE ) + { + logSqliteError( context(), mDb, "Failure executing SQL: " + sql ); } + return rows; } diff --git a/geodiff/src/drivers/sqlitedriver.h b/geodiff/src/drivers/sqlitedriver.h index 228c2c59..9dbe1f81 100644 --- a/geodiff/src/drivers/sqlitedriver.h +++ b/geodiff/src/drivers/sqlitedriver.h @@ -57,12 +57,14 @@ class SqliteDriver : public Driver void applyChangeset( ChangesetReader &reader ) override; void createTables( const std::vector &tables ) override; void dumpData( ChangesetWriter &writer, bool useModified = false ) override; + std::vector> executeSql( std::string sql ) override; private: void logApplyConflict( const std::string &type, const ChangesetEntry &entry, bool isDbErr = false ) const; ChangeApplyResult applyDataChange( SqliteChangeApplyState &state, const ChangesetDataEntry &entry ); void applySchemaChange( const ChangesetEntry &entry ); std::string databaseName( bool useModified = false ); + void dumpTableData( ChangesetWriter &writer, TableSchema tbl, bool useModified ); std::shared_ptr mDb; bool mHasModified = false; // whether there is also a second file attached diff --git a/geodiff/src/tableschemadiff.cpp b/geodiff/src/tableschemadiff.cpp index 8419e2d7..00bd1594 100644 --- a/geodiff/src/tableschemadiff.cpp +++ b/geodiff/src/tableschemadiff.cpp @@ -132,7 +132,7 @@ std::vector diffTableSchema( const 
TableSchema &base, const Tabl // string to avoid regression with DB pairs that use compatible types. if ( !baseColumns.at( colName )->compareWithBaseTypes( *modifiedColumns.at( colName ) ) ) throw GeoDiffException( "Columns differ: " + - base.name + "." + colName + " and " + modified.name + "." + colName + ")" ); + base.name + "." + colName + " and " + modified.name + "." + colName ); } return entries; From 99a0517fb4f5d2cb6bc60b72e649e6ee830b2a57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ko=C5=88a=C5=99=C3=ADk?= Date: Fri, 15 May 2026 23:44:18 +0200 Subject: [PATCH 10/21] Add more tests for schema change diffs --- geodiff/tests/test_modified_scheme.cpp | 222 ++++++++++++++++++++++++- 1 file changed, 221 insertions(+), 1 deletion(-) diff --git a/geodiff/tests/test_modified_scheme.cpp b/geodiff/tests/test_modified_scheme.cpp index 05801c95..6382944f 100644 --- a/geodiff/tests/test_modified_scheme.cpp +++ b/geodiff/tests/test_modified_scheme.cpp @@ -3,10 +3,19 @@ Copyright (C) 2019 Peter Petrik */ +#include +#include + #include "gtest/gtest.h" -#include "geodiff_testutils.hpp" + +#include "changesetreader.h" +#include "changesetutils.h" +#include "changesetwriter.h" +#include "driver.h" #include "geodiff.h" +#include "geodiff_testutils.hpp" #include "geodiffutils.hpp" +#include "tableschema.h" TEST( ModifiedSchemeSqlite3Test, add_attribute ) { @@ -98,6 +107,217 @@ TEST( ModifiedSchemeSqlite3Test, rename_attribute ) EXPECT_TRUE( fileContentEquals( changeset, expected ) ); } +// Create driver and fill DB with sample data +static std::unique_ptr createSampleDb( std::string driverName, std::string testname, std::string dbname ) +{ + DriverParametersMap params; + if ( driverName == "sqlite" ) + { + std::string dir = pathjoin( tmpdir(), testname ); + makedir( dir ); + params["base"] = pathjoin( dir, dbname + ".gpkg" ); + } + else if ( driverName == "postgres" ) + { + params["base"] = testname + "_" + dbname; + params["conninfo"] = pgTestConnInfo(); + } + + 
std::unique_ptr driver( Driver::createDriver( static_cast( testContext() ), driverName ) ); + driver->create( params, true ); + + TableColumnInfo fidCol; + fidCol.name = "fid"; + fidCol.type = columnType( static_cast( testContext() ), "integer", driverName ); + fidCol.isPrimaryKey = true; + fidCol.isAutoIncrement = true; + + TableColumnInfo geometryCol; + geometryCol.name = "geometry"; + geometryCol.type = columnType( static_cast( testContext() ), "point", driverName, true ); + geometryCol.isGeometry = true; + + TableColumnInfo nameCol; + nameCol.name = "name"; + nameCol.type = columnType( static_cast( testContext() ), "text", driverName ); + + driver->createTables( + { + {"tram_stops", { fidCol, geometryCol, nameCol } }, + } ); + + driver->executeSql( "INSERT INTO tram_stops(fid, name) VALUES " + "(1, 'Ohrada'), " + "(2, 'Petřiny'), " + "(3, 'Park Maxe van der Stoela')" ); + + return driver; +} + +// Open driver with both base & modified as created by createBaseDb above +static std::unique_ptr openBaseModifiedDb( std::string driverName, std::string testname, std::string baseName, std::string modifiedName ) +{ + DriverParametersMap params; + if ( driverName == "sqlite" ) + { + std::string dir = pathjoin( tmpdir(), testname ); + makedir( dir ); + params["base"] = pathjoin( dir, baseName + ".gpkg" ); + if ( modifiedName.size() ) + params["modified"] = pathjoin( dir, modifiedName + ".gpkg" ); + } + else if ( driverName == "postgres" ) + { + params["base"] = testname + "_" + baseName; + if ( modifiedName.size() ) + params["modified"] = testname + "_" + modifiedName; + params["conninfo"] = pgTestConnInfo(); + } + + std::unique_ptr driver( Driver::createDriver( static_cast( testContext() ), driverName ) ); + driver->open( params ); + return driver; +} + +static void testSchemaDiffWith( std::string driverName, std::string testname, std::function modification ) +{ + // Create base and modified DB + { + std::unique_ptr baseDb = createSampleDb( driverName, testname, "base" 
); + std::unique_ptr modifiedDb = createSampleDb( driverName, testname, "modified" ); + modification( *modifiedDb ); + } + + // Create diff base->modified + std::string diffPath = pathjoin( tmpdir(), testname, "diff" ); + { + std::unique_ptr baseModifiedDriver = openBaseModifiedDb( driverName, testname, "base", "modified" ); + ChangesetWriter writer; + writer.open( diffPath ); + baseModifiedDriver->createChangeset( writer ); + } + + // Apply diff to base + { + ChangesetReader reader; + reader.open( diffPath ); + std::unique_ptr baseDb = openBaseModifiedDb( driverName, testname, "base", "" ); + baseDb->applyChangeset( reader ); + } + + // Check that base and modified are now equal + std::string diff2Path = pathjoin( tmpdir(), testname, "diff2" ); + { + std::unique_ptr baseModifiedDriver = openBaseModifiedDb( driverName, testname, "base", "modified" ); + ChangesetWriter writer; + writer.open( diff2Path ); + baseModifiedDriver->createChangeset( writer ); + } + uintmax_t diff2Size = std::filesystem::file_size( diff2Path ); + ASSERT_EQ( diff2Size, 0 ); + + // Invert diff + std::string invertedDiffPath = pathjoin( tmpdir(), testname, "diff-inv" ); + { + ChangesetReader reader; + reader.open( diffPath ); + ChangesetWriter writer; + writer.open( invertedDiffPath ); + invertChangeset( reader, writer ); + } + + // Apply inverted diff to base + { + ChangesetReader reader; + reader.open( invertedDiffPath ); + std::unique_ptr baseDb = openBaseModifiedDb( driverName, testname, "base", "" ); + baseDb->applyChangeset( reader ); + } + + // Check that base and original base are now equal + std::string diff3Path = pathjoin( tmpdir(), testname, "diff2" ); + { + createSampleDb( driverName, testname, "base2" ); + std::unique_ptr baseModifiedDriver = openBaseModifiedDb( driverName, testname, "base2", "base" ); + ChangesetWriter writer; + writer.open( diff3Path ); + baseModifiedDriver->createChangeset( writer ); + } + uintmax_t diff3Size = std::filesystem::file_size( diff3Path ); + 
ASSERT_EQ( diff3Size, 0 ); +} + +TEST( ModifiedSchemeTest, create_table ) +{ + // TODO: Postgres support + std::string driverName = "sqlite"; + + testSchemaDiffWith( driverName, "create_table", [ = ]( Driver & modifiedDb ) + { + TableColumnInfo fidCol; + fidCol.name = "fid"; + fidCol.type = columnType( static_cast( testContext() ), "integer", driverName ); + fidCol.isPrimaryKey = true; + fidCol.isAutoIncrement = true; + + TableColumnInfo geometryCol; + geometryCol.name = "geometry"; + geometryCol.type = columnType( static_cast( testContext() ), "point", driverName, true ); + geometryCol.isGeometry = true; + + TableColumnInfo materialCol; + materialCol.name = "material"; + materialCol.type = columnType( static_cast( testContext() ), "text", driverName ); + + TableSchema benchesTable{"benches", {fidCol, geometryCol, materialCol}}; + + modifiedDb.createTables( {benchesTable} ); + modifiedDb.executeSql( "INSERT INTO benches (fid, material) VALUES (1, 'wood'), (2, 'steel')" ); + } ); +} + +TEST( ModifiedSchemeTest, add_column ) +{ + // TODO: Postgres support + std::string driverName = "sqlite"; + + testSchemaDiffWith( driverName, "add_column", [ = ]( Driver & modifiedDb ) + { + modifiedDb.executeSql( "INSERT INTO tram_stops (fid, name) VALUES (4, 'Palmovka')" ); + modifiedDb.executeSql( "UPDATE tram_stops SET name = 'Pohořelec' WHERE fid = 1" ); + modifiedDb.executeSql( "ALTER TABLE tram_stops ADD COLUMN bench_count integer" ); + modifiedDb.executeSql( "UPDATE tram_stops SET bench_count = 1 WHERE fid = 1" ); + modifiedDb.executeSql( "UPDATE tram_stops SET bench_count = 4 WHERE fid = 2" ); + } ); +} + +TEST( ModifiedSchemeTest, drop_column ) +{ + // TODO: Postgres support + std::string driverName = "sqlite"; + + testSchemaDiffWith( driverName, "drop_column", [ = ]( Driver & modifiedDb ) + { + modifiedDb.executeSql( "INSERT INTO tram_stops (fid, name) VALUES (4, 'Palmovka')" ); + modifiedDb.executeSql( "UPDATE tram_stops SET name = 'Pohořelec' WHERE fid = 1" ); + 
modifiedDb.executeSql( "ALTER TABLE tram_stops DROP COLUMN name" ); + modifiedDb.executeSql( "INSERT INTO tram_stops (fid) VALUES (5)" ); + } ); +} + +TEST( ModifiedSchemeTest, drop_table ) +{ + // TODO: Postgres support + std::string driverName = "sqlite"; + + testSchemaDiffWith( driverName, "drop_table", [ = ]( Driver & modifiedDb ) + { + modifiedDb.executeSql( "INSERT INTO tram_stops (fid, name) VALUES (4, 'Palmovka')" ); + modifiedDb.executeSql( "UPDATE tram_stops SET name = 'Pohořelec' WHERE fid = 1" ); + modifiedDb.executeSql( "DROP TABLE tram_stops" ); + } ); +} + int main( int argc, char **argv ) { testing::InitGoogleTest( &argc, argv ); From b5db8510f612f2ce97599e85b10aa4d61de58baa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ko=C5=88a=C5=99=C3=ADk?= Date: Sat, 16 May 2026 11:30:20 +0200 Subject: [PATCH 11/21] Fix formatting --- geodiff/src/changesetreader.cpp | 4 ++-- geodiff/src/drivers/sqlitedriver.cpp | 4 ++-- geodiff/tests/test_changeset_utils.cpp | 14 +++++--------- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/geodiff/src/changesetreader.cpp b/geodiff/src/changesetreader.cpp index 30022973..e70baf53 100644 --- a/geodiff/src/changesetreader.cpp +++ b/geodiff/src/changesetreader.cpp @@ -197,8 +197,8 @@ void ChangesetReader::readTableRecord() if ( nCol < 0 || nCol > 65536 ) throwReaderError( "readByte: unexpected number of columns" ); - mCurrentTable = std::make_shared(); - mCurrentTable->primaryKeys.clear(); + mCurrentTable = std::make_shared(); + mCurrentTable->primaryKeys.clear(); for ( int i = 0; i < nCol; ++i ) { diff --git a/geodiff/src/drivers/sqlitedriver.cpp b/geodiff/src/drivers/sqlitedriver.cpp index 38f9f799..1f54f18a 100644 --- a/geodiff/src/drivers/sqlitedriver.cpp +++ b/geodiff/src/drivers/sqlitedriver.cpp @@ -256,7 +256,7 @@ bool tableExists( std::shared_ptr db, const std::string &tableName, c { Sqlite3Stmt stmtHasGeomColumnsInfo; stmtHasGeomColumnsInfo.prepare( db, "SELECT name FROM \"%w\".sqlite_master WHERE 
type='table' " - "AND name='%q'", dbName.c_str(), tableName.c_str() ); + "AND name='%q'", dbName.c_str(), tableName.c_str() ); return sqlite3_step( stmtHasGeomColumnsInfo.get() ) == SQLITE_ROW; } @@ -1130,7 +1130,7 @@ static void addGpkgSpatialTable( std::shared_ptr db, const TableSchem Sqlite3Stmt stmt; stmt.prepare( db, "INSERT INTO gpkg_contents (table_name, data_type, identifier, min_x, min_y, max_x, max_y, srs_id) " - "VALUES ('%q', 'features', '%q', %f, %f, %f, %f, %d)", + "VALUES ('%q', 'features', '%q', %f, %f, %f, %f, %d)", tbl.name.c_str(), tbl.name.c_str(), extent.minX, extent.minY, extent.maxX, extent.maxY, srsId ); int res = sqlite3_step( stmt.get() ); if ( res != SQLITE_DONE ) diff --git a/geodiff/tests/test_changeset_utils.cpp b/geodiff/tests/test_changeset_utils.cpp index 589396b9..4eaaed50 100644 --- a/geodiff/tests/test_changeset_utils.cpp +++ b/geodiff/tests/test_changeset_utils.cpp @@ -228,7 +228,7 @@ TEST( ChangesetUtils, test_concat_changesets_simple_table ) { Value(), Value(), Value::makeInt( 1 ) } ); ChangesetDataEntry fooUpdate123_inverse = ChangesetDataEntry::make( - tableFoo, ChangesetDataEntry::OpUpdate, + tableFoo, ChangesetDataEntry::OpUpdate, { Value::makeInt( 123 ), Value::makeText( "world" ), Value::makeInt( 4 ) }, { Value(), Value::makeText( "hello" ), Value::makeInt( 5 ) } ); @@ -355,12 +355,10 @@ TEST( ChangesetUtils, test_concat_changesets_multiple_tables ) { std::make_pair( "foo", std::vector( { ChangesetDataEntry::make( tableFoo, ChangesetDataEntry::OpInsert, {}, - { Value::makeInt( 123 ), Value::makeText( "hello" ), Value::makeInt( 5 ) } - ) } ) ), + { Value::makeInt( 123 ), Value::makeText( "hello" ), Value::makeInt( 5 ) } ) } ) ), std::make_pair( "bar", std::vector( { ChangesetDataEntry::make( tableBar, ChangesetDataEntry::OpInsert, {}, - { Value::makeInt( 123 ), Value::makeText( ":-)" ) } - ) } ) ) + { Value::makeInt( 123 ), Value::makeText( ":-)" ) } ) } ) ) } ); testConcat( "multi-unrelated-insert-update", @@ -373,13 
+371,11 @@ TEST( ChangesetUtils, test_concat_changesets_multiple_tables ) { std::make_pair( "foo", std::vector( { ChangesetDataEntry::make( tableFoo, ChangesetDataEntry::OpInsert, {}, - { Value::makeInt( 123 ), Value::makeText( "hello" ), Value::makeInt( 5 ) } - ) } ) ), + { Value::makeInt( 123 ), Value::makeText( "hello" ), Value::makeInt( 5 ) } ) } ) ), std::make_pair( "bar", std::vector( { ChangesetDataEntry::make( tableBar, ChangesetDataEntry::OpUpdate, { Value::makeInt( 123 ), Value::makeText( "ha!" ) }, - { Value(), Value::makeText( ":-)" ) } - ) } ) ) + { Value(), Value::makeText( ":-)" ) } ) } ) ) } ); } From 7a61f5d27d1ab5b672d52fe6f8a40c8009e768dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ko=C5=88a=C5=99=C3=ADk?= Date: Sat, 16 May 2026 11:41:30 +0200 Subject: [PATCH 12/21] Fix cppcheck warnings --- geodiff/src/changesetutils.cpp | 9 ++++----- geodiff/src/drivers/postgresdriver.cpp | 2 +- geodiff/src/drivers/sqlitedriver.cpp | 4 ++-- geodiff/src/geodiffrebase.cpp | 8 ++++---- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/geodiff/src/changesetutils.cpp b/geodiff/src/changesetutils.cpp index 2347dca1..b11b9e8a 100644 --- a/geodiff/src/changesetutils.cpp +++ b/geodiff/src/changesetutils.cpp @@ -27,7 +27,6 @@ ChangesetTable schemaToChangesetTable( const std::string &tableName, const Table // Returns inverted changeset entries in reverse order std::vector invertChangesetReverse( ChangesetReader &reader ) { - std::string currentTableName; std::vector invertedEntries; ChangesetEntry entry; while ( reader.nextEntry( entry ) ) @@ -75,28 +74,28 @@ std::vector invertChangesetReverse( ChangesetReader &reader ) throw GeoDiffException( "Unknown entry operation!" 
); } } - else if ( ChangesetCreateTableEntry *ctEntry = std::get_if( &entry ) ) + else if ( const ChangesetCreateTableEntry *ctEntry = std::get_if( &entry ) ) { ChangesetDropTableEntry out; out.tableName = ctEntry->tableName; out.columns = ctEntry->columns; invertedEntries.push_back( out ); } - else if ( ChangesetAddColumnEntry *acEntry = std::get_if( &entry ) ) + else if ( const ChangesetAddColumnEntry *acEntry = std::get_if( &entry ) ) { ChangesetDropColumnEntry out; out.tableName = acEntry->tableName; out.column = acEntry->column; invertedEntries.push_back( out ); } - else if ( ChangesetDropTableEntry *dtEntry = std::get_if( &entry ) ) + else if ( const ChangesetDropTableEntry *dtEntry = std::get_if( &entry ) ) { ChangesetCreateTableEntry out; out.tableName = dtEntry->tableName; out.columns = dtEntry->columns; invertedEntries.push_back( out ); } - else if ( ChangesetDropColumnEntry *dcEntry = std::get_if( &entry ) ) + else if ( const ChangesetDropColumnEntry *dcEntry = std::get_if( &entry ) ) { ChangesetAddColumnEntry out; out.tableName = dcEntry->tableName; diff --git a/geodiff/src/drivers/postgresdriver.cpp b/geodiff/src/drivers/postgresdriver.cpp index e98b8e0c..4ec25b41 100644 --- a/geodiff/src/drivers/postgresdriver.cpp +++ b/geodiff/src/drivers/postgresdriver.cpp @@ -1091,7 +1091,7 @@ void PostgresDriver::dumpData( ChangesetWriter &writer, bool useModified ) class SearchPathScope { public: - SearchPathScope( PGconn *conn, std::string prependSchema ) + SearchPathScope( PGconn *conn, const std::string &prependSchema ) : mConn( conn ) { PostgresResult res = execSql( mConn, "SHOW search_path" ); diff --git a/geodiff/src/drivers/sqlitedriver.cpp b/geodiff/src/drivers/sqlitedriver.cpp index 1f54f18a..1501f689 100644 --- a/geodiff/src/drivers/sqlitedriver.cpp +++ b/geodiff/src/drivers/sqlitedriver.cpp @@ -378,7 +378,7 @@ TableSchema SqliteDriver::tableSchema( const std::string &tableName, DatabaseSchema SqliteDriver::getSchema( bool useModified ) { std::vector 
tables; - for ( auto &name : listTables( useModified ) ) + for ( const std::string &name : listTables( useModified ) ) { tables.push_back( tableSchema( name, useModified ) ); } @@ -1205,7 +1205,7 @@ static void createTable( std::shared_ptr db, const TableSchema &tbl ) } } -static void removeGpkgSpatialTable( std::shared_ptr db, std::string tableName ) +static void removeGpkgSpatialTable( std::shared_ptr db, const std::string &tableName ) { { Sqlite3Stmt stmt; diff --git a/geodiff/src/geodiffrebase.cpp b/geodiff/src/geodiffrebase.cpp index 3f778276..60d87245 100644 --- a/geodiff/src/geodiffrebase.cpp +++ b/geodiff/src/geodiffrebase.cpp @@ -314,7 +314,7 @@ int _find_mapping_for_new_changeset( { if ( !std::holds_alternative( entry ) ) continue; - ChangesetDataEntry &dataEntry = std::get( entry ); + const ChangesetDataEntry &dataEntry = std::get( entry ); std::string tableName = dataEntry.table->name; @@ -692,7 +692,7 @@ void _prepare_new_changeset( const Context *context, if ( const ChangesetCreateTableEntry *ctEntry = std::get_if( &entry ) ) { schemaEntryTableName = ctEntry->tableName; - TableSchema *existing = outputSchema.tableByName( ctEntry->tableName ); + const TableSchema *existing = outputSchema.tableByName( ctEntry->tableName ); if ( existing ) { if ( existing->columns != ctEntry->columns ) @@ -750,13 +750,13 @@ void _prepare_new_changeset( const Context *context, ChangesetWriter writer; writer.open( changesetNew ); - for ( auto &it : tableChanges ) + for ( const auto &it : tableChanges ) { const std::vector &changes = it.second; if ( changes.empty() ) continue; - ChangesetTable *defWritten = nullptr; + const ChangesetTable *defWritten = nullptr; for ( const ChangesetEntry &writeEntry : changes ) { if ( auto dataEntry = std::get_if( &writeEntry ) ) From 6629bf75543a00496d7adc0010ca07acec718353 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ko=C5=88a=C5=99=C3=ADk?= Date: Sat, 16 May 2026 13:21:13 +0200 Subject: [PATCH 13/21] Add schema diff rebase tests --- 
geodiff/tests/test_modified_scheme.cpp | 174 +++++++++++++++++++++++++ 1 file changed, 174 insertions(+) diff --git a/geodiff/tests/test_modified_scheme.cpp b/geodiff/tests/test_modified_scheme.cpp index 6382944f..c057cc16 100644 --- a/geodiff/tests/test_modified_scheme.cpp +++ b/geodiff/tests/test_modified_scheme.cpp @@ -14,6 +14,7 @@ #include "driver.h" #include "geodiff.h" #include "geodiff_testutils.hpp" +#include "geodiffrebase.hpp" #include "geodiffutils.hpp" #include "tableschema.h" @@ -318,6 +319,179 @@ TEST( ModifiedSchemeTest, drop_table ) } ); } +static void testSchemaDiffRebaseWith( std::string driverName, std::string testname, int expectedConflicts, std::function theirs, std::function ours, std::function expected ) +{ + // Create base and modified Dbs + DatabaseSchema baseSchema; + { + std::unique_ptr baseDb = createSampleDb( driverName, testname, "base" ); + for ( const std::string &tableName : baseDb->listTables() ) + baseSchema.tables.push_back( baseDb->tableSchema( tableName ) ); + std::unique_ptr theirsDb = createSampleDb( driverName, testname, "theirs" ); + theirs( *theirsDb ); + std::unique_ptr oursDb = createSampleDb( driverName, testname, "ours" ); + ours( *oursDb ); + } + + // Create diff base->theirs + std::string base2TheirsPath = pathjoin( tmpdir(), testname, "base2theirs" ); + { + std::unique_ptr baseTheirsDriver = openBaseModifiedDb( driverName, testname, "base", "theirs" ); + ChangesetWriter writer; + writer.open( base2TheirsPath ); + baseTheirsDriver->createChangeset( writer ); + } + + // Create diff base->ours + std::string base2OursPath = pathjoin( tmpdir(), testname, "base2ours" ); + { + std::unique_ptr baseOursDriver = openBaseModifiedDb( driverName, testname, "base", "ours" ); + ChangesetWriter writer; + writer.open( base2OursPath ); + baseOursDriver->createChangeset( writer ); + } + + // Rebase base->ours diff to theirs->both + std::string theirs2bothPath = pathjoin( tmpdir(), testname, "theirs2both" ); + { + std::vector 
conflicts; + rebase( static_cast( testContext() ), baseSchema, base2TheirsPath, theirs2bothPath, base2OursPath, conflicts ); + ASSERT_EQ( conflicts.size(), 0 ) << conflicts.size() << " conflicts in rebase (more than " << expectedConflicts << "): " << conflictsToJSON( conflicts ).dump( 2 ); + } + + if ( expectedConflicts > 0 ) + return; + + // Apply both diffs to both + { + std::unique_ptr bothDb = createSampleDb( driverName, testname, "both" ); + + { + ChangesetReader reader; + reader.open( base2TheirsPath ); + bothDb->applyChangeset( reader ); + } + + { + ChangesetReader reader; + reader.open( theirs2bothPath ); + bothDb->applyChangeset( reader ); + } + } + + // Check that base and both are now equal + std::string expected2bothPath = pathjoin( tmpdir(), testname, "expected2both" ); + { + std::unique_ptr expectedDb = createSampleDb( driverName, testname, "expected" ); + expected( *expectedDb ); + + std::unique_ptr expectedBothDriver = openBaseModifiedDb( driverName, testname, "expected", "both" ); + ChangesetWriter writer; + writer.open( expected2bothPath ); + expectedBothDriver->createChangeset( writer ); + } + uintmax_t expected2bothSize = std::filesystem::file_size( expected2bothPath ); + ASSERT_EQ( expected2bothSize, 0 ); +} + +TEST( ModifiedSchemeTest, rebase_redundant_drop_column ) +{ + // TODO: Postgres support + std::string driverName = "sqlite"; + + testSchemaDiffRebaseWith( driverName, "rebase_redundant_drop_column", 0, + [ = ]( Driver & db ) + { + db.executeSql( "ALTER TABLE tram_stops DROP COLUMN name" ); + db.executeSql( "INSERT INTO tram_stops (fid) VALUES (4)" ); + }, + [ = ]( Driver & db ) + { + db.executeSql( "ALTER TABLE tram_stops DROP COLUMN name" ); + db.executeSql( "INSERT INTO tram_stops (fid) VALUES (4)" ); + }, + [ = ]( Driver & db ) + { + db.executeSql( "ALTER TABLE tram_stops DROP COLUMN name" ); + db.executeSql( "INSERT INTO tram_stops (fid) VALUES (4)" ); + db.executeSql( "INSERT INTO tram_stops (fid) VALUES (5)" ); + } ); +} + +TEST( 
ModifiedSchemeTest, rebase_redundant_drop_table ) +{ + // TODO: Postgres support + std::string driverName = "sqlite"; + + testSchemaDiffRebaseWith( driverName, "rebase_redundant_drop_table", 0, + [ = ]( Driver & db ) + { + db.executeSql( "DROP TABLE tram_stops" ); + }, + [ = ]( Driver & db ) + { + db.executeSql( "DROP TABLE tram_stops" ); + }, + [ = ]( Driver & db ) + { + db.executeSql( "DROP TABLE tram_stops" ); + } ); +} + +TEST( ModifiedSchemeTest, rebase_redundant_add_column ) +{ + // TODO: Postgres support + std::string driverName = "sqlite"; + + testSchemaDiffRebaseWith( driverName, "rebase_redundant_add_column", 0, + [ = ]( Driver & db ) + { + db.executeSql( "ALTER TABLE tram_stops ADD COLUMN bench_count integer" ); + db.executeSql( "INSERT INTO tram_stops (fid, name, bench_count) VALUES (4, 'Palmovka', 3)" ); + db.executeSql( "UPDATE tram_stops SET bench_count = 1 WHERE fid = 1" ); + }, + [ = ]( Driver & db ) + { + db.executeSql( "ALTER TABLE tram_stops ADD COLUMN bench_count integer" ); + db.executeSql( "UPDATE tram_stops SET bench_count = 1 WHERE fid = 1" ); + db.executeSql( "INSERT INTO tram_stops (fid, name, bench_count) VALUES (5, 'Drinopol', 2)" ); + }, + [ = ]( Driver & db ) + { + db.executeSql( "ALTER TABLE tram_stops ADD COLUMN bench_count integer" ); + db.executeSql( "INSERT INTO tram_stops (fid, name, bench_count) VALUES (4, 'Palmovka', 3)" ); + db.executeSql( "INSERT INTO tram_stops (fid, name, bench_count) VALUES (5, 'Drinopol', 2)" ); + db.executeSql( "UPDATE tram_stops SET bench_count = 1 WHERE fid = 1" ); + } ); +} + +TEST( ModifiedSchemeTest, rebase_redundant_create_table ) +{ + // TODO: Postgres support + std::string driverName = "sqlite"; + + testSchemaDiffRebaseWith( driverName, "rebase_redundant_create_table", 0, + [ = ]( Driver & db ) + { + db.executeSql( "CREATE TABLE vehicles (fid INTEGER, name TEXT, type TEXT)" ); + db.executeSql( "INSERT INTO vehicles VALUES (1, 'T3', 'tram')" ); + db.executeSql( "INSERT INTO vehicles VALUES (2, 
'KT8D5', 'tram')" ); + }, + [ = ]( Driver & db ) + { + db.executeSql( "CREATE TABLE vehicles (fid INTEGER, name TEXT, type TEXT)" ); + db.executeSql( "INSERT INTO vehicles VALUES (1, 'T3', 'tram')" ); + db.executeSql( "INSERT INTO vehicles VALUES (2, '14T', 'tram')" ); + }, + [ = ]( Driver & db ) + { + db.executeSql( "CREATE TABLE vehicles (fid INTEGER, name TEXT, type TEXT)" ); + db.executeSql( "INSERT INTO vehicles VALUES (1, 'T3', 'tram')" ); + db.executeSql( "INSERT INTO vehicles VALUES (2, 'KT8D5', 'tram')" ); + db.executeSql( "INSERT INTO vehicles VALUES (3, '14T', 'tram')" ); + } ); +} + int main( int argc, char **argv ) { testing::InitGoogleTest( &argc, argv ); From 9166b31ce24ac080d59243bb254cc301a3c85eee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ko=C5=88a=C5=99=C3=ADk?= Date: Fri, 22 May 2026 10:02:21 +0200 Subject: [PATCH 14/21] Fix MSVC build --- geodiff/src/changesetwriter.cpp | 8 ++++---- geodiff/src/drivers/postgresdriver.cpp | 4 ++-- geodiff/src/drivers/sqlitedriver.cpp | 2 +- geodiff/tests/test_modified_scheme.cpp | 4 ++++ 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/geodiff/src/changesetwriter.cpp b/geodiff/src/changesetwriter.cpp index 392da299..47aa48ed 100644 --- a/geodiff/src/changesetwriter.cpp +++ b/geodiff/src/changesetwriter.cpp @@ -123,8 +123,8 @@ void ChangesetWriter::writeRowValues( const std::vector &values ) void ChangesetWriter::writeColumnInfo( const TableColumnInfo &column ) { writeNullTerminatedString( column.name ); - writeByte( column.type.baseType ); - writeByte( column.isPrimaryKey + writeByte( static_cast( column.type.baseType ) ); + writeByte( ( column.isPrimaryKey << 0 ) | ( column.isNotNull << 1 ) | ( column.isAutoIncrement << 2 ) | ( column.isGeometry << 3 ) @@ -152,7 +152,7 @@ void ChangesetWriter::writeCreateTableEntry( const ChangesetCreateTableEntry &en { writeByte( static_cast( ChangesetEntryType::OpCreateTable ) ); writeNullTerminatedString( entry.tableName ); - writeVarint( 
entry.columns.size() ); + writeVarint( static_cast( entry.columns.size() ) ); for ( const TableColumnInfo &column : entry.columns ) { writeColumnInfo( column ); @@ -163,7 +163,7 @@ void ChangesetWriter::writeDropTableEntry( const ChangesetDropTableEntry &entry { writeByte( static_cast( ChangesetEntryType::OpDropTable ) ); writeNullTerminatedString( entry.tableName ); - writeVarint( entry.columns.size() ); + writeVarint( static_cast( entry.columns.size() ) ); for ( const TableColumnInfo &column : entry.columns ) { writeColumnInfo( column ); diff --git a/geodiff/src/drivers/postgresdriver.cpp b/geodiff/src/drivers/postgresdriver.cpp index 4ec25b41..7ef44d29 100644 --- a/geodiff/src/drivers/postgresdriver.cpp +++ b/geodiff/src/drivers/postgresdriver.cpp @@ -1114,10 +1114,10 @@ std::vector> PostgresDriver::executeSql( std::string sq PostgresResult res = execSql( mConn, sql ); std::vector> rows; rows.resize( res.rowCount() ); - for ( size_t r = 0; r < rows.size(); ++r ) + for ( int r = 0; r < static_cast( rows.size() ); ++r ) { rows[r].resize( res.columnCount() ); - for ( size_t i = 0; i < rows[r].size(); ++i ) + for ( int i = 0; i < static_cast( rows[r].size() ); ++i ) rows[r][i] = res.value( r, i ); } return rows; diff --git a/geodiff/src/drivers/sqlitedriver.cpp b/geodiff/src/drivers/sqlitedriver.cpp index 1501f689..8b6648d0 100644 --- a/geodiff/src/drivers/sqlitedriver.cpp +++ b/geodiff/src/drivers/sqlitedriver.cpp @@ -1245,7 +1245,7 @@ void SqliteDriver::applySchemaChange( const ChangesetEntry &entry ) { createTable( mDb, { ctEntry->tableName, ctEntry->columns, tableCrs } ); } - catch ( const GeoDiffException &ex ) + catch ( const GeoDiffException & ) { // TODO: Make sure this only catches sqlite errors on CREATE TABLE logApplyConflict( "create_table_failed", entry, true ); diff --git a/geodiff/tests/test_modified_scheme.cpp b/geodiff/tests/test_modified_scheme.cpp index c057cc16..4d373bca 100644 --- a/geodiff/tests/test_modified_scheme.cpp +++ 
b/geodiff/tests/test_modified_scheme.cpp @@ -118,11 +118,13 @@ static std::unique_ptr createSampleDb( std::string driverName, std::stri makedir( dir ); params["base"] = pathjoin( dir, dbname + ".gpkg" ); } +#ifdef HAVE_POSTGRES else if ( driverName == "postgres" ) { params["base"] = testname + "_" + dbname; params["conninfo"] = pgTestConnInfo(); } +#endif std::unique_ptr driver( Driver::createDriver( static_cast( testContext() ), driverName ) ); driver->create( params, true ); @@ -167,6 +169,7 @@ static std::unique_ptr openBaseModifiedDb( std::string driverName, std:: if ( modifiedName.size() ) params["modified"] = pathjoin( dir, modifiedName + ".gpkg" ); } +#ifdef HAVE_POSTGRES else if ( driverName == "postgres" ) { params["base"] = testname + "_" + baseName; @@ -174,6 +177,7 @@ static std::unique_ptr openBaseModifiedDb( std::string driverName, std:: params["modified"] = testname + "_" + modifiedName; params["conninfo"] = pgTestConnInfo(); } +#endif std::unique_ptr driver( Driver::createDriver( static_cast( testContext() ), driverName ) ); driver->open( params ); From dbfd02459176a4e21a974f1107377c6e3105b66d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ko=C5=88a=C5=99=C3=ADk?= Date: Fri, 22 May 2026 10:31:25 +0200 Subject: [PATCH 15/21] Fix unsupported change test The previously unsupported change is now supported. 
--- .../modified_scheme/changed_attribute_type.gpkg | Bin 0 -> 98304 bytes pygeodiff/tests/test_errors.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 geodiff/tests/testdata/modified_scheme/changed_attribute_type.gpkg diff --git a/geodiff/tests/testdata/modified_scheme/changed_attribute_type.gpkg b/geodiff/tests/testdata/modified_scheme/changed_attribute_type.gpkg new file mode 100644 index 0000000000000000000000000000000000000000..7db48d7389cc2c9680e63089e2295594349d1a23 GIT binary patch literal 98304 zcmeI5ZEPDydVqIDOVl@=TpUJq9FLjkNY*P$lq}2g$GMcWqIr=>nOvTtxW;Bf4ylF7 zU1oP>Np5flN_MWjqJLVTKMqCE11@dRqAAd#Zm&P;zeNiaEv~@zM-S*74n7p=p>+_X zABW&N`$aByDVCDlS`OZY_^~tZ%)axy@4T}!OYLTFEQuOMMOmo`8k(YxQ#4Ke0zwo; z&5_qjIkDbb}^k{Q8JZfGaO2%yye1( zNFX*oe;sAgtE508nL=51iJj+ACZ5c)k@#FX!!amq>V;$pX~NrOjKUrr!eMkrl*@vK zRg{+{RZ|2}(k>yZl}7VWUXUbNLvLeL#;S_6bwNV0a1^axXNjhf6uWw^N&X6oMK2>Z zwXpN$fdAwTg7M5=81Kq*g)iefxXf=<3q%b!r2iG7r-nMh+{%K^UoN7C%prlAIvxb5 zLxom2Cz^3Oq_3a}3+9z!6J~b@Fc0XFjpJHAU5ELU{js4~OK+ z$t0KKnKkq}yXIuTIJ#JraTF_(VwDQ-pruE*WFqCRA&N5WA_*s{c{Xd}CgDiKlBPOP zN2Aw?cyhr){!I>HQ>!%wX zzJ%op)|72NFPAqel4_XWo!INOwOeFwV>4#Wts+Cc5^cMRTM6vt8+Q?xOs(m?u-mNoUm$rngdcdY@BmhK%<*14EIfKD$9m=bc`XOC@jQ$Uxud#ZB_0kL&VMv*8`W z9d&KDH|6=m`%<#6CwOb7tuNJ$VtE8V+0rNalfC<|%8WklBlOi??Dhx8$LZS5hGWVl zg;>iBc!mPbgdVR$ILnN{VBjoJ3sEHLE36HLgjaJ4+ z3*6}SOQXw|MvHHTnXn{phNDr&8Oa&sjNnYg5x7Z~BVfD61#f3i^zNXK90!T6_24>t ztS7h}ZfjxN3RXw3ce`hOofkD7-!7`pV0-ns)sBxg#@^j%!#P~zfZ_YDv@Wl!xFpQc zqs7VC9ud_U6B$`436gl13`e$S)T693i*y`WDUhM6;T-_Uod;n>i3hkujozy<_>01yBIKmZ5;0U!VbfB+Bx0?!J8U!?q3sS~}YD>N#cvh+&Qd43?hVyWttPVQ;(1-Y z0@lPlyOK>Df0SU;iTUi!keL}`$U;eO+1TY{vlq{YnCuF<9+*xpG(;{;&0LI4%`lU* z7tc*j&rD8EO<$N|;yLbmI+NtqZie)lLriF9Vrn9E>sE`L3saL9^c?-V151vUP%xG@ z#r%4RnP?$?adsl6i<31U?q;ZfOGKPVo|}lxoS&YWoSmGG&CZ^mYNf+$Y;v--4(ClB z%Cb}vwT%KECznf&FJuLkED0D!Q;{g40?vySp-evY5F3-I6@YeRV4<6h}PHz3z!|x@3l>Y4- z3)g<|*^kc*{L9?&NByK-H2H15`Sbjz*+@7x5C8%|00;m9AOHk_01yBIKmZ5;0U+>X 
z3H13!`plIBnEyXnvQQfk00KY&2mk>f00e*l5C8%|00;nqqeGxE|2KdC{|OcN;qfea7;0zd!=00AHX1b_e#00KY&2mk>f zaO4R*e{AHGb>BaG832C&f8?zO3IYKj00e*l5C8%|00;m9AOHk_01!AJf#%=;Q^yZj z4HN?cKmZ5;0U!VbfB+Bx0zd!=00AHX1fB^3`t^U9|34F^0wsX}5C8%|00;m9AOHk_ z01yBIKmZ6Fn1KHLKbZd?7#|b_0zd!=00AHX1b_e#00KY&2mk>f@JtYZ>;KP$nLtS( z00e*l5C8%|00;m9AOHk_01yBIhaxaE{4o{i`iL6-@4@d6|Jm@ngY@7x`_=~@4BYmA zweNKIpLcH_yV&&+{cUQwgToAe-1Yv++*t3un5KxO5>`sp+a+ET%b2eSBx%e3*c1!e zI=@jZ2pV?F%xBm*$09D1OeEM03cK@L)e%N>>|#2@qTI@YUO2sotOFWHLg|#Zo-iUM z5F4Mrjxy<0Qb8h_LRof+o##*{p3Jh5_*^=}F(_>6iDU_B!rNtx!XBN%VRT27%cM0b z%FB|fDS{|zmztHx3z8&j=xvP3SXGg>E=VXAj-u7;EYUoYVpq>KDSZXSqL&eyTG&}Q z?l-VL*AZ4xQn8}BvF#75Sw|gfGL>aB+#xZVbv{LmUk>m*g^v^9`74KrSkk081s;gvXZnw^yPp7gR3GFEkRd4f}RzWM7q71u8!g^|+&6>E$u%KZ{Qyr+I(cM@2{lV#J z`o3AE+b+C{J#k!58)$1yUBcO#W{W!8%&3aWiv_Q7Dx$=1dEn})+ul@RtCcKC^~yG* zENK<>iLrE<$*7w&PaotP1~S|0HI7;pG*Kw?3NG^Mwu&N_U^Fq%6TCUyaX%&Gy?s@; z$KXB6?+?z*&|g2@=&$xYnEAY1-l#~bVSaaFuh-UYk-d%0m^HWR*!4=Z?J901u$OP# zMO-qqrgx%xud8^I9F``1O+T34O4aFoPPG|w+N3iu6lv;m8>DpJ=_R>T@Gtx^ojmt@BXVY#}M}s`f4wB z`-9`-bnRxtG3Am%tYyY4Ljh+(kJllc*$%t32|PtCXd4P9N7;}XgTn&57|$)4BTqEm z(-X{%x5ec=uGR0HN&elJyZpg$n7+T+Xj@ae>>Z!CO?H;k(b_bK%wz;!?@G-wj94H+ zUo47PF*8i>WeQl$D`Hg>$)v&xD!T^k#%jExq!(d(B+moJSL-wZQhN!|=cql`0>GsqdinTjKDlPpKTc8v?(&Y!8!nPHxj$rS0&-ywqYC67M^cb1P_Ud!l9UpCsy}QwdbGXI< z!}ncjU0zplNtmNYi*u+bpveQWS*fe#12HnctPF*P+%=>O}!w{&I( zI8x%*gY)xrZPWm#XbQ&UgsV~|)BJjPQDky%*Cy*k6UI;*$xy_otA3K=IA>!dnRG8X zs;hPsx1F`1IdQ+d*Cfe#zIxC(XNx-Avre)e;*@%7^UfwEDjTY?8d6foU_j!du^`fz zdg^{}EQ&N0`ci!Yntah@d$J?82b_XBkA1`@p-$D%Az7}S=*UmaN!Q{0w5g6g{nVP? 
zJ*{;+ldYx1{@7ZD9De#jM}BH?Ih3C^)lp~rlo#)wB+Iog_)o5q6};KK`n6s`VPPrFLVH`TFxF+hAvG59p{(>K?I(+N9edP*Rld z>`^=&0iA%r5hG9=e#svk8KG}g2|8IL_@P)MJig^bA&gA_8`l1j!-bccJ+9IoLKCS1bwTnRP@y{C5xb7iC_ zc=jy4W=y=T%SI}GX9G)l?Am?4sr$GxBf9%Dq-0e8|9@ZLU0>iI0>2x0_lQLYPzMMA z0U!VbfB+Bx0zd!=00AHX1c1QjfI!rDj9NUESfS`gnmkLbV8|J)z_DEYmI|EWDV@#wQ3*rJmZ6{S7GuWbFzw=R?)Kde6b z{lA*6ecu+2QTqI!4tzq9KiGf(5C8%|00;m9AOHk_01yBIKmZ5;fzJ_ve%d!eo4=>= zne+emslfZ6qXt4#fB+Bx0zd!=00AHX1b_e#00KY&2s~2+-lBU(PK~{D#+zY=*G(Nt$xp|I=RE7L)p@h$o+XalPRh*AaPK>{bx5&+hRB}%N-YB=J zkQ+{o$x?Y+zyCiS_$P||!3G3?01yBIKmZ5;0U!VbfB+Bx0zd!=Jc9&Y?Iw!{eeKr{ z%>Vy?Oa*=#_%!hN8Ehn|3f00e*l5C8%|z)9d&R>+;Te&aQg* znQA`ZcTUtVM zY$BPmOD$&7%X=Kgs3yZAv4F0k^wPpP<59Mk5t~}rL0|NrOpNsI-fr8njcP&A@cI2T z%_?HI&3=BlyKQxK(3z_!#jcvVp?GQmMT}jWE_w8B)@XG)gR&f-RM|>Z+jhxBqX&UP zAV%tXFbyrD2BO`oqXwe)Mms<>wSPoYPY@At4TF)6Pj{0i464a{lBq14;Se!LhFw{T z&$Ifok<)v+;8)^HIW}t$7l_sdkUtoY)3q~gG}9$p>!s(s z*ojuUU|XHY*e^tDfz$rr{5)M7ZKI7MZ;FN1>KIukI?>0F3`LBOmW*_TB{^E_f5ji1 zndz-f@2ycwT`VQOlFV?q_!8pUm@I^H*<>n#jCvwsfk72f+9L15mi{hE+s60p=+#p_ z!5e33%D6L{_1JS^Lz9ht-neyI{bpz?A9E$VP}_XjuRmga*QD#pcI|$liJ~hLMGP9e z{-|}tv2SopUM_D`Bu7GBkw%(vwxFLnFa=x`B~c^yY%@wXGSs|WHEV37&8JgYE)ypM zKPq8f%VU&FC2!F#bJgh pku}r8rsiSu%%rvzY29=)NFSi-Z3@~=x8t%S Date: Fri, 22 May 2026 11:17:08 +0200 Subject: [PATCH 16/21] Report schema conflicts in rebase Assisted-by: Claude Sonnet 4.6 --- geodiff/src/changesetutils.cpp | 101 ++++++++++++++----------- geodiff/src/changesetutils.h | 2 +- geodiff/src/geodiffrebase.cpp | 27 +++++-- geodiff/src/geodiffutils.cpp | 26 +++---- geodiff/src/geodiffutils.hpp | 35 +++++++-- geodiff/tests/test_modified_scheme.cpp | 40 +++++++++- 6 files changed, 155 insertions(+), 76 deletions(-) diff --git a/geodiff/src/changesetutils.cpp b/geodiff/src/changesetutils.cpp index b11b9e8a..d32fe394 100644 --- a/geodiff/src/changesetutils.cpp +++ b/geodiff/src/changesetutils.cpp @@ -11,8 +11,6 @@ #include "changesetreader.h" #include "changesetwriter.h" #include "tableschema.h" -#include -#include 
ChangesetTable schemaToChangesetTable( const std::string &tableName, const TableSchema &tbl ) @@ -364,62 +362,73 @@ nlohmann::json changesetToJSONSummary( ChangesetReader &reader ) nlohmann::json conflictToJSON( const ConflictFeature &conflict ) { - nlohmann::json res; - res[ "table" ] = std::string( conflict.tableName() ); - res[ "type" ] = "conflict"; - res[ "fid" ] = std::to_string( conflict.pk() ); + if ( const DataConflictFeature *dcf = std::get_if( &conflict ) ) + { + nlohmann::json res; + res[ "table" ] = std::string( dcf->tableName() ); + res[ "type" ] = "conflict"; + res[ "fid" ] = std::to_string( dcf->pk() ); - auto entries = nlohmann::json::array(); + auto entries = nlohmann::json::array(); + for ( const DataConflictItem &item : dcf->items() ) + { + nlohmann::json change; + change[ "column" ] = item.column(); - const std::vector items = conflict.items(); - for ( const ConflictItem &item : items ) - { - nlohmann::json change; - change[ "column" ] = item.column(); + nlohmann::json valueBase = valueToJSON( item.base() ); + nlohmann::json valueOld = valueToJSON( item.theirs() ); + nlohmann::json valueNew = valueToJSON( item.ours() ); - nlohmann::json valueBase = valueToJSON( item.base() ); - nlohmann::json valueOld = valueToJSON( item.theirs() ); - nlohmann::json valueNew = valueToJSON( item.ours() ); + if ( !valueBase.empty() ) + { + if ( valueBase == "null" ) + change[ "base" ] = nullptr; + else + change[ "base" ] = valueBase; + } + if ( !valueOld.empty() ) + { + if ( valueOld == "null" ) + change[ "old" ] = nullptr; + else + change[ "old" ] = valueOld; + } + if ( !valueNew.empty() ) + { + if ( valueNew == "null" ) + change[ "new" ] = nullptr; + else + change[ "new" ] = valueNew; + } - if ( !valueBase.empty() ) - { - if ( valueBase == "null" ) - change[ "base" ] = nullptr; - else - change[ "base" ] = valueBase; - } - if ( !valueOld.empty() ) - { - if ( valueOld == "null" ) - change[ "old" ] = nullptr; - else - change[ "old" ] = valueOld; - } - if ( 
!valueNew.empty() ) - { - if ( valueNew == "null" ) - change[ "new" ] = nullptr; - else - change[ "new" ] = valueNew; + entries.push_back( change ); } - - entries.push_back( change ); + res[ "changes" ] = entries; + return res; } - res[ "changes" ] = entries; - return res; + else if ( const TableSchemaConflict *tsc = std::get_if( &conflict ) ) + { + nlohmann::json res; + res[ "type" ] = "schema_conflict_table"; + res[ "table" ] = tsc->tableName; + return res; + } + else if ( const ColumnSchemaConflict *csc = std::get_if( &conflict ) ) + { + nlohmann::json res; + res[ "type" ] = "schema_conflict_column"; + res[ "table" ] = csc->tableName; + res[ "column" ] = csc->columnName; + return res; + } + return {}; } nlohmann::json conflictsToJSON( const std::vector &conflicts ) { auto entries = nlohmann::json::array(); for ( const ConflictFeature &item : conflicts ) - { - nlohmann::json msg = conflictToJSON( item ); - if ( msg.empty() ) - continue; - - entries.push_back( msg ); - } + entries.push_back( conflictToJSON( item ) ); nlohmann::json res; res[ "geodiff" ] = entries; diff --git a/geodiff/src/changesetutils.h b/geodiff/src/changesetutils.h index 7fc5268b..dbe84dac 100644 --- a/geodiff/src/changesetutils.h +++ b/geodiff/src/changesetutils.h @@ -13,7 +13,7 @@ #include "json.hpp" -class ConflictFeature; +struct ConflictFeature; class ChangesetReader; class ChangesetWriter; struct ChangesetDataEntry; diff --git a/geodiff/src/geodiffrebase.cpp b/geodiff/src/geodiffrebase.cpp index 60d87245..9bd09c73 100644 --- a/geodiff/src/geodiffrebase.cpp +++ b/geodiff/src/geodiffrebase.cpp @@ -491,7 +491,7 @@ bool _handle_delete( const ChangesetDataEntry &entry, const RebaseMapping &mappi return true; } -void _addConflictItem( ConflictFeature &conflictFeature, int i, +void _addConflictItem( DataConflictFeature &conflictFeature, int i, const Value &base, const Value &theirs, const Value &ours ) { // 4th attribute in gpkg_contents is modified date @@ -500,7 +500,7 @@ void 
_addConflictItem( ConflictFeature &conflictFeature, int i, return; // ok safe to add it - ConflictItem item( i, base, theirs, ours ); + DataConflictItem item( i, base, theirs, ours ); conflictFeature.addItem( item ); } @@ -530,7 +530,7 @@ bool _handle_update( const ChangesetDataEntry &entry, const RebaseMapping &mappi if ( a != tableInfo.updated.end() ) patchedMap = &a->second; - ConflictFeature conflictFeature( pk, entry.table->name ); + DataConflictFeature conflictFeature( pk, entry.table->name ); bool entryHasChanges = false; for ( const auto &[inIdx, outIdx] : colMap ) @@ -595,6 +595,10 @@ void _prepare_new_changeset( const Context *context, // table schema -> (old column index -> new column index) // Column being absent means its index didn't change. std::map> columnIndexMap; + // We record conflicting tables/columns and skip them when processing further + // changes + std::set conflictingTables; + std::map> conflictingColumns; std::map > tableChanges; @@ -610,7 +614,7 @@ void _prepare_new_changeset( const Context *context, std::string tableName = dataEntry.table->name; // skip table if necessary - if ( context->isTableSkipped( tableName ) ) + if ( context->isTableSkipped( tableName ) || conflictingTables.count( tableName ) ) continue; TableSchema *tableSchema = currentSchema.tableByName( tableName ); @@ -628,9 +632,12 @@ void _prepare_new_changeset( const Context *context, if ( columnIndexMap.find( dataEntry.table.get() ) == columnIndexMap.end() ) { std::map colMap; + auto columnsToSkip = conflictingColumns[tableName]; for ( size_t i = 0; i < tableSchema->columns.size(); i++ ) { const std::string &colName = tableSchema->columns[i].name; + if ( columnsToSkip.count( colName ) ) + continue; for ( size_t j = 0; j < outTableSchema->columns.size(); j++ ) { if ( outTableSchema->columns[j].name == colName ) @@ -696,8 +703,10 @@ void _prepare_new_changeset( const Context *context, if ( existing ) { if ( existing->columns != ctEntry->columns ) - throw GeoDiffException( 
"Conflict: table " + ctEntry->tableName + - " was created by both changesets with different columns" ); + { + conflicts.push_back( TableSchemaConflict { ctEntry->tableName } ); + conflictingTables.insert( ctEntry->tableName ); + } isDuplicate = true; } } @@ -717,8 +726,10 @@ void _prepare_new_changeset( const Context *context, if ( it != table->columns.end() ) { if ( *it != acEntry->column ) - throw GeoDiffException( "During rebase, column " + acEntry->tableName + "." + acEntry->column.name + - " was added by both changesets with different definitions" ); + { + conflicts.push_back( ColumnSchemaConflict { acEntry->tableName, acEntry->column.name } ); + conflictingColumns[acEntry->tableName].insert( acEntry->column.name ); + } isDuplicate = true; } } diff --git a/geodiff/src/geodiffutils.cpp b/geodiff/src/geodiffutils.cpp index ae23ca7a..cd301936 100644 --- a/geodiff/src/geodiffutils.cpp +++ b/geodiff/src/geodiffutils.cpp @@ -556,40 +556,40 @@ void TmpFile::setPath( const std::string &path ) mPath = path; } -ConflictFeature::ConflictFeature( int pk, - const std::string &tableName ) +DataConflictFeature::DataConflictFeature( int pk, + const std::string &tableName ) : mPk( pk ) , mTableName( tableName ) { } -bool ConflictFeature::isValid() const +bool DataConflictFeature::isValid() const { return !mItems.empty(); } -void ConflictFeature::addItem( const ConflictItem &item ) +void DataConflictFeature::addItem( const DataConflictItem &item ) { mItems.push_back( item ); } -const std::string &ConflictFeature::tableName() const +const std::string &DataConflictFeature::tableName() const { return mTableName; } -int ConflictFeature::pk() const +int DataConflictFeature::pk() const { return mPk; } -const std::vector &ConflictFeature::items() const +const std::vector &DataConflictFeature::items() const { return mItems; } -ConflictItem::ConflictItem( int column, const Value &base, - const Value &theirs, const Value &ours ) +DataConflictItem::DataConflictItem( int column, const 
Value &base, + const Value &theirs, const Value &ours ) : mColumn( column ) , mBase( base ) , mTheirs( theirs ) @@ -598,22 +598,22 @@ ConflictItem::ConflictItem( int column, const Value &base, } -Value ConflictItem::base() const +Value DataConflictItem::base() const { return mBase; } -Value ConflictItem::theirs() const +Value DataConflictItem::theirs() const { return mTheirs; } -Value ConflictItem::ours() const +Value DataConflictItem::ours() const { return mOurs; } -int ConflictItem::column() const +int DataConflictItem::column() const { return mColumn; } diff --git a/geodiff/src/geodiffutils.hpp b/geodiff/src/geodiffutils.hpp index 20e3514a..bd311e59 100644 --- a/geodiff/src/geodiffutils.hpp +++ b/geodiff/src/geodiffutils.hpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -201,10 +202,10 @@ class TmpFile }; -class ConflictItem +class DataConflictItem { public: - ConflictItem( + DataConflictItem( int column, const Value &base, const Value &theirs, @@ -222,19 +223,39 @@ class ConflictItem Value mOurs; }; -class ConflictFeature +class DataConflictFeature { public: - ConflictFeature( int pk, const std::string &tableName ); + DataConflictFeature( int pk, const std::string &tableName ); bool isValid() const; - void addItem( const ConflictItem &item ); + void addItem( const DataConflictItem &item ); const std::string &tableName() const; int pk() const; - const std::vector &items() const; + const std::vector &items() const; private: int mPk; std::string mTableName; - std::vector mItems; + std::vector mItems; +}; + +//! Schema conflict: two changesets created or modified the same table with +//different definitions +struct TableSchemaConflict +{ + std::string tableName; +}; + +//! 
Schema conflict: two changesets added the same column with different +//definitions +struct ColumnSchemaConflict +{ + std::string tableName; + std::string columnName; +}; + +struct ConflictFeature : public std::variant +{ + using variant::variant; }; diff --git a/geodiff/tests/test_modified_scheme.cpp b/geodiff/tests/test_modified_scheme.cpp index 4d373bca..b0540137 100644 --- a/geodiff/tests/test_modified_scheme.cpp +++ b/geodiff/tests/test_modified_scheme.cpp @@ -360,7 +360,7 @@ static void testSchemaDiffRebaseWith( std::string driverName, std::string testna { std::vector conflicts; rebase( static_cast( testContext() ), baseSchema, base2TheirsPath, theirs2bothPath, base2OursPath, conflicts ); - ASSERT_EQ( conflicts.size(), 0 ) << conflicts.size() << " conflicts in rebase (more than " << expectedConflicts << "): " << conflictsToJSON( conflicts ).dump( 2 ); + ASSERT_EQ( static_cast( conflicts.size() ), expectedConflicts ) << conflicts.size() << " conflicts in rebase (expected " << expectedConflicts << "): " << conflictsToJSON( conflicts ).dump( 2 ); } if ( expectedConflicts > 0 ) @@ -496,6 +496,44 @@ TEST( ModifiedSchemeTest, rebase_redundant_create_table ) } ); } +TEST( ModifiedSchemeTest, rebase_conflict_create_table ) +{ + // TODO: Postgres support + std::string driverName = "sqlite"; + + testSchemaDiffRebaseWith( driverName, "rebase_conflict_create_table", 1, + [ = ]( Driver & db ) + { + db.executeSql( "CREATE TABLE vehicles (fid INTEGER, name TEXT, type TEXT)" ); + }, + [ = ]( Driver & db ) + { + db.executeSql( "CREATE TABLE vehicles (fid INTEGER, name TEXT, manufacturer TEXT)" ); + }, + [ = ]( Driver & db ) { } ); +} + +TEST( ModifiedSchemeTest, rebase_conflict_add_column ) +{ + // TODO: Postgres support + std::string driverName = "sqlite"; + + testSchemaDiffRebaseWith( driverName, "rebase_conflict_add_column", 1, + [ = ]( Driver & db ) + { + db.executeSql( "ALTER TABLE tram_stops ADD COLUMN bench_count INTEGER" ); + }, + [ = ]( Driver & db ) + { + 
db.executeSql( "ALTER TABLE tram_stops ADD COLUMN bench_count TEXT" ); + db.executeSql( "INSERT INTO tram_stops (fid, name, bench_count) VALUES (4, 'Palmovka', 'three')" ); + }, + [ = ]( Driver & db ) + { + db.executeSql( "INSERT INTO tram_stops (fid, name) VALUES (4, 'Palmovka')" ); + } ); +} + int main( int argc, char **argv ) { testing::InitGoogleTest( &argc, argv ); From 1131142c6537d410ffbe122c350ff9aa468abd26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ko=C5=88a=C5=99=C3=ADk?= Date: Fri, 22 May 2026 11:31:29 +0200 Subject: [PATCH 17/21] Test schema change JSON format Assisted-by: Claude Sonnet 4.6 --- geodiff/tests/test_changeset_utils.cpp | 13 ++++ .../changesets/added_attribute.json | 65 +++++++++++++++++++ .../changesets/added_table.json | 64 ++++++++++++++++++ .../changesets/delete_attribute.json | 65 +++++++++++++++++++ .../changesets/delete_table.json | 64 ++++++++++++++++++ 5 files changed, 271 insertions(+) create mode 100644 geodiff/tests/testdata/modified_scheme/changesets/added_attribute.json create mode 100644 geodiff/tests/testdata/modified_scheme/changesets/added_table.json create mode 100644 geodiff/tests/testdata/modified_scheme/changesets/delete_attribute.json create mode 100644 geodiff/tests/testdata/modified_scheme/changesets/delete_table.json diff --git a/geodiff/tests/test_changeset_utils.cpp b/geodiff/tests/test_changeset_utils.cpp index 4eaaed50..10cb21df 100644 --- a/geodiff/tests/test_changeset_utils.cpp +++ b/geodiff/tests/test_changeset_utils.cpp @@ -14,6 +14,7 @@ #include "changesetwriter.h" #include "geodiffutils.hpp" +#include "tableschema.h" #include "json.hpp" @@ -139,6 +140,18 @@ TEST( ChangesetUtils, test_export_json ) doExportAndCompare( pathjoin( testdir(), "2_deletes", "base-deleted_A" ), pathjoin( tmpdir(), "test_export_json", "delete-diff.json" ) ); + + doExportAndCompare( pathjoin( testdir(), "modified_scheme", "changesets", "added_attribute" ), + pathjoin( tmpdir(), "test_export_json", "added_attribute.json" ) ); 
+ + doExportAndCompare( pathjoin( testdir(), "modified_scheme", "changesets", "added_table" ), + pathjoin( tmpdir(), "test_export_json", "added_table.json" ) ); + + doExportAndCompare( pathjoin( testdir(), "modified_scheme", "changesets", "delete_attribute" ), + pathjoin( tmpdir(), "test_export_json", "delete_attribute.json" ) ); + + doExportAndCompare( pathjoin( testdir(), "modified_scheme", "changesets", "delete_table" ), + pathjoin( tmpdir(), "test_export_json", "delete_table.json" ) ); } TEST( ChangesetUtils, test_export_json_summary ) diff --git a/geodiff/tests/testdata/modified_scheme/changesets/added_attribute.json b/geodiff/tests/testdata/modified_scheme/changesets/added_attribute.json new file mode 100644 index 00000000..ce7ca428 --- /dev/null +++ b/geodiff/tests/testdata/modified_scheme/changesets/added_attribute.json @@ -0,0 +1,65 @@ +{ + "geodiff": [ + { + "column": { + "geomHasM": false, + "geomHasZ": false, + "geomSrsId": -1, + "geomType": "", + "isAutoIncrement": false, + "isGeometry": false, + "isNotNull": false, + "isPrimaryKey": false, + "name": "added_field", + "type": "integer" + }, + "tableName": "simple", + "type": "add_column" + }, + { + "changes": [ + { + "column": 0, + "old": 1 + }, + { + "column": 4, + "new": 1, + "old": null + } + ], + "table": "simple", + "type": "update" + }, + { + "changes": [ + { + "column": 0, + "old": 2 + }, + { + "column": 4, + "new": 2, + "old": null + } + ], + "table": "simple", + "type": "update" + }, + { + "changes": [ + { + "column": 0, + "old": 3 + }, + { + "column": 4, + "new": 3, + "old": null + } + ], + "table": "simple", + "type": "update" + } + ] +} diff --git a/geodiff/tests/testdata/modified_scheme/changesets/added_table.json b/geodiff/tests/testdata/modified_scheme/changesets/added_table.json new file mode 100644 index 00000000..763e5852 --- /dev/null +++ b/geodiff/tests/testdata/modified_scheme/changesets/added_table.json @@ -0,0 +1,64 @@ +{ + "geodiff": [ + { + "columns": [ + { + "geomHasM": false, 
+ "geomHasZ": false, + "geomSrsId": -1, + "geomType": "", + "isAutoIncrement": true, + "isGeometry": false, + "isNotNull": true, + "isPrimaryKey": true, + "name": "fid", + "type": "integer" + }, + { + "geomHasM": false, + "geomHasZ": false, + "geomSrsId": 4326, + "geomType": "POLYGON", + "isAutoIncrement": false, + "isGeometry": true, + "isNotNull": false, + "isPrimaryKey": false, + "name": "geometry", + "type": "geometry" + }, + { + "geomHasM": false, + "geomHasZ": false, + "geomSrsId": -1, + "geomType": "", + "isAutoIncrement": false, + "isGeometry": false, + "isNotNull": false, + "isPrimaryKey": false, + "name": "myfield", + "type": "text" + } + ], + "tableName": "added_table", + "type": "create_table" + }, + { + "changes": [ + { + "column": 0, + "new": 1 + }, + { + "column": 1, + "new": "R1AAA+YQAADwIZt/aSztvyhTjE5mN9u/MjKhWTE40z8KvmmFKQnlPwEDAAAAAQAAAAUAAACMGhYhZuPqv2eg8/JZ2OE/8CGbf2ks7b/WaZRh4P7TPyhTjE5mN9u/MjKhWTE40z+QaI2BBKXjvwq+aYUpCeU/jBoWIWbj6r9noPPyWdjhPw==" + }, + { + "column": 2, + "new": "hello" + } + ], + "table": "added_table", + "type": "insert" + } + ] +} diff --git a/geodiff/tests/testdata/modified_scheme/changesets/delete_attribute.json b/geodiff/tests/testdata/modified_scheme/changesets/delete_attribute.json new file mode 100644 index 00000000..4cdcf771 --- /dev/null +++ b/geodiff/tests/testdata/modified_scheme/changesets/delete_attribute.json @@ -0,0 +1,65 @@ +{ + "geodiff": [ + { + "changes": [ + { + "column": 0, + "old": 1 + }, + { + "column": 4, + "new": null, + "old": 1 + } + ], + "table": "simple", + "type": "update" + }, + { + "changes": [ + { + "column": 0, + "old": 2 + }, + { + "column": 4, + "new": null, + "old": 2 + } + ], + "table": "simple", + "type": "update" + }, + { + "changes": [ + { + "column": 0, + "old": 3 + }, + { + "column": 4, + "new": null, + "old": 3 + } + ], + "table": "simple", + "type": "update" + }, + { + "column": { + "geomHasM": false, + "geomHasZ": false, + "geomSrsId": -1, + "geomType": "", + "isAutoIncrement": 
false, + "isGeometry": false, + "isNotNull": false, + "isPrimaryKey": false, + "name": "added_field", + "type": "integer" + }, + "tableName": "simple", + "type": "drop_column" + } + ] +} diff --git a/geodiff/tests/testdata/modified_scheme/changesets/delete_table.json b/geodiff/tests/testdata/modified_scheme/changesets/delete_table.json new file mode 100644 index 00000000..6730d725 --- /dev/null +++ b/geodiff/tests/testdata/modified_scheme/changesets/delete_table.json @@ -0,0 +1,64 @@ +{ + "geodiff": [ + { + "changes": [ + { + "column": 0, + "old": 1 + }, + { + "column": 1, + "old": "R1AAA+YQAADwIZt/aSztvyhTjE5mN9u/MjKhWTE40z8KvmmFKQnlPwEDAAAAAQAAAAUAAACMGhYhZuPqv2eg8/JZ2OE/8CGbf2ks7b/WaZRh4P7TPyhTjE5mN9u/MjKhWTE40z+QaI2BBKXjvwq+aYUpCeU/jBoWIWbj6r9noPPyWdjhPw==" + }, + { + "column": 2, + "old": "hello" + } + ], + "table": "added_table", + "type": "delete" + }, + { + "columns": [ + { + "geomHasM": false, + "geomHasZ": false, + "geomSrsId": -1, + "geomType": "", + "isAutoIncrement": true, + "isGeometry": false, + "isNotNull": true, + "isPrimaryKey": true, + "name": "fid", + "type": "integer" + }, + { + "geomHasM": false, + "geomHasZ": false, + "geomSrsId": 4326, + "geomType": "POLYGON", + "isAutoIncrement": false, + "isGeometry": true, + "isNotNull": false, + "isPrimaryKey": false, + "name": "geometry", + "type": "geometry" + }, + { + "geomHasM": false, + "geomHasZ": false, + "geomSrsId": -1, + "geomType": "", + "isAutoIncrement": false, + "isGeometry": false, + "isNotNull": false, + "isPrimaryKey": false, + "name": "myfield", + "type": "text" + } + ], + "tableName": "added_table", + "type": "drop_table" + } + ] +} From 588be95df45a4a4b4bbb4a2f93f0c4fe6ebda051 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ko=C5=88a=C5=99=C3=ADk?= Date: Fri, 22 May 2026 12:42:33 +0200 Subject: [PATCH 18/21] Add tests for executeSql() --- geodiff/src/drivers/sqlitedriver.cpp | 5 ++++- geodiff/tests/test_driver_postgres.cpp | 18 ++++++++++++++++++ geodiff/tests/test_driver_sqlite.cpp 
| 11 +++++++++++ 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/geodiff/src/drivers/sqlitedriver.cpp b/geodiff/src/drivers/sqlitedriver.cpp index 8b6648d0..47d27e7c 100644 --- a/geodiff/src/drivers/sqlitedriver.cpp +++ b/geodiff/src/drivers/sqlitedriver.cpp @@ -1546,7 +1546,10 @@ std::vector> SqliteDriver::executeSql( std::string sql for ( size_t i = 0; i < values.size(); ++i ) { const unsigned char *text = sqlite3_column_text( stmt.get(), static_cast( i ) ); - values.push_back( reinterpret_cast( text ) ); + if ( text ) + values[i] = reinterpret_cast( text ); + else + values[i] = ""; } rows.push_back( values ); } diff --git a/geodiff/tests/test_driver_postgres.cpp b/geodiff/tests/test_driver_postgres.cpp index a5aa1f9e..9a464c22 100644 --- a/geodiff/tests/test_driver_postgres.cpp +++ b/geodiff/tests/test_driver_postgres.cpp @@ -1100,6 +1100,24 @@ TEST( PostgresDriverTest, test_timestamp_miliseconds ) PQfinish( c ); } +TEST( PostgresDriverTest, execute_sql ) +{ + std::string conninfo = pgTestConnInfo(); + execSqlCommands( conninfo, pathjoin( testdir(), "postgres", "base.sql" ) ); + + DriverParametersMap params; + params["conninfo"] = conninfo; + params["base"] = "gd_base"; + + std::unique_ptr driver( Driver::createDriver( static_cast( testContext() ), "postgres" ) ); + ASSERT_TRUE( driver ); + driver->open( params ); + + std::vector> result = driver->executeSql( "SELECT fid, name FROM simple LIMIT 2" ); + std::vector> expected = {{"1", "feature1"}, {"2", "feature2"}}; + ASSERT_EQ( result, expected ); +} + int main( int argc, char **argv ) { diff --git a/geodiff/tests/test_driver_sqlite.cpp b/geodiff/tests/test_driver_sqlite.cpp index 330cacb8..1cb096d6 100644 --- a/geodiff/tests/test_driver_sqlite.cpp +++ b/geodiff/tests/test_driver_sqlite.cpp @@ -501,6 +501,17 @@ TEST( SqliteDriverTest, make_copy_sqlite_concurrent ) ASSERT_EQ( sqlite3_column_int( stmtSafe.get(), 0 ), 2 ); } +TEST( SqliteDriverTest, execute_sql ) +{ + std::string fileBase = pathjoin( 
testdir(), "base.gpkg" ); + std::unique_ptr driver( Driver::createDriver( static_cast( testContext() ), "sqlite" ) ); + driver->open( Driver::sqliteParametersSingleSource( fileBase ) ); + + std::vector> result = driver->executeSql( "SELECT fid, name FROM simple LIMIT 2" ); + std::vector> expected = {{"1", "feature1"}, {"2", "feature2"}}; + ASSERT_EQ( result, expected ); +} + int main( int argc, char **argv ) { From d4504be98a8ac2b4c0da65e97125a5f479b54009 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ko=C5=88a=C5=99=C3=ADk?= Date: Fri, 22 May 2026 13:11:10 +0200 Subject: [PATCH 19/21] Sort arrays before STL set operations Assisted-by: Claude Sonnet 4.6 --- geodiff/src/tableschemadiff.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/geodiff/src/tableschemadiff.cpp b/geodiff/src/tableschemadiff.cpp index 00bd1594..1572d31d 100644 --- a/geodiff/src/tableschemadiff.cpp +++ b/geodiff/src/tableschemadiff.cpp @@ -101,8 +101,10 @@ std::vector diffTableSchema( const TableSchema &base, const Tabl const std::unordered_map baseColumns = byName( base.columns ); const std::unordered_map modifiedColumns = byName( modified.columns ); - const std::vector baseColNames = names( base.columns ); - const std::vector modifiedColNames = names( modified.columns ); + std::vector baseColNames = names( base.columns ); + std::vector modifiedColNames = names( modified.columns ); + std::sort( baseColNames.begin(), baseColNames.end() ); + std::sort( modifiedColNames.begin(), modifiedColNames.end() ); std::vector deletedColNames; std::set_difference( baseColNames.begin(), baseColNames.end(), @@ -144,8 +146,10 @@ std::vector diffDatabaseSchema( const DatabaseSchema &base, cons const std::unordered_map baseTables = byName( base.tables ); const std::unordered_map modifiedTables = byName( modified.tables ); - const std::vector baseTableNames = names( base.tables ); - const std::vector modifiedTableNames = names( modified.tables ); + std::vector baseTableNames = 
names( base.tables ); + std::vector modifiedTableNames = names( modified.tables ); + std::sort( baseTableNames.begin(), baseTableNames.end() ); + std::sort( modifiedTableNames.begin(), modifiedTableNames.end() ); std::vector deletedTableNames; std::set_difference( baseTableNames.begin(), baseTableNames.end(), From c14e0f642a5504edde89fd392fec61486e66d588 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ko=C5=88a=C5=99=C3=ADk?= Date: Tue, 26 May 2026 10:20:10 +0200 Subject: [PATCH 20/21] Document changes to changeset format --- docs/changeset-format.md | 86 +++++++++++++++++++++++++++++++++++----- 1 file changed, 75 insertions(+), 11 deletions(-) diff --git a/docs/changeset-format.md b/docs/changeset-format.md index 4ab8dc50..b05a1a1c 100644 --- a/docs/changeset-format.md +++ b/docs/changeset-format.md @@ -1,25 +1,38 @@ # Changeset Format -The format for changesets is borrowed from SQLite3 session extension's internal format -and it is currently 100% compatible with it. Below are details of the format, extracted -from SQLite3 source code. +The format for changesets is based on the SQLite3 session extension's internal +format. Below are details of the format: ## Summary -A changeset is a collection of DELETE, UPDATE and INSERT operations on -one or more tables. Operations on a single table are grouped together, -but may occur in any order (i.e. deletes, updates and inserts are all -mixed together). +A changeset is a linear list of operations of various types, identified by a +one-byte tag: -Each group of changes begins with a table header: +- Table record (`'T'`) +- Data entry (`18`, `23`, `9`) +- Create table entry (`'a'`) +- Drop table entry (`'A'`) +- Add column entry (`'c'`) +- Drop column entry (`'C'`) + +Data operations on a single table are grouped together, preceded by a single +table record. The operations are processed as if they were executed +sequentially. 
+ +## Table record + +The table record identifies the table and its columns: - 1 byte: Constant 0x54 (capital 'T') - Varint: Number of columns in the table. - nCol bytes: 0x01 for PK columns, 0x00 otherwise. -- N bytes: Unqualified table name (encoded using UTF-8). Nul-terminated. +- N bytes: Unqualified table name (encoded using UTF-8). Null-terminated. -Followed by one or more changes to the table. +## Data entry + +A data entry is a DELETE, UPDATE or INSERT operation on one table (identified +by last table record): - 1 byte: Either SQLITE_INSERT (0x12), UPDATE (0x17) or DELETE (0x09). - 1 byte: The "indirect-change" flag. @@ -48,6 +61,44 @@ with table columns modified by the UPDATE change contain the new values. Fields associated with table columns that are not modified are set to "undefined". +## Create table entry + +This entry creates a new empty table: + +- 1 byte: Constant 0x61 (lowercase 'a') +- Null-terminated string: Table name +- Varint: Number of columns in the table. +- nCol entries: Table column info. + +## Drop table entry + +This entry deletes an existing table by name. The table must be empty. Column +information is kept for the purpose of rebasing and inverting the changeset. + +- 1 byte: Constant 0x41 (uppercase 'A') +- Null-terminated string: Table name +- Varint: Number of columns in the table. +- nCol entries: Table column info. + +## Add column entry + +This entry adds a new column to an existing table. All existing rows will have +`NULL` filled in. + +- 1 byte: Constant 0x63 (lowercase 'c') +- Null-terminated string: Table name +- Table column info. + +## Drop column entry + +This entry deletes an existing column from a table. All existing rows must have +`NULL` values in this column. Column information is kept for the purpose of +rebasing and inverting the changeset. + +- 1 byte: Constant 0x43 (uppercase 'C') +- Null-terminated string: Table name +- Table column info. 
+ # Record Format Unlike the SQLite database record format, each field is self-contained - @@ -69,7 +120,7 @@ is followed by: - Text values: A varint containing the number of bytes in the value (encoded using UTF-8). Followed by a buffer containing the UTF-8 representation - of the text value. There is no nul terminator. + of the text value. There is no null terminator. - Blob values: A varint containing the number of bytes in the value, followed by @@ -82,6 +133,19 @@ is followed by: An 8-byte big-endian IEEE 754-2008 real value. +# Table column info + +- Null-terminated string: column name +- 1 byte: Column type (same as record) +- 1 byte: Flags packed as bits. From LSb: + - is primary key + - is autoincrement + - is geometry column + - geometry has Z coordinate + - geometry has M coordinate +- Null-terminated string: geometry type (`POINT`, `LINE`, ...) +- Varint: SRS ID for geometry + # Varint Format Varint values are encoded in the same way as varints in the SQLite From c443e2ce545a79f7c268db3f9a7b12240273e657 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ko=C5=88a=C5=99=C3=ADk?= Date: Tue, 26 May 2026 10:28:56 +0200 Subject: [PATCH 21/21] Document schema change principles and limitations --- docs/schema-changes.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 docs/schema-changes.md diff --git a/docs/schema-changes.md b/docs/schema-changes.md new file mode 100644 index 00000000..803bff73 --- /dev/null +++ b/docs/schema-changes.md @@ -0,0 +1,26 @@ +# Schema changes + +Geodiff supports diffing databases with different schemata. It identifies table +and column additions/deletions. + +Tables and columns are always created empty and any data present in the +database is recreated manually via `INSERT`/`UPDATE` entries, written after the +schema change entry. Likewise, deletion entries expect the table/column to be +empty, so `DELETE`/`UPDATE` entries clearing the data are written beforehand. 
+This simplifies inverting and rebasing, since the schema change entries work +separately from e.g. the ID renaming machinery. + +## Limitations and pitfalls + +Since we only look at the final state of the database, default values in +columns are not supported. Any default specified during creation of the column +will be simulated by an `UPDATE` for each row. This means that only the rows +present in the modified database will get the "default" value, and the default +won't be propagated when the diff is applied onto base. + +Renaming columns is supported only as a deletion & addition. This has similar +pitfalls to the default values - on rebase, values in the second database won't +be moved. Same with renaming tables. + +The intermediate states created by applying the resulting diff (e.g. "nulling +out" column before dropping it) may conflict with database constraints.