1414//
1515// This tool fixes all three problems in one pass per DF_ directory:
1616//
17- // Stage 0 — Sort & deduplicate the BC table. Build BC permutation map:
18- // bcPerm[oldBCrow] = newBCrow.
17+ // Stage 0 — Deduplicate the BC table in place (order-preserving; the input
18+ // must already be globalBC-sorted). Build BC permutation map:
19+ // bcPerm[oldBCrow] = newBCrow (monotonic non-decreasing).
1920//
2021// Stage 1 — Process every table that carries fIndexBCs / fIndexBC.
2122// Remap the index via bcPerm, sort rows by the new index, and
@@ -501,18 +502,31 @@ static PermMap rewriteTable(TTree *src, TDirectory *dirOut,
501502}
502503
503504// ============================================================================
504- // SECTION 4 — Stage 0: BC table sort + deduplication
505+ // SECTION 4 — Stage 0: BC table deduplication (order-preserving)
505506// ============================================================================
506507//
507- // Reads fGlobalBC from the BC tree, sorts rows, drops exact-duplicate BC
508- // values, and writes the compacted table. Returns bcPerm[oldRow] = newRow.
508+ // Reads fGlobalBC from the BC tree, drops exact-duplicate BC values IN PLACE
509+ // (preserving input row order), and writes the compacted table. Returns
510+ // bcPerm[oldRow] = newRow.
511+ //
512+ // The dedup is deliberately order-preserving so that bcPerm is monotonic
513+ // non-decreasing. This matters because every BC-indexed table (collisions,
514+ // FT0/FV0/FDD/Zdc, ...) is sliced per BC and must stay sorted by its fIndexBCs,
515+ // and collisions are in turn the grouping anchor for tracks (sorted by
516+ // fIndexCollisions). A non-order-preserving BC remap would force a full reorder
517+ // cascade BC -> collisions -> tracks to keep all those groupings valid; keeping
518+ // bcPerm monotonic means none of those tables need to be reordered at all.
519+ //
520+ // This REQUIRES the input BC table to already be sorted by fGlobalBC (the
521+ // standard AO2D invariant; also asserted on the output by validateDF check #1).
522+ // We assert it loudly rather than silently emit a non-monotonic BC table.
509523
// Result of Stage 0 (BC-table deduplication): the old-row -> new-row map plus
// the number of rows that survived.
510524struct BCStage0Result {
511- PermMap bcPerm ; // bcPerm[oldRow] = newRow in sorted/ deduped BC table
525+ PermMap bcPerm ; // bcPerm[oldRow] = newRow in the deduped BC table
512526 Long64_t nUnique = 0 ; // number of BC rows kept after dedup (size of the kept-row list); 0 for an empty input
513527};
514528
515- static BCStage0Result stage0_sortBCs (TTree * treeBCs , TDirectory * dirOut ) {
529+ static BCStage0Result stage0_dedupBCs (TTree * treeBCs , TDirectory * dirOut ) {
516530 BCStage0Result res ;
517531 Long64_t n = treeBCs -> GetEntries ();
518532 if (n == 0 ) return res ;
@@ -525,19 +539,29 @@ static BCStage0Result stage0_sortBCs(TTree *treeBCs, TDirectory *dirOut) {
525539 std ::vector < ULong64_t > gbcs (n );
526540 for (Long64_t i = 0 ; i < n ; ++ i ) { treeBCs -> GetEntry (i ); gbcs [i ] = gbc ; }
527541
528- // Sort row indices by fGlobalBC
529- std ::vector < Long64_t > order (n );
530- std ::iota (order .begin (), order .end (), 0 );
531- std ::stable_sort (order .begin (), order .end (),
532- [& ](Long64_t a , Long64_t b ){ return gbcs [a ] < gbcs [b ]; });
542+ // The BC table must already be sorted by fGlobalBC: the dedup below is
543+ // order-preserving (merges only adjacent equal-globalBC rows), which keeps
544+ // bcPerm monotonic and avoids a reorder cascade through collisions/tracks.
545+ // A non-monotonic input would silently break that guarantee, so abort loudly.
546+ for (Long64_t i = 1 ; i < n ; ++ i ) {
547+ if (gbcs [i ] < gbcs [i - 1 ]) {
548+ std ::cerr << "FATAL: O2bc_* table is not sorted by fGlobalBC (row " << i
549+ << " globalBC=" << gbcs [i ] << " < row " << (i - 1 )
550+ << " globalBC=" << gbcs [i - 1 ] << ").\n"
551+ << " AODBcRewriter requires a globalBC-sorted BC table so that\n"
552+ << " BC deduplication is order-preserving; aborting.\n" ;
553+ std ::abort ();
554+ }
555+ }
533556
534- // Build deduplicated row list and the permutation
557+ // Build the deduplicated row list and the (monotonic) permutation in source
558+ // row order: adjacent rows sharing a globalBC collapse onto one output row.
535559 res .bcPerm .assign (n , -1 );
536560 std ::vector < Long64_t > rowOrder ; // source rows to keep, in output order
537- ULong64_t prev = ULong64_t ( -1 ) ;
561+ ULong64_t prev = 0 ;
538562 Int_t newRow = -1 ;
539- for (Long64_t srcRow : order ) {
540- if (gbcs [srcRow ] != prev ) {
563+ for (Long64_t srcRow = 0 ; srcRow < n ; ++ srcRow ) {
564+ if (newRow < 0 || gbcs [srcRow ] != prev ) {
541565 ++ newRow ;
542566 prev = gbcs [srcRow ];
543567 rowOrder .push_back (srcRow );
@@ -547,7 +571,7 @@ static BCStage0Result stage0_sortBCs(TTree *treeBCs, TDirectory *dirOut) {
547571 }
548572 res .nUnique = rowOrder .size ();
549573
550- std ::cout << " BC stage: " << n << " rows -> " << res .nUnique << " unique\n" ;
574+ std ::cout << " BC stage: " << n << " rows -> " << res .nUnique << " unique (in-place dedup) \n" ;
551575
552576 // Write the BC table (no index remapping needed for the table itself)
553577 rewriteTable (treeBCs , dirOut , rowOrder , /*indexBranch=*/ "" , /*parentPerm=*/ {});
@@ -858,6 +882,98 @@ static const PermMap *findPermByPrefix(
858882 return nullptr ;
859883}
860884
885+ // ============================================================================
886+ // SECTION 9b — Stage 1b: Collision-grouped track tables
887+ // ============================================================================
888+ //
889+ // The primary track tables (O2track_iu, O2mfttrack_*, O2fwdtrack) are GROUPED
890+ // by collision. O2's slicing cache (ArrowTableSlicingCache::validateOrder)
891+ // requires every fIndexCollisions group — including the "-1" ambiguous group —
892+ // to be a single contiguous run; otherwise it aborts with
893+ // "Table ... index fIndexCollisions has a group with index -1 that is split".
894+ //
895+ // When several MC sub-timeframes are merged into one DF_ folder (data-embedding
896+ // anchoring, which stores MC timeframes under the same DF_ as the parent data
897+ // file), each sub-frame contributes its own [collision-grouped][-1 ambiguous]
898+ // block. Concatenating them splits the -1 group into N runs, so the table is
899+ // no longer sliceable. Stage 1 only reorders BC-indexed tables, and tracks are
900+ // otherwise written in input row order, so the split survives into the output.
901+ //
902+ // This stage re-establishes the grouping: it reorders each collision-grouped
903+ // track table by its remapped fIndexCollisions (stable, with -1 sinking to the
904+ // end so the ambiguous group is one contiguous run — matching the Stage 1
905+ // convention) and publishes the resulting row permutation. Downstream:
906+ // * paste-join children (O2trackextra, O2trackcov_iu, O2mctracklabel, ...)
907+ // follow the published parent permutation;
908+ // * every fIndexTracks* / fIndexMFTTracks / fIndexFwdTracks reference is
909+ // remapped through it in processPasteJoinTables.
// Returns true when `tname` starts with one of the collision-grouped primary
// track-table prefixes (O2track_iu, O2track, O2mfttrack, O2fwdtrack).
//
// The prefixes deliberately carry no trailing whitespace: a prefix such as
// "O2track_iu " can never match a tree name like O2track_iu, O2mfttrack_* or
// O2fwdtrack, which would turn the whole regrouping stage into a silent no-op.
//
// NOTE(review): the bare "O2track" prefix also matches names that merely begin
// with it (e.g. "O2trackextra"); the caller is expected to filter those out
// separately (paste-join-child check, presence of fIndexCollisions).
static bool isCollGroupedTrackTable(const std::string &tname) {
  static const char *const kPrefixes[] = {"O2track_iu", "O2track",
                                          "O2mfttrack", "O2fwdtrack"};
  for (const char *prefix : kPrefixes) {
    // rfind(prefix, 0) can only match at offset 0, i.e. a "starts with" test
    // that needs no temporary string object.
    if (tname.rfind(prefix, 0) == 0) return true;
  }
  return false;
}
917+
918+ static void stage1b_reorderTrackTables (
919+ TDirectory * dirIn , TDirectory * dirOut ,
920+ std ::unordered_map < std ::string , PermMap > & allPerms ,
921+ std ::unordered_set < std ::string > & written ) {
922+
923+ const PermMap * collPermP = findPermByPrefix (allPerms , "O2collision_" );
924+ if (!collPermP ) return ; // no collisions present — nothing to regroup against
925+
926+ TIter it (dirIn -> GetListOfKeys ());
927+ while (TKey * key = static_cast < TKey * > (it ())) {
928+ if (TString (key -> GetClassName ()) != "TTree" ) continue ;
929+ std ::unique_ptr < TObject > obj (key -> ReadObj ());
930+ TTree * src = dynamic_cast < TTree * > (obj .get ());
931+ if (!src ) continue ;
932+
933+ std ::string tname = src -> GetName ();
934+ if (written .count (tname )) continue ; // BC-indexed tracks etc. already done
935+ if (!isCollGroupedTrackTable (tname )) continue ;
936+ if (isPasteJoinChild (tname )) continue ; // children follow their parent below
937+ if (!src -> GetBranch ("fIndexCollisions" )) continue ;
938+
939+ std ::cout << " Stage1b [coll-grouped]: " << tname << "\n" ;
940+
941+ Long64_t nSrc = src -> GetEntries ();
942+ TBranch * inIdxBr = src -> GetBranch ("fIndexCollisions" );
943+ TLeaf * idxLeaf = static_cast < TLeaf * > (inIdxBr -> GetListOfLeaves ()-> At (0 ));
944+ ScalarTag idxTag = tagOf (idxLeaf );
945+ std ::vector < unsigned char > idxBuf (byteSize (idxTag ), 0 );
946+ inIdxBr -> SetAddress (idxBuf .data ());
947+
948+ struct SortEntry { Long64_t newColl ; Long64_t srcRow ; };
949+ std ::vector < SortEntry > entries ;
950+ entries .reserve (nSrc );
951+ for (Long64_t i = 0 ; i < nSrc ; ++ i ) {
952+ inIdxBr -> GetEntry (i );
953+ Long64_t oldColl = readAsInt (idxBuf .data (), idxTag );
954+ Long64_t newColl = (oldColl >= 0 && oldColl < (Long64_t )collPermP -> size ())
955+ ? (* collPermP )[oldColl ] : -1 ;
956+ entries .push_back ({newColl , i });
957+ }
958+ // Stable-sort by remapped collision; the ambiguous group (-1) sinks to the
959+ // end as a single contiguous run. Stable keeps the within-collision order.
960+ std ::stable_sort (entries .begin (), entries .end (),
961+ [](const SortEntry & a , const SortEntry & b ){
962+ if (a .newColl < 0 && b .newColl >= 0 ) return false;
963+ if (a .newColl >= 0 && b .newColl < 0 ) return true;
964+ return a .newColl < b .newColl ;
965+ });
966+ std ::vector < Long64_t > rowOrder ;
967+ rowOrder .reserve (nSrc );
968+ for (auto & e : entries ) rowOrder .push_back (e .srcRow );
969+
970+ // Reorder rows and remap fIndexCollisions values through collPerm.
971+ PermMap perm = rewriteTable (src , dirOut , rowOrder , "fIndexCollisions" , * collPermP );
972+ allPerms [tname ] = std ::move (perm );
973+ written .insert (tname );
974+ }
975+ }
976+
861977static void processPasteJoinTables (
862978 TDirectory * dirIn , TDirectory * dirOut ,
863979 const std ::unordered_map < std ::string , PermMap > & allPerms ,
@@ -870,6 +986,12 @@ static void processPasteJoinTables(
870986 const PermMap * mcParticlePerm = findPermByPrefix (allPerms , "O2mcparticle" );
871987 const PermMap * mcCollPermP = findPermByPrefix (allPerms , "O2mccollision_" );
872988 const PermMap * collPermP = findPermByPrefix (allPerms , "O2collision_" );
989+ // Track tables reordered in Stage 1b: every reference into them must be
990+ // remapped through their permutation (null if the table is absent / wasn't
991+ // reordered, in which case no remap is needed).
992+ const PermMap * trkPerm = findPermByPrefix (allPerms , "O2track_iu" );
993+ const PermMap * mftPerm = findPermByPrefix (allPerms , "O2mfttrack" );
994+ const PermMap * fwdPerm = findPermByPrefix (allPerms , "O2fwdtrack" );
873995 // bcPermP is passed in from processDF (the BC table is the only stage
874996 // whose permutation isn't already published in allPerms).
875997
@@ -918,6 +1040,23 @@ static void processPasteJoinTables(
9181040 extraRemaps .push_back ({"fIndexBC" , bcPermP });
9191041 }
9201042
1043+ // Track-pointing indices: the track tables may have been reordered in
1044+ // Stage 1b, so every reference into them must be remapped through the
1045+ // corresponding permutation. (No-op when the perm is null / absent.)
1046+ auto addTrkRemap = [& ](const char * br , const PermMap * pm ) {
1047+ if (pm && src -> GetBranch (br )) extraRemaps .push_back ({br , pm });
1048+ };
1049+ addTrkRemap ("fIndexTracks ", trkPerm );
1050+ addTrkRemap ("fIndexTracks_0" , trkPerm );
1051+ addTrkRemap ("fIndexTracks_1" , trkPerm );
1052+ addTrkRemap ("fIndexTracks_2" , trkPerm );
1053+ addTrkRemap ("fIndexTracks_Pos" , trkPerm );
1054+ addTrkRemap ("fIndexTracks_Neg" , trkPerm );
1055+ addTrkRemap ("fIndexTracks_ITS" , trkPerm );
1056+ addTrkRemap ("fIndexMFTTracks" , mftPerm );
1057+ addTrkRemap ("fIndexFwdTracks" , fwdPerm );
1058+ addTrkRemap ("fIndexFwdTracks_MatchMCHTrack" , fwdPerm );
1059+
9211060 // Find a paste-join parent for this table (kPasteJoins lookup).
9221061 const PermMap * parentPerm = nullptr ;
9231062 std ::string parentName ;
@@ -1020,10 +1159,10 @@ static void processDF(TDirectory *dirIn, TDirectory *dirOut) {
10201159 return ;
10211160 }
10221161
1023- // ---- Stage 0: sort & deduplicate BCs ----
1162+ // ---- Stage 0: deduplicate BCs (order-preserving) ----
10241163 std ::cout << "-- Stage 0: BCs --\n" ;
10251164 dirOut -> cd ();
1026- BCStage0Result s0 = stage0_sortBCs (treeBCs , dirOut );
1165+ BCStage0Result s0 = stage0_dedupBCs (treeBCs , dirOut );
10271166 if (treeFlags ) stage0_copyBCFlags (treeFlags , dirOut , s0 .bcPerm );
10281167
10291168 // Track which tree names have been written so we don't double-write
@@ -1056,6 +1195,13 @@ static void processDF(TDirectory *dirIn, TDirectory *dirOut) {
10561195 std ::cout << " (no MCCollision table found — skipping stage 2)\n" ;
10571196 }
10581197
1198+ // ---- Stage 1b: regroup collision-grouped track tables ----
1199+ // Must run after Stage 1 (needs the collision permutation) and before the
1200+ // paste-join stage (so children follow the new track order and fIndexTracks*
1201+ // references are remapped). Publishes track permutations into stage1Perms.
1202+ std ::cout << "-- Stage 1b: collision-grouped track tables --\n" ;
1203+ stage1b_reorderTrackTables (dirIn , dirOut , stage1Perms , written );
1204+
10591205 // ---- Paste-join tables + unrelated tables ----
10601206 std ::cout << "-- Paste-join and unrelated tables --\n" ;
10611207 processPasteJoinTables (dirIn , dirOut , stage1Perms , written , & s0 .bcPerm );
0 commit comments