From ddd6a885b23c09c888cd2edbfab202a65d5627e1 Mon Sep 17 00:00:00 2001 From: Felix Weiglhofer Date: Tue, 2 Jun 2026 13:09:41 +0200 Subject: [PATCH 1/7] Encode saturated qTot and tailLength in ClusterNative --- .../include/DataFormatsTPC/ClusterNative.h | 114 ++++++++++++++++++ .../GPUTPCCFCheckPadBaseline.cxx | 12 +- 2 files changed, 119 insertions(+), 7 deletions(-) diff --git a/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h b/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h index f3070d456afb1..0860c0c3cd05c 100644 --- a/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h +++ b/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h @@ -62,6 +62,8 @@ struct ClusterNative { static constexpr int scalePadPacked = 64; //< ~60 is needed for 0.1mm precision, but power of two avoids rounding static constexpr int scaleSigmaTimePacked = 32; // 1/32nd of pad/timebin precision for cluster size static constexpr int scaleSigmaPadPacked = 32; + static constexpr int scaleSaturatedQTot = 4; + static constexpr int maxSaturatedQTot = UINT16_MAX * scaleSaturatedQTot; uint32_t timeFlagsPacked; //< Contains the time in the lower 24 bits in a packed format, contains the flags in the // upper 8 bits @@ -138,6 +140,31 @@ struct ClusterNative { sigmaPadPacked = tmp; } + GPUd() bool isSaturated() const { return qMax >= 1023; } + + GPUd() void setSaturatedQtot(uint32_t qtot) + { + if (qtot > maxSaturatedQTot) { + qtot = maxSaturatedQTot; + } + this->qTot = qtot / scaleSaturatedQTot; + } + + GPUd() uint32_t getSaturatedQtot() const + { + return uint32_t(qTot) * scaleSaturatedQTot; + } + + GPUd() void setSaturatedTailLength(uint32_t tail) + { + sigmaTimePacked = encodeTailLength(tail); + } + + GPUd() uint32_t getSaturatedTailLength() const + { + return decodeTailLength(sigmaTimePacked); + } + GPUd() bool operator<(const ClusterNative& rhs) const { if (this->getTimePacked() != rhs.getTimePacked()) { @@ -167,6 +194,93 @@ struct ClusterNative { this->qTot == rhs.qTot && this->getFlags() == rhs.getFlags(); } + + private: + static constexpr GPUd() uint32_t decodeTailLength(uint8_t code) + { + // Quantize tail length into 8bits. + // Max expected length is 1500 tbs. + // But allow outliers up to 8000 tbs. + // + // Full code layout is: + // + // | Code range | Decoded values | Step | Codes | + // | ---------: | -------------: | ----: | ----: | + // | `0..63` | `0..63` | `1` | `64` | + // | `64..95` | `64..126` | `2` | `32` | + // | `96..127` | `128..252` | `4` | `32` | + // | `128..159` | `256..504` | `8` | `32` | + // | `160..223` | `512..1520` | `16` | `64` | + // | `224..239` | `1552..2032` | `32` | `16` | + // | `240..255` | `2048..8048` | `400` | `16` | + // + + if (code < 64) { + return code; + } + + if (code < 160) { + uint32_t q = (uint32_t)code - 64u; + uint32_t exponent = (q >> 5) + 1u; // 1, 2, 3 + uint32_t mantissa = q & 31u; // 0..31 + + return (32u + mantissa) << exponent; + } + + if (code < 224) { + return 512u + 16u * ((uint32_t)code - 160u); + } + + if (code < 240) { + return 1552u + 32u * ((uint32_t)code - 224u); + } + + return 2048u + 400u * ((uint32_t)code - 240u); + } + + static constexpr GPUd() uint8_t encodeTailLength(uint32_t value) + { + // Saturate above representable range. + if (value >= decodeTailLength(255)) [[unlikely]] { + return 255; + } + + // Binary search for the first code whose decoded value >= value. + uint8_t lo = 0; + uint8_t hi = 255; + + while (lo < hi) { + uint8_t mid = lo + ((hi - lo) >> 1); + uint32_t decoded = decodeTailLength(mid); + + if (decoded < value) { + lo = mid + 1; + } else { + hi = mid; + } + } + + // lo is now the first code with decoded >= value. + if (lo == 0) [[unlikely]] { + return 0; + } + + uint8_t above_code = lo; + uint8_t below_code = lo - 1; + + uint32_t above_value = decodeTailLength(above_code); + uint32_t below_value = decodeTailLength(below_code); + + uint32_t above_error = above_value - value; + uint32_t below_error = value - below_value; + + // Tie-break downward. + if (below_error <= above_error) { + return below_code; + } else { + return above_code; + } + } }; // This is an index struct to access TPC clusters inside sectors and rows. It shall not own the data, but just point to diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx index dc05c477c9c5f..d9318b8fdb976 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx @@ -529,11 +529,11 @@ GPUd() void GPUTPCCFHIPClusterizer::Thread<0>(int32_t nBlocks, int32_t nThreads, const float firstWeight = tail->qTot; const float firstPad = tail->pad; const float firstTime = HIPTailTimeMean(*tail); - const float firstTimeVariance = HIPTailTimeVariance(*tail); float padSum = firstWeight * firstPad; float padSqSum = firstWeight * firstPad * firstPad; float timeSum = firstWeight * firstTime; - float timeSqSum = firstWeight * (firstTime * firstTime + firstTimeVariance); + + uint32_t tailLength = tail->tailEnd - tail->tailStart; while (tail->iNext != 0) { @@ -542,28 +542,26 @@ GPUd() void GPUTPCCFHIPClusterizer::Thread<0>(int32_t nBlocks, int32_t nThreads, const float tailWeight = tail->qTot; const float tailPad = tail->pad; const float tailTime = HIPTailTimeMean(*tail); - const float tailTimeVariance = HIPTailTimeVariance(*tail); qMax = CAMath::Max(qMax, tail->qMax); qTot += tail->qTot; padSum += tailWeight * tailPad; padSqSum += tailWeight * tailPad * tailPad; timeSum += tailWeight * tailTime; - timeSqSum += tailWeight * (tailTime * tailTime + tailTimeVariance); + tailLength = CAMath::Max(tailLength, tail->tailEnd - tail->tailStart); } const float weightSum = CAMath::Max(qTot, 1.f); float padMean = padSum / weightSum; float timeMean = timeSum / weightSum; // TODO: Use timebin of saturated signal instead! Time mean is biased for long tails. float padSigma = CAMath::Sqrt(CAMath::Max(0.f, padSqSum / weightSum - padMean * padMean)); - float timeSigma = CAMath::Sqrt(CAMath::Max(0.f, timeSqSum / weightSum - timeMean * timeMean)); tpc::ClusterNative cn; cn.qMax = qMax; - cn.qTot = (uint16_t)CAMath::Min(qTot, 65535.f); + cn.setSaturatedQtot(qTot); + cn.setSaturatedTailLength(tailLength); float clusterTime = fragment.start + timeMean - clusterer.Param().rec.tpc.clustersShiftTimebinsClusterizer; cn.setTimeFlags(clusterTime, 0); cn.setPad(padMean); - cn.setSigmaTime(timeSigma); cn.setSigmaPad(padSigma); if (cn.qMax >= 1023) { From 4e6c96473eef498f167710b581d489f68df0b983 Mon Sep 17 00:00:00 2001 From: Felix Weiglhofer Date: Tue, 2 Jun 2026 13:25:31 +0200 Subject: [PATCH 2/7] Adjust cluster compression --- .../DataCompression/GPUTPCClusterStatistics.cxx | 6 ++++-- GPU/GPUTracking/DataCompression/GPUTPCCompression.h | 2 +- .../DataCompression/GPUTPCCompressionKernels.cxx | 12 ++++++++---- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx b/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx index 3d8e749e84147..04eee78e2e70b 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx @@ -128,9 +128,11 @@ void GPUTPCClusterStatistics::RunStatistics(const o2::tpc::ClusterNativeAccess* tmpClusters[k] = clustersNative->clusters[i][j][k]; if (param.rec.tpc.compressionTypeMask & GPUSettings::CompressionTruncate) { GPUTPCCompression::truncateSignificantBitsChargeMax(tmpClusters[k].qMax, param); - GPUTPCCompression::truncateSignificantBitsCharge(tmpClusters[k].qTot, param); GPUTPCCompression::truncateSignificantBitsWidth(tmpClusters[k].sigmaPadPacked, param); - GPUTPCCompression::truncateSignificantBitsWidth(tmpClusters[k].sigmaTimePacked, param); + if (!tmpClusters[k].isSaturated()) { + GPUTPCCompression::truncateSignificantBitsCharge(tmpClusters[k].qTot, param); + GPUTPCCompression::truncateSignificantBitsWidth(tmpClusters[k].sigmaTimePacked, param); + } } } std::sort(tmpClusters.begin(), tmpClusters.end()); diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompression.h b/GPU/GPUTracking/DataCompression/GPUTPCCompression.h index 82e44eda6f3cc..5b3d269da5a88 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompression.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompression.h @@ -47,7 +47,7 @@ class GPUTPCCompression : public GPUProcessor #endif static constexpr uint32_t P_MAX_QMAX = 1 << 10; - static constexpr uint32_t P_MAX_QTOT = 5 * 5 * P_MAX_QMAX; + static constexpr uint32_t P_MAX_QTOT = 1 << 16; static constexpr uint32_t P_MAX_TIME = 1 << 24; static constexpr uint32_t P_MAX_PAD = 1 << 16; static constexpr uint32_t P_MAX_SIGMA = 1 << 8; diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx index b98f5c28f57b0..c23705bb614c3 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx @@ -121,9 +121,11 @@ GPUdii() void GPUTPCCompressionKernels::Thread Date: Tue, 2 Jun 2026 14:03:03 +0200 Subject: [PATCH 3/7] Compute tail length as earliest tb to last tb from any tail --- .../TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx index d9318b8fdb976..90a2fbb733148 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx @@ -533,7 +533,8 @@ GPUd() void GPUTPCCFHIPClusterizer::Thread<0>(int32_t nBlocks, int32_t nThreads, float padSqSum = firstWeight * firstPad * firstPad; float timeSum = firstWeight * firstTime; - uint32_t tailLength = tail->tailEnd - tail->tailStart; + uint32_t tailStart = tail->tailStart; + uint32_t tailEnd = tail->tailEnd; while (tail->iNext != 0) { @@ -547,7 +548,8 @@ GPUd() void GPUTPCCFHIPClusterizer::Thread<0>(int32_t nBlocks, int32_t nThreads, padSum += tailWeight * tailPad; padSqSum += tailWeight * tailPad * tailPad; timeSum += tailWeight * tailTime; - tailLength = CAMath::Max(tailLength, tail->tailEnd - tail->tailStart); + tailStart = CAMath::Min(tailStart, tail->tailStart); + tailEnd = CAMath::Max(tailEnd, tail->tailEnd); } const float weightSum = CAMath::Max(qTot, 1.f); @@ -558,7 +560,7 @@ GPUd() void GPUTPCCFHIPClusterizer::Thread<0>(int32_t nBlocks, int32_t nThreads, tpc::ClusterNative cn; cn.qMax = qMax; cn.setSaturatedQtot(qTot); - cn.setSaturatedTailLength(tailLength); + cn.setSaturatedTailLength(tailEnd - tailStart); float clusterTime = fragment.start + timeMean - clusterer.Param().rec.tpc.clustersShiftTimebinsClusterizer; cn.setTimeFlags(clusterTime, 0); cn.setPad(padMean); From 81779eb828c1a8de882b3cac0c05d62d7af87f19 Mon Sep 17 00:00:00 2001 From: Felix Weiglhofer Date: Tue, 2 Jun 2026 14:04:24 +0200 Subject: [PATCH 4/7] Add fallbacks when accessing properties of saturated clusters --- .../include/DataFormatsTPC/ClusterNative.h | 20 ++++++++++++++++--- .../GPUTPCClusterFinderDump.cxx | 8 +++++++- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h b/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h index 0860c0c3cd05c..6b64591e36f9d 100644 --- a/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h +++ b/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h @@ -85,7 +85,15 @@ struct ClusterNative { } GPUd() uint16_t getQmax() const { return qMax; } - GPUd() uint16_t getQtot() const { return qTot; } + GPUd() uint16_t getQtot() const + { + if (isSaturated()) [[unlikely]] { + // Check for overflow, so return type can stay uint16 + auto sqtot = getSaturatedQtot(); + return sqtot <= UINT16_MAX ? sqtot : UINT16_MAX; + } + return qTot; + } GPUd() uint8_t getFlags() const { return timeFlagsPacked >> 24; } GPUd() uint32_t getTimePacked() const { return timeFlagsPacked & 0xFFFFFF; } GPUd() void setTimePackedFlags(uint32_t timePacked, uint8_t flags) @@ -121,7 +129,13 @@ struct ClusterNative { /// Y = (12.4 - 0.5 * (66 - 1)) * 4.16mm = -83.616mm GPUd() float getPad() const { return unpackPad(padPacked); } GPUd() void setPad(float pad) { padPacked = packPad(pad); } - GPUd() float getSigmaTime() const { return float(sigmaTimePacked) * (1.f / scaleSigmaTimePacked); } + GPUd() float getSigmaTime() const + { + if (isSaturated()) [[unlikely]] { + return 0; + } + return float(sigmaTimePacked) * (1.f / scaleSigmaTimePacked); + } GPUd() void setSigmaTime(float sigmaTime) { uint32_t tmp = sigmaTime * scaleSigmaTimePacked + 0.5; @@ -147,7 +161,7 @@ struct ClusterNative { if (qtot > maxSaturatedQTot) { qtot = maxSaturatedQTot; } - this->qTot = qtot / scaleSaturatedQTot; + this->qTot = (qtot + scaleSaturatedQTot / 2) / scaleSaturatedQTot; } GPUd() uint32_t getSaturatedQtot() const diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx index 09440825681f4..2b21af6a08bed 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx @@ -166,7 +166,13 @@ void GPUTPCClusterFinder::DumpClusters(std::ostream& out) out << "Row: " << i << ": " << N << "\n"; for (const auto& cl : sortedCluster) { - out << std::hex << cl.timeFlagsPacked << std::dec << " " << cl.padPacked << " " << int32_t{cl.sigmaTimePacked} << " " << int32_t{cl.sigmaPadPacked} << " " << cl.qMax << " " << cl.qTot << "\n"; + uint32_t qTot = cl.qTot; + uint32_t sigmaTime = cl.sigmaTimePacked; + if (cl.isSaturated()) { + qTot = cl.getSaturatedQtot(); + sigmaTime = cl.getSaturatedTailLength(); + } + out << std::hex << cl.timeFlagsPacked << std::dec << " " << cl.padPacked << " " << sigmaTime << " " << int32_t{cl.sigmaPadPacked} << " " << cl.qMax << " " << qTot << "\n"; } } } From b5c0a5539930f573265f43375187697a8d121485 Mon Sep 17 00:00:00 2001 From: Felix Weiglhofer Date: Tue, 2 Jun 2026 14:30:06 +0200 Subject: [PATCH 5/7] Format --- GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx index 90a2fbb733148..105df23453624 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx @@ -534,7 +534,7 @@ GPUd() void GPUTPCCFHIPClusterizer::Thread<0>(int32_t nBlocks, int32_t nThreads, float timeSum = firstWeight * firstTime; uint32_t tailStart = tail->tailStart; - uint32_t tailEnd = tail->tailEnd; + uint32_t tailEnd = tail->tailEnd; while (tail->iNext != 0) { From 4bab503bb15a928af346400b297d516631fe53ae Mon Sep 17 00:00:00 2001 From: Felix Weiglhofer Date: Tue, 2 Jun 2026 14:52:30 +0200 Subject: [PATCH 6/7] Fix OpenCL compilation --- .../Detectors/TPC/include/DataFormatsTPC/ClusterNative.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h b/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h index 6b64591e36f9d..122f873ef4415 100644 --- a/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h +++ b/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h @@ -15,6 +15,7 @@ #ifndef ALICEO2_DATAFORMATSTPC_CLUSTERNATIVE_H #define ALICEO2_DATAFORMATSTPC_CLUSTERNATIVE_H #ifndef GPUCA_GPUCODE_DEVICE +#include #include #include // for size_t #include @@ -63,7 +64,7 @@ struct ClusterNative { static constexpr int scaleSigmaTimePacked = 32; // 1/32nd of pad/timebin precision for cluster size static constexpr int scaleSigmaPadPacked = 32; static constexpr int scaleSaturatedQTot = 4; - static constexpr int maxSaturatedQTot = UINT16_MAX * scaleSaturatedQTot; + static constexpr int maxSaturatedQTot = USHRT_MAX * scaleSaturatedQTot; uint32_t timeFlagsPacked; //< Contains the time in the lower 24 bits in a packed format, contains the flags in the // upper 8 bits @@ -90,7 +91,7 @@ struct ClusterNative { if (isSaturated()) [[unlikely]] { // Check for overflow, so return type can stay uint16 auto sqtot = getSaturatedQtot(); - return sqtot <= UINT16_MAX ? sqtot : UINT16_MAX; + return sqtot <= USHRT_MAX ? sqtot : UINT16_MAX; } return qTot; } From 2afd2943fa951fdad67e769c67a2553f9686128c Mon Sep 17 00:00:00 2001 From: Felix Weiglhofer Date: Tue, 2 Jun 2026 17:33:18 +0200 Subject: [PATCH 7/7] Fix OpenCL compilation (2) --- .../Detectors/TPC/include/DataFormatsTPC/ClusterNative.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h b/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h index 122f873ef4415..42bb7cd87d328 100644 --- a/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h +++ b/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h @@ -91,7 +91,7 @@ struct ClusterNative { if (isSaturated()) [[unlikely]] { // Check for overflow, so return type can stay uint16 auto sqtot = getSaturatedQtot(); - return sqtot <= USHRT_MAX ? sqtot : UINT16_MAX; + return sqtot <= USHRT_MAX ? sqtot : USHRT_MAX; } return qTot; }