Encode saturated qTot and tailLength in ClusterNative

fweig · fweig · commit ddd6a885b23c · 2026-06-02T13:09:41.000+02:00
diff --git a/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h b/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h
@@ -62,6 +62,8 @@ struct ClusterNative {
   static constexpr int scalePadPacked = 64;       //< ~60 is needed for 0.1mm precision, but power of two avoids rounding
   static constexpr int scaleSigmaTimePacked = 32; // 1/32nd of pad/timebin precision for cluster size
   static constexpr int scaleSigmaPadPacked = 32;
+  static constexpr int scaleSaturatedQTot = 4; // Divisor applied by setSaturatedQtot() and multiplier applied by getSaturatedQtot()
+  static constexpr int maxSaturatedQTot = UINT16_MAX * scaleSaturatedQTot; // Largest charge setSaturatedQtot() accepts before clamping
 
   uint32_t timeFlagsPacked; //< Contains the time in the lower 24 bits in a packed format, contains the flags in the
                             // upper 8 bits
@@ -138,6 +140,31 @@ struct ClusterNative {
     sigmaPadPacked = tmp;
   }
 
+  GPUd() bool isSaturated() const { return qMax >= 1023; } // True when qMax is at or above 1023. NOTE(review): 1023 presumably is the ADC ceiling - confirm
+
+  GPUd() void setSaturatedQtot(uint32_t qtot) // Store a total charge in qTot at reduced resolution (units of scaleSaturatedQTot)
+  {
+    if (qtot > maxSaturatedQTot) { // Clamp so that the scaled value cannot exceed UINT16_MAX
+      qtot = maxSaturatedQTot;
+    }
+    this->qTot = qtot / scaleSaturatedQTot; // Integer division: the remainder is dropped, not rounded
+  }
+
+  GPUd() uint32_t getSaturatedQtot() const // Undo the scaling of setSaturatedQtot(); the remainder dropped there is not recovered
+  {
+    return uint32_t(qTot) * scaleSaturatedQTot; // Convert to uint32_t before multiplying
+  }
+
+  GPUd() void setSaturatedTailLength(uint32_t tail) // Store the tail length as an 8-bit code, overwriting sigmaTimePacked
+  {
+    sigmaTimePacked = encodeTailLength(tail); // Lossy: the nearest representable length is kept
+  }
+
+  GPUd() uint32_t getSaturatedTailLength() const // Interpret sigmaTimePacked as a tail-length code and return the decoded length
+  {
+    return decodeTailLength(sigmaTimePacked); // Only meaningful if the field was written by setSaturatedTailLength()
+  }
+
   GPUd() bool operator<(const ClusterNative& rhs) const
   {
     if (this->getTimePacked() != rhs.getTimePacked()) {
@@ -167,6 +194,93 @@ struct ClusterNative {
            this->qTot == rhs.qTot &&
            this->getFlags() == rhs.getFlags();
   }
+
+ private:
+  static constexpr GPUd() uint32_t decodeTailLength(uint8_t code) // Map an 8-bit code to the tail length it stands for
+  {
+    // Tail lengths are quantized into 8 bits with a step that grows with the length.
+    // Max expected length is 1500 tbs.
+    // But allow outliers up to 8000 tbs.
+    //
+    // Full code layout (decoded values increase strictly with the code):
+    //
+    // | Code range | Decoded values |  Step | Codes |
+    // | ---------: | -------------: | ----: | ----: |
+    // |    `0..63` |        `0..63` |   `1` |  `64` |
+    // |   `64..95` |      `64..126` |   `2` |  `32` |
+    // |  `96..127` |     `128..252` |   `4` |  `32` |
+    // | `128..159` |     `256..504` |   `8` |  `32` |
+    // | `160..223` |    `512..1520` |  `16` |  `64` |
+    // | `224..239` |   `1552..2032` |  `32` |  `16` |
+    // | `240..255` |   `2048..8048` | `400` |  `16` |
+    //
+
+    if (code < 64) { // Codes 0..63 are stored verbatim
+      return code;
+    }
+
+    if (code < 160) { // Codes 64..159: three 32-code bands with steps 2, 4 and 8
+      uint32_t q = (uint32_t)code - 64u; // 0..95
+      uint32_t exponent = (q >> 5) + 1u; // 1, 2, 3
+      uint32_t mantissa = q & 31u;       // 0..31
+
+      return (32u + mantissa) << exponent; // 64..126, 128..252 or 256..504 depending on the band
+    }
+
+    if (code < 224) { // Codes 160..223: step 16 starting at 512
+      return 512u + 16u * ((uint32_t)code - 160u);
+    }
+
+    if (code < 240) { // Codes 224..239: step 32 starting at 1552
+      return 1552u + 32u * ((uint32_t)code - 224u);
+    }
+
+    return 2048u + 400u * ((uint32_t)code - 240u); // Codes 240..255: coarse step 400, topping out at 8048
+  }
+
+  static constexpr GPUd() uint8_t encodeTailLength(uint32_t value) // Return the code whose decoded length is closest to value
+  {
+    // Saturate at and above the largest representable length.
+    if (value >= decodeTailLength(255)) [[unlikely]] {
+      return 255;
+    }
+
+    // Binary search for the first code whose decoded value >= value (needs decodeTailLength to increase with the code).
+    uint8_t lo = 0;
+    uint8_t hi = 255;
+
+    while (lo < hi) {
+      uint8_t mid = lo + ((hi - lo) >> 1); // Midpoint rounded down, so mid < hi and mid + 1 stays within uint8_t
+      uint32_t decoded = decodeTailLength(mid);
+
+      if (decoded < value) {
+        lo = mid + 1;
+      } else {
+        hi = mid;
+      }
+    }
+
+    // lo is now the first code with decoded >= value; lo == 0 only happens for value == 0.
+    if (lo == 0) [[unlikely]] {
+      return 0;
+    }
+
+    uint8_t above_code = lo;
+    uint8_t below_code = lo - 1;
+
+    uint32_t above_value = decodeTailLength(above_code);
+    uint32_t below_value = decodeTailLength(below_code);
+
+    uint32_t above_error = above_value - value; // Cannot underflow: above_value >= value
+    uint32_t below_error = value - below_value; // Cannot underflow: below_value < value
+
+    // Pick the neighbour with the smaller error; on a tie prefer the lower code.
+    if (below_error <= above_error) {
+      return below_code;
+    } else {
+      return above_code;
+    }
+  }
 };
 
 // This is an index struct to access TPC clusters inside sectors and rows. It shall not own the data, but just point to
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx
@@ -529,11 +529,11 @@ GPUd() void GPUTPCCFHIPClusterizer::Thread<0>(int32_t nBlocks, int32_t nThreads,
     const float firstWeight = tail->qTot;
     const float firstPad = tail->pad;
     const float firstTime = HIPTailTimeMean(*tail);
-    const float firstTimeVariance = HIPTailTimeVariance(*tail);
     float padSum = firstWeight * firstPad;
     float padSqSum = firstWeight * firstPad * firstPad;
     float timeSum = firstWeight * firstTime;
-    float timeSqSum = firstWeight * (firstTime * firstTime + firstTimeVariance);
+
+    uint32_t tailLength = tail->tailEnd - tail->tailStart;
 
     while (tail->iNext != 0) {
 
@@ -542,28 +542,26 @@ GPUd() void GPUTPCCFHIPClusterizer::Thread<0>(int32_t nBlocks, int32_t nThreads,
       const float tailWeight = tail->qTot;
       const float tailPad = tail->pad;
       const float tailTime = HIPTailTimeMean(*tail);
-      const float tailTimeVariance = HIPTailTimeVariance(*tail);
       qMax = CAMath::Max(qMax, tail->qMax);
       qTot += tail->qTot;
       padSum += tailWeight * tailPad;
       padSqSum += tailWeight * tailPad * tailPad;
       timeSum += tailWeight * tailTime;
-      timeSqSum += tailWeight * (tailTime * tailTime + tailTimeVariance);
+      tailLength = CAMath::Max<uint32_t>(tailLength, tail->tailEnd - tail->tailStart);
     }
 
     const float weightSum = CAMath::Max(qTot, 1.f);
     float padMean = padSum / weightSum;
     float timeMean = timeSum / weightSum; // TODO: Use timebin of saturated signal instead! Time mean is biased for long tails.
     float padSigma = CAMath::Sqrt(CAMath::Max(0.f, padSqSum / weightSum - padMean * padMean));
-    float timeSigma = CAMath::Sqrt(CAMath::Max(0.f, timeSqSum / weightSum - timeMean * timeMean));
 
     tpc::ClusterNative cn;
     cn.qMax = qMax;
-    cn.qTot = (uint16_t)CAMath::Min(qTot, 65535.f);
+    cn.setSaturatedQtot(qTot);
+    cn.setSaturatedTailLength(tailLength);
     float clusterTime = fragment.start + timeMean - clusterer.Param().rec.tpc.clustersShiftTimebinsClusterizer;
     cn.setTimeFlags(clusterTime, 0);
     cn.setPad(padMean);
-    cn.setSigmaTime(timeSigma);
     cn.setSigmaPad(padSigma);
 
     if (cn.qMax >= 1023) {