Skip to content

Commit ddd6a88

Browse files
committed
Encode saturated qTot and tailLength in ClusterNative
1 parent e731821 commit ddd6a88

2 files changed

Lines changed: 119 additions & 7 deletions

File tree

DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ struct ClusterNative {
6262
static constexpr int scalePadPacked = 64; //< ~60 is needed for 0.1mm precision, but power of two avoids rounding
6363
static constexpr int scaleSigmaTimePacked = 32; // 1/32nd of pad/timebin precision for cluster size
6464
static constexpr int scaleSigmaPadPacked = 32;
65+
static constexpr int scaleSaturatedQTot = 4;
66+
static constexpr int maxSaturatedQTot = UINT16_MAX * scaleSaturatedQTot;
6567

6668
uint32_t timeFlagsPacked; //< Contains the time in the lower 24 bits in a packed format, contains the flags in the
6769
// upper 8 bits
@@ -138,6 +140,31 @@ struct ClusterNative {
138140
sigmaPadPacked = tmp;
139141
}
140142

143+
GPUd() bool isSaturated() const { return qMax >= 1023; }
144+
145+
GPUd() void setSaturatedQtot(uint32_t qtot)
146+
{
147+
if (qtot > maxSaturatedQTot) {
148+
qtot = maxSaturatedQTot;
149+
}
150+
this->qTot = qtot / scaleSaturatedQTot;
151+
}
152+
153+
GPUd() uint32_t getSaturatedQtot() const
154+
{
155+
return uint32_t(qTot) * scaleSaturatedQTot;
156+
}
157+
158+
GPUd() void setSaturatedTailLength(uint32_t tail)
159+
{
160+
sigmaTimePacked = encodeTailLength(tail);
161+
}
162+
163+
GPUd() uint32_t getSaturatedTailLength() const
164+
{
165+
return decodeTailLength(sigmaTimePacked);
166+
}
167+
141168
GPUd() bool operator<(const ClusterNative& rhs) const
142169
{
143170
if (this->getTimePacked() != rhs.getTimePacked()) {
@@ -167,6 +194,93 @@ struct ClusterNative {
167194
this->qTot == rhs.qTot &&
168195
this->getFlags() == rhs.getFlags();
169196
}
197+
198+
private:
199+
static constexpr GPUd() uint32_t decodeTailLength(uint8_t code)
200+
{
201+
// Quantize tail length into 8bits.
202+
// Max expected length is 1500 tbs.
203+
// But allow outliers up to 8000 tbs.
204+
//
205+
// Full code layout is:
206+
//
207+
// | Code range | Decoded values | Step | Codes |
208+
// | ---------: | -------------: | ----: | ----: |
209+
// | `0..63` | `0..63` | `1` | `64` |
210+
// | `64..95` | `64..126` | `2` | `32` |
211+
// | `96..127` | `128..252` | `4` | `32` |
212+
// | `128..159` | `256..504` | `8` | `32` |
213+
// | `160..223` | `512..1520` | `16` | `64` |
214+
// | `224..239` | `1552..2032` | `32` | `16` |
215+
// | `240..255` | `2048..8048` | `400` | `16` |
216+
//
217+
218+
if (code < 64) {
219+
return code;
220+
}
221+
222+
if (code < 160) {
223+
uint32_t q = (uint32_t)code - 64u;
224+
uint32_t exponent = (q >> 5) + 1u; // 1, 2, 3
225+
uint32_t mantissa = q & 31u; // 0..31
226+
227+
return (32u + mantissa) << exponent;
228+
}
229+
230+
if (code < 224) {
231+
return 512u + 16u * ((uint32_t)code - 160u);
232+
}
233+
234+
if (code < 240) {
235+
return 1552u + 32u * ((uint32_t)code - 224u);
236+
}
237+
238+
return 2048u + 400u * ((uint32_t)code - 240u);
239+
}
240+
241+
static constexpr GPUd() uint8_t encodeTailLength(uint32_t value)
242+
{
243+
// Saturate above representable range.
244+
if (value >= decodeTailLength(255)) [[unlikely]] {
245+
return 255;
246+
}
247+
248+
// Binary search for the first code whose decoded value >= value.
249+
uint8_t lo = 0;
250+
uint8_t hi = 255;
251+
252+
while (lo < hi) {
253+
uint8_t mid = lo + ((hi - lo) >> 1);
254+
uint32_t decoded = decodeTailLength(mid);
255+
256+
if (decoded < value) {
257+
lo = mid + 1;
258+
} else {
259+
hi = mid;
260+
}
261+
}
262+
263+
// lo is now the first code with decoded >= value.
264+
if (lo == 0) [[unlikely]] {
265+
return 0;
266+
}
267+
268+
uint8_t above_code = lo;
269+
uint8_t below_code = lo - 1;
270+
271+
uint32_t above_value = decodeTailLength(above_code);
272+
uint32_t below_value = decodeTailLength(below_code);
273+
274+
uint32_t above_error = above_value - value;
275+
uint32_t below_error = value - below_value;
276+
277+
// Tie-break downward.
278+
if (below_error <= above_error) {
279+
return below_code;
280+
} else {
281+
return above_code;
282+
}
283+
}
170284
};
171285

172286
// This is an index struct to access TPC clusters inside sectors and rows. It shall not own the data, but just point to

GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -529,11 +529,11 @@ GPUd() void GPUTPCCFHIPClusterizer::Thread<0>(int32_t nBlocks, int32_t nThreads,
529529
const float firstWeight = tail->qTot;
530530
const float firstPad = tail->pad;
531531
const float firstTime = HIPTailTimeMean(*tail);
532-
const float firstTimeVariance = HIPTailTimeVariance(*tail);
533532
float padSum = firstWeight * firstPad;
534533
float padSqSum = firstWeight * firstPad * firstPad;
535534
float timeSum = firstWeight * firstTime;
536-
float timeSqSum = firstWeight * (firstTime * firstTime + firstTimeVariance);
535+
536+
uint32_t tailLength = tail->tailEnd - tail->tailStart;
537537

538538
while (tail->iNext != 0) {
539539

@@ -542,28 +542,26 @@ GPUd() void GPUTPCCFHIPClusterizer::Thread<0>(int32_t nBlocks, int32_t nThreads,
542542
const float tailWeight = tail->qTot;
543543
const float tailPad = tail->pad;
544544
const float tailTime = HIPTailTimeMean(*tail);
545-
const float tailTimeVariance = HIPTailTimeVariance(*tail);
546545
qMax = CAMath::Max(qMax, tail->qMax);
547546
qTot += tail->qTot;
548547
padSum += tailWeight * tailPad;
549548
padSqSum += tailWeight * tailPad * tailPad;
550549
timeSum += tailWeight * tailTime;
551-
timeSqSum += tailWeight * (tailTime * tailTime + tailTimeVariance);
550+
tailLength = CAMath::Max<uint32_t>(tailLength, tail->tailEnd - tail->tailStart);
552551
}
553552

554553
const float weightSum = CAMath::Max(qTot, 1.f);
555554
float padMean = padSum / weightSum;
556555
float timeMean = timeSum / weightSum; // TODO: Use timebin of saturated signal instead! Time mean is biased for long tails.
557556
float padSigma = CAMath::Sqrt(CAMath::Max(0.f, padSqSum / weightSum - padMean * padMean));
558-
float timeSigma = CAMath::Sqrt(CAMath::Max(0.f, timeSqSum / weightSum - timeMean * timeMean));
559557

560558
tpc::ClusterNative cn;
561559
cn.qMax = qMax;
562-
cn.qTot = (uint16_t)CAMath::Min(qTot, 65535.f);
560+
cn.setSaturatedQtot(qTot);
561+
cn.setSaturatedTailLength(tailLength);
563562
float clusterTime = fragment.start + timeMean - clusterer.Param().rec.tpc.clustersShiftTimebinsClusterizer;
564563
cn.setTimeFlags(clusterTime, 0);
565564
cn.setPad(padMean);
566-
cn.setSigmaTime(timeSigma);
567565
cn.setSigmaPad(padSigma);
568566

569567
if (cn.qMax >= 1023) {

0 commit comments

Comments
 (0)