Implement labels in the output. Tested.

gwm17 · gwm17 · commit c743bab61de2 · 2025-02-24T11:10:03.000-05:00
diff --git a/src/attpc_engine/detector/simulator.py b/src/attpc_engine/detector/simulator.py
@@ -17,7 +17,9 @@
 
 
 @njit
-def dict_to_points(points: NumbaTypedDict[int, int]) -> np.ndarray:
+def dict_to_points(
+    points: NumbaTypedDict[int, tuple[int, int]],
+) -> tuple[np.ndarray, np.ndarray]:
     """
     Converts dictionary of N pad,tb keys with corresponding number of electrons
     to Nx3 array where each row is [pad, tb, e], now combined over pad/tb combos.
@@ -29,17 +31,19 @@ def dict_to_points(points: NumbaTypedDict[int, int]) -> np.ndarray:
 
     Returns
     -------
-    point_array: numpy.ndarray
-        Array of points.
+    tuple[numpy.ndarray, numpy.ndarray]
+        Array of points and labels (in that order)
     """
     point_array = np.empty((len(points), 3), dtype=float)
-    for idx, point in enumerate(points.items()):
-        tb, pad = unpair(point[0])
+    label_array = np.empty(len(points), dtype=types.int64)
+    for idx, (key, data) in enumerate(points.items()):
+        tb, pad = unpair(key)
         point_array[idx, 0] = pad
         point_array[idx, 1] = tb
-        point_array[idx, 2] = point[1]
+        point_array[idx, 2] = data[0]
+        label_array[idx] = data[1]
 
-    return point_array
+    return point_array, label_array
 
 
 def simulate(
@@ -49,38 +53,31 @@ def simulate(
     mass_numbers: np.ndarray,
     config: Config,
     rng: Generator,
-    indicies: list[int] | None,
-) -> np.ndarray:
-    nuclei_to_sim = None
-    if indicies is not None:
-        nuclei_to_sim = indicies
-    else:
-        # default nuclei to sim, all final outgoing particles
-        nuclei_to_sim = [idx for idx in range(2, len(proton_numbers), 2)]
-        nuclei_to_sim.append(len(proton_numbers) - 1)  # add the last
-
-    points = Dict.empty(key_type=types.int64, value_type=types.int64)
-
-    for idx in nuclei_to_sim:
+    indicies: list[int],
+) -> tuple[np.ndarray, np.ndarray]:
+    points = Dict.empty(
+        key_type=types.int64, value_type=types.Tuple(types=[types.int64, types.int64])
+    )
+    for idx in indicies:
         if proton_numbers[idx] == 0:
             continue
         nucleus = nuclear_map.get_data(proton_numbers[idx], mass_numbers[idx])
         momentum = momenta[idx]
-        generate_point_cloud(momentum, vertex, nucleus, config, rng, points)
+        generate_point_cloud(momentum, vertex, nucleus, config, rng, points, idx)
 
     # Convert to numpy array of [pad, tb, e], now combined over pad/tb combos
-    point_array = dict_to_points(points)
+    point_array, label_array = dict_to_points(points)
 
     # Wiggle point TBs over interval [0.0, 1.0). This simulates effect of converting
     # the (in principle) int TBs to floats.
     point_array[:, 1] += rng.uniform(low=0.0, high=1.0, size=len(point_array))
 
     # Remove points outside legal bounds in time. TODO check if this is needed
-    point_array = point_array[
-        np.logical_and(0 <= point_array[:, 1], point_array[:, 1] < NUM_TB)
-    ]
+    mask = np.logical_and(0 <= point_array[:, 1], point_array[:, 1] < NUM_TB)
+    point_array = point_array[mask]
+    label_array = label_array[mask]
 
-    return point_array
+    return point_array, label_array
 
 
 def run_simulation(
@@ -107,9 +104,18 @@ def run_simulation(
     print(f"Applying detector effects to kinematics from file: {input_path}")
     input = h5.File(input_path, "r")
     input_data_group: h5.Group = input["data"]  # type: ignore
-    proton_numbers = input_data_group.attrs["proton_numbers"]
+    proton_numbers: np.ndarray = input_data_group.attrs["proton_numbers"]  # type: ignore
     mass_numbers = input_data_group.attrs["mass_numbers"]
 
+    # Decide which nuclei to sim, either by user input or all reaction final products
+    nuclei_to_sim = None
+    if indicies is not None:
+        nuclei_to_sim = indicies
+    else:
+        # default nuclei to sim, all final outgoing particles
+        nuclei_to_sim = [idx for idx in range(2, len(proton_numbers), 2)]
+        nuclei_to_sim.append(len(proton_numbers) - 1)  # add the last
+
     n_events: int = input_data_group.attrs["n_events"]  # type: ignore
     miniters = int(0.01 * n_events)
     n_chunks: int = input_data_group.attrs["n_chunks"]  # type: ignore
@@ -138,7 +144,7 @@ def run_simulation(
         dataset: h5.Dataset = input_data_group[f"chunk_{chunk}"][  # type: ignore
             f"event_{event_number}"
         ]  # type: ignore
-        cloud = simulate(
+        cloud, labels = simulate(
             dataset[:].copy(),  # type: ignore
             np.array(
                 [
@@ -151,13 +157,13 @@ def run_simulation(
             mass_numbers,  # type: ignore
             config,
             rng,
-            indicies,
+            nuclei_to_sim,
         )
 
         if len(cloud) == 0:
             continue
 
-        writer.write(cloud, config, event_number)
+        writer.write(cloud, labels, config, event_number)
     writer.close()
     print("Done.")
     print("----------------------------------------")
diff --git a/src/attpc_engine/detector/solver.py b/src/attpc_engine/detector/solver.py
@@ -346,6 +346,7 @@ def generate_point_cloud(
     config: Config,
     rng: Generator,
     points: NumbaTypedDict,
+    label: int,
 ) -> None:
     """Create the point cloud
 
@@ -391,4 +392,5 @@ def generate_point_cloud(
         track,
         electrons,
         points,
+        label,
     )
diff --git a/src/attpc_engine/detector/transporter.py b/src/attpc_engine/detector/transporter.py
@@ -124,7 +124,8 @@ def point_transport(
     time: float,
     center: tuple[float, float],
     electrons: int,
-    points: NumbaTypedDict[int, int],
+    points: NumbaTypedDict[int, tuple[int, int]],
+    label: int,
 ):
     """
     Transports all electrons created at a point in a simulated nucleus' track
@@ -157,9 +158,9 @@ def point_transport(
     if pad != -1 and pad not in BEAM_PADS_ARRAY:
         tb = int(time)  # Convert from absolute time bucket to discretized
         id = pair(tb, pad)
-        points[id] = (
-            points.get(id, 0) + electrons
-        )  # The get returns 0 if the key doesn't exist
+        charge, _ = points.get(id, (0, 0))  # The get returns 0 if the key doesn't exist
+        charge += electrons
+        points[id] = (charge, label)
 
 
 @njit
@@ -170,7 +171,8 @@ def transverse_transport(
     center: tuple[float, float],
     electrons: int,
     sigma_t: float,
-    points: NumbaTypedDict[int, int],
+    points: NumbaTypedDict[int, tuple[int, int]],
+    label: int,
 ):
     """
     Transports all electrons created at a point in a simulated nucleus'
@@ -233,59 +235,9 @@ def transverse_transport(
                     * electrons
                 )
             )
-            points[id] = (
-                points.get(id, 0) + pixel_electrons
-            )  # The get returns 0 if the key doesn't exist
-
-
-@njit
-def find_pads_hit(
-    pad_grid: np.ndarray,
-    grid_edges: np.ndarray,
-    time: float,
-    center: tuple[float, float],
-    electrons: int,
-    sigma_t: float,
-    points: NumbaTypedDict[int, int],
-):
-    """
-    Finds the pads hit by transporting the electrons created at a point in
-    the nucleus' trajectory to the pad plane and applies transverse diffusion, if selected.
-
-    Parameters
-    ----------
-    pad_grid: numpy.ndarray
-        Grid of pad id for a given index, where index is calculated from x-y position
-    grid_edges: numpy.ndarray
-        Edges of the pad grid in mm, as well as the step size of the grid in mm
-        Allows conversion of position to grid index. 3 element array [low_edge, hi_edge, step]
-    time: float
-        Time of point being transported.
-    center: tuple[float, float]
-        (x,y) position of point being transported.
-    electrons: int
-        Number of electrons made at point being transported.
-    sigma_t: float
-        Standard deviation of transverse diffusion at point
-        being transported.
-    points: numba.typed.Dict[int, int]
-        A dictionary mapping a unique pad,tb key to the number of electrons, which
-        will be filled by this function
-    """
-    # Point transport
-    if sigma_t == 0.0:
-        point_transport(pad_grid, grid_edges, time, center, electrons, points)
-    # Transverse diffusion transport
-    else:
-        transverse_transport(
-            pad_grid,
-            grid_edges,
-            time,
-            center,
-            electrons,
-            sigma_t,
-            points,
-        )
+            charge, _ = points.get(id, (0, 0))
+            charge += pixel_electrons
+            points[id] = (charge, label)  # The get returns 0 if the key doesn't exist
 
 
 @njit
@@ -297,7 +249,8 @@ def transport_track(
     dv: float,
     track: np.ndarray,
     electrons: np.ndarray,
-    points: NumbaTypedDict[int, int],
+    points: NumbaTypedDict[int, tuple[int, int]],
+    label: int,
 ):
     """
     High-level function that transports each point in a nucleus' trajectory
@@ -333,6 +286,19 @@ def transport_track(
         center = (row[0], row[1])
         point_electrons = electrons[idx]
         sigma_t = np.sqrt(2.0 * diffusion * dv * time / efield)
-        find_pads_hit(
-            pad_grid, grid_edges, time, center, point_electrons, sigma_t, points
-        )
+        if sigma_t == 0.0:
+            point_transport(
+                pad_grid, grid_edges, time, center, point_electrons, points, label
+            )
+        # Transverse diffusion transport
+        else:
+            transverse_transport(
+                pad_grid,
+                grid_edges,
+                time,
+                center,
+                point_electrons,
+                sigma_t,
+                points,
+                label,
+            )
diff --git a/src/attpc_engine/detector/writer.py b/src/attpc_engine/detector/writer.py
@@ -23,7 +23,9 @@ class SimulationWriter(Protocol):
         Closes the writer.
     """
 
-    def write(self, data: np.ndarray, config: Config, event_number: int) -> None:
+    def write(
+        self, data: np.ndarray, labels: np.ndarray, config: Config, event_number: int
+    ) -> None:
         """
         Writes a simulated point cloud to the point cloud file.
 
@@ -83,8 +85,6 @@ def convert_to_spyral(
         (x, y) coordinates of each pad's center on the pad plane in mm.
     pad_sizes: np.ndarray
         Contains size of each pad.
-    adc_threshold: int
-        Minimum ADC signal amplitude a point must have in the point cloud.
 
     Returns
     -------
@@ -107,11 +107,7 @@ def convert_to_spyral(
         storage[idx, 6] = point[1]
         storage[idx, 7] = pad_sizes[int(point[0])]
 
-    if adc_threshold >= 4095:
-        raise ValueError(
-            "adc_threshold cannot be equal to or greater than the max GET ADC value!"
-        )
-    return storage[storage[:, 3] > adc_threshold]
+    return storage
 
 
 class SpyralWriter:
@@ -191,7 +187,9 @@ def create_next_file(self) -> None:
         self.file = h5.File(path, "w")
         self.cloud_group: h5.Group = self.file.create_group("cloud")
 
-    def write(self, data: np.ndarray, config: Config, event_number: int) -> None:
+    def write(
+        self, data: np.ndarray, labels: np.ndarray, config: Config, event_number: int
+    ) -> None:
         """
         Writes a simulated point cloud to the point cloud file.
 
@@ -224,21 +222,27 @@ def write(self, data: np.ndarray, config: Config, event_number: int) -> None:
             config.pad_sizes,
             config.elec_params.adc_threshold,
         )
+        # apply ADC threshold
+        mask = spyral_format[:, 3] > config.elec_params.adc_threshold
+        spyral_format = spyral_format[mask]
+        labels = labels[mask]
         # Make sure we're still sorted in z
         indicies = np.argsort(spyral_format[:, 2])
         spyral_format = spyral_format[indicies]
+        labels = labels[indicies]
 
-        dset = self.cloud_group.create_dataset(
+        pc_dset = self.cloud_group.create_dataset(
             f"cloud_{event_number}", data=spyral_format
         )
-
-        dset.attrs["orig_run"] = self.run_number
-        dset.attrs["orig_event"] = event_number
+        pc_dset.attrs["orig_run"] = self.run_number
+        pc_dset.attrs["orig_event"] = event_number
         # No ic stuff from simulation
-        dset.attrs["ic_amplitude"] = -1.0
-        dset.attrs["ic_multiplicity"] = -1.0
-        dset.attrs["ic_integral"] = -1.0
-        dset.attrs["ic_centroid"] = -1.0
+        pc_dset.attrs["ic_amplitude"] = -1.0
+        pc_dset.attrs["ic_multiplicity"] = -1.0
+        pc_dset.attrs["ic_integral"] = -1.0
+        pc_dset.attrs["ic_centroid"] = -1.0
+
+        _ = self.cloud_group.create_dataset(f"labels_{event_number}", data=labels)
 
         # We wrote an event
         self.events_written += 1
diff --git a/tests/test_detector.py b/tests/test_detector.py
@@ -4,7 +4,7 @@
     PadParams,
     Config,
 )
-from attpc_engine.detector.simulator import SimEvent
+from attpc_engine.detector.simulator import simulate
 from attpc_engine import nuclear_map
 
 from spyral_utils.nuclear.target import GasTarget
@@ -45,17 +45,19 @@ def test_simulation_event():
     # all protons bby
     fake_data = np.array(
         [
-            [0.0, 0.0, 0.0, 938.0],
-            [0.0, 0.0, 0.0, 938.0],
-            [0.0, 0.0, 0.0, 938.0],
-            [0.0, 0.0, 0.0, 938.0],
+            [0.0, 0.0, 10.0, 938.0],
+            [0.0, 0.0, 10.0, 938.0],
+            [0.0, 0.0, 10.0, 938.0],
+            [0.0, 0.0, 10.0, 938.0],
         ]
     )
 
     proton_numbers = np.array([1, 1, 1, 1])
     mass_numbers = np.array([1, 1, 1, 1])
     vertex = np.array([1.0, 1.0, 1.0])
+    config = Config(detector, electronics, pads)
+    rng = np.random.default_rng()
 
-    event = SimEvent(fake_data, vertex, proton_numbers, mass_numbers)
+    event = simulate(fake_data, vertex, proton_numbers, mass_numbers, config, rng, [0])
 
-    assert len(event.nuclei) == 2
+    assert len(event) == 2