|
1 | | -import numpy as np |
| 1 | +import os.path |
| 2 | +from typing import Union |
| 3 | +from dataclasses import dataclass |
| 4 | +import numpy as np |
2 | 5 | import pandas as pd |
3 | 6 | import tensorflow as tf |
4 | 7 |
|
|
19 | 22 |
|
20 | 23 |
|
21 | 24 |
|
22 | | -def invertColumnTransformer(column_transformer, preprocessed_X): |
23 | | - from sklearn.compose import ColumnTransformer |
24 | | - assert isinstance(column_transformer, ColumnTransformer) |
25 | 25 |
|
26 | | - iCol = 0 |
27 | | - postprocessed_split = dict() |
28 | | - for name, algo, cols in column_transformer.transformers_: |
29 | | - preprocessed_cols = list() |
30 | | - for _ in range(len(cols)): |
31 | | - preprocessed_cols.append(preprocessed_X[:, iCol][:, None]) |
32 | | - iCol += 1 |
33 | | - preprocessed_block = np.concatenate(preprocessed_cols, axis=1) |
34 | | - if algo == "passthrough": |
35 | | - postprocessed_split[name] = preprocessed_block |
@dataclass(frozen=True)
class PyPhotons:
    """Photon-reconstruction step parametrized by two optional ML models.

    Each model attribute is a path to a saved Keras model directory (which
    also contains pickled sklearn pre/post-processing transformers), or a
    None-like sentinel (None / "None" / "none" / "null") to disable that
    stage — see the checks in ``__call__``.
    """
    # Directory of the efficiency classifier, or None-like to skip it.
    efficiency_model: Union[str, None]
    # Directory of the resolution/smearing model, or None-like to skip it.
    resolution_model: Union[str, None]
| 31 | + def invertColumnTransformer(self, column_transformer, preprocessed_X): |
| 32 | + from sklearn.compose import ColumnTransformer |
| 33 | + assert isinstance(column_transformer, ColumnTransformer) |
| 34 | + |
| 35 | + iCol = 0 |
| 36 | + postprocessed_split = dict() |
| 37 | + for name, algo, cols in column_transformer.transformers_: |
| 38 | + preprocessed_cols = list() |
| 39 | + for _ in range(len(cols)): |
| 40 | + preprocessed_cols.append(preprocessed_X[:, iCol][:, None]) |
| 41 | + iCol += 1 |
| 42 | + preprocessed_block = np.concatenate(preprocessed_cols, axis=1) |
| 43 | + if algo == "passthrough": |
| 44 | + postprocessed_split[name] = preprocessed_block |
| 45 | + else: |
| 46 | + postprocessed_split[name] = algo.inverse_transform(preprocessed_block) |
| 47 | + |
| 48 | + X = [None] * preprocessed_X.shape[1] |
| 49 | + for name, _, cols in column_transformer.transformers_: |
| 50 | + for i, iCol in enumerate(cols): |
| 51 | + X[iCol] = postprocessed_split[name][:, i][:, None] |
| 52 | + |
| 53 | + return np.concatenate(X, axis=1) |
| 54 | + |
| 55 | + def _eval_efficiency(self, X): |
| 56 | + efficiency_model = tf.keras.models.load_model(self.efficiency_model) |
| 57 | + with open(os.path.join(self.efficiency_model, 'tX.pkl'), 'rb') as tx_file: |
| 58 | + tX = pickle.load(tx_file) |
| 59 | + |
| 60 | + y_hat = efficiency_model.predict(tX.transform(X), batch_size=10000, verbose=0) |
| 61 | + |
| 62 | + return y_hat |
| 63 | + |
| 64 | + def _eval_smearing(self, X): |
| 65 | + smearing_model = tf.keras.models.load_model(self.resolution_model) |
| 66 | + with open(os.path.join(self.resolution_model, 'tX.pkl'), 'rb') as tx_file: |
| 67 | + tX = pickle.load(tx_file) |
| 68 | + |
| 69 | + with open(os.path.join(self.resolution_model, 'tY.pkl'), 'rb') as ty_file: |
| 70 | + tY = pickle.load(ty_file) |
| 71 | + |
| 72 | + prep_x = tX.transform(X) |
| 73 | + n_entries, _ = X.shape |
| 74 | + prep_y_hat = smearing_model.predict( |
| 75 | + np.c_[prep_x, np.random.normal(0, 1, (n_entries, 64))], |
| 76 | + verbose=0, |
| 77 | + batch_size=10000 |
| 78 | + ) |
| 79 | + |
| 80 | + ret = self.invertColumnTransformer(tY, prep_y_hat) |
| 81 | + return ret |
| 82 | + |
| 83 | + @PyLamarr.method |
| 84 | + def __call__(self, db): |
| 85 | + gen_photons = pd.read_sql_query(""" |
| 86 | + SELECT gev.datasource_id AS event_id, p.*, v.* |
| 87 | + FROM MCParticles AS p |
| 88 | + JOIN MCVertices AS v |
| 89 | + ON p.production_vertex == v.mcvertex_id |
| 90 | + AND p.genevent_id == v.genevent_id |
| 91 | + JOIN GenEvents AS gev |
| 92 | + ON gev.genevent_id = p.genevent_id |
| 93 | + WHERE |
| 94 | + pid = 22 |
| 95 | + AND |
| 96 | + p.pe > 1000 |
| 97 | + AND |
| 98 | + abs(v.x) < 200 |
| 99 | + AND |
| 100 | + abs(v.y) < 200 |
| 101 | + AND |
| 102 | + abs(v.z) < 2000 |
| 103 | + """, db) |
| 104 | + |
| 105 | + event_id = gen_photons.event_id.values.astype(np.int64) |
| 106 | + mcparticle_id = gen_photons.mcparticle_id.values.astype(np.int32) |
| 107 | + ovx, ovy, ovz = gen_photons[['x', 'y', 'z']].values.astype(np.float64).T |
| 108 | + e, px, py, pz = gen_photons[['pe', 'px', 'py', 'pz']].values.astype(np.float64).T |
| 109 | + |
| 110 | + print (e) |
| 111 | + |
| 112 | + tx = px/pz |
| 113 | + ty = py/pz |
| 114 | + |
| 115 | + ecal_z = 12689. # mm |
| 116 | + ecal_x = ovx + tx * (ecal_z - ovz) |
| 117 | + ecal_y = ovy + ty * (ecal_z - ovz) |
| 118 | + |
| 119 | + mask = ( |
| 120 | + (ecal_x > -4e3) & (ecal_x < 4e3) & |
| 121 | + (ecal_y > -4e3) & (ecal_y < 4e3) & |
| 122 | + (tx > -0.35) & (tx < 0.35) & |
| 123 | + (ty > -0.25) & (ty < 0.25) & |
| 124 | + (pz > 0) & (pz < 200e3) |
| 125 | + ) |
| 126 | + |
| 127 | + X_eff = np.c_[ecal_x, ecal_y, np.log(e/1e3), tx, ty, ovx, ovy, ovz, np.zeros_like(ecal_x)] |
| 128 | + |
| 129 | + |
| 130 | + if self.efficiency_model not in [None, "None", "none", "null"]: |
| 131 | + efficiency = self._eval_efficiency(X_eff) |
| 132 | + r = np.random.uniform(0, 1, len(X_eff)) |
| 133 | + |
| 134 | + ceff = np.cumsum(efficiency, 1) |
| 135 | + eff_as_photon = r < ceff[:,0] |
| 136 | + eff_as_photon_from_pi0 = (r > ceff[:,0]) & (r < ceff[:,1]) |
36 | 137 | else: |
37 | | - postprocessed_split[name] = algo.inverse_transform(preprocessed_block) |
38 | | - |
39 | | - X = [None] * preprocessed_X.shape[1] |
40 | | - for name, _, cols in column_transformer.transformers_: |
41 | | - for i, iCol in enumerate(cols): |
42 | | - X[iCol] = postprocessed_split[name][:, i][:, None] |
43 | | - |
44 | | - return np.concatenate(X, axis=1) |
45 | | - |
46 | | - |
47 | | -def _eval_efficiency(X): |
48 | | - efficiency_model = tf.keras.models.load_model(EFFICIENCY_MODEL/EFFICIENCY_MODEL_VERSION) |
49 | | - with open(EFFICIENCY_MODEL / EFFICIENCY_MODEL_VERSION / 'tX.pkl', 'rb') as tx_file: |
50 | | - tX = pickle.load(tx_file) |
51 | | - |
52 | | - y_hat = efficiency_model.predict(tX.transform(X), batch_size=10000, verbose=0) |
53 | | - |
54 | | - return y_hat |
55 | | - |
56 | | - |
57 | | -def _eval_smearing(X): |
58 | | - smearing_model = tf.keras.models.load_model(SMEARING_MODEL / SMEARING_MODEL_VERSION) |
59 | | - with open(SMEARING_MODEL / SMEARING_MODEL_VERSION / 'tX.pkl', 'rb') as tx_file: |
60 | | - tX = pickle.load(tx_file) |
61 | | - |
62 | | - with open(SMEARING_MODEL / SMEARING_MODEL_VERSION / 'tY.pkl', 'rb') as ty_file: |
63 | | - tY = pickle.load(ty_file) |
64 | | - |
65 | | - prep_x = tX.transform(X) |
66 | | - n_entries, _ = X.shape |
67 | | - prep_y_hat = smearing_model.predict( |
68 | | - np.c_[prep_x, np.random.normal(0, 1, (n_entries, 64))], |
69 | | - verbose=0, |
70 | | - batch_size=10000 |
71 | | - ) |
72 | | - |
73 | | - ret = invertColumnTransformer(tY, prep_y_hat) |
74 | | - return ret |
75 | | - |
76 | | - |
77 | | -@PyLamarr.function |
78 | | -def PyPhotons(db): |
79 | | - gen_photons = pd.read_sql_query(""" |
80 | | - SELECT gev.datasource_id AS event_id, p.*, v.* |
81 | | - FROM MCParticles AS p |
82 | | - JOIN MCVertices AS v |
83 | | - ON p.production_vertex == v.mcvertex_id |
84 | | - AND p.genevent_id == v.genevent_id |
85 | | - JOIN GenEvents AS gev |
86 | | - ON gev.genevent_id = p.genevent_id |
87 | | - WHERE |
88 | | - pid = 22 |
89 | | - AND |
90 | | - p.pe > 1000 |
91 | | - AND |
92 | | - abs(v.x) < 200 |
93 | | - AND |
94 | | - abs(v.y) < 200 |
95 | | - AND |
96 | | - abs(v.z) < 2000 |
97 | | - """, db) |
98 | | - |
99 | | - event_id = gen_photons.event_id.values |
100 | | - mcparticle_id = gen_photons.mcparticle_id.values |
101 | | - ovx, ovy, ovz = gen_photons[['x', 'y', 'z']].values.T |
102 | | - e, px, py, pz = gen_photons[['pe', 'px', 'py', 'pz']].values.T |
103 | | - |
104 | | - tx = px/pz |
105 | | - ty = py/pz |
106 | | - |
107 | | - ecal_z = 12689. # mm |
108 | | - ecal_x = ovx + tx * (ecal_z - ovz) |
109 | | - ecal_y = ovy + ty * (ecal_z - ovz) |
110 | | - |
111 | | - mask = ( |
112 | | - (ecal_x > -4e3) & (ecal_x < 4e3) & |
113 | | - (ecal_y > -4e3) & (ecal_y < 4e3) & |
114 | | - (tx > -0.35) & (tx < 0.35) & |
115 | | - (ty > -0.25) & (ty < 0.25) & |
116 | | - (pz > 0) & (pz < 200e3) |
117 | | - ) |
118 | | - |
119 | | - X_eff = np.c_[ecal_x, ecal_y, np.log(e/1e3), tx, ty, ovx, ovy, ovz, np.zeros_like(ecal_x)] |
120 | | - |
121 | | - efficiency = _eval_efficiency(X_eff) |
122 | | - r = np.random.uniform(0, 1, len(X_eff)) |
123 | | - |
124 | | - ceff = np.cumsum(efficiency, 1) |
125 | | - eff_as_photon = r < ceff[:,0] |
126 | | - eff_as_photon_from_pi0 = (r > ceff[:,0]) & (r < ceff[:,1]) |
127 | | - |
128 | | - X_res = np.c_[ecal_x, ecal_y, np.log(e/1e3), tx, ty, ovx, ovy, ovz, np.zeros_like(ecal_x), eff_as_photon, eff_as_photon_from_pi0] |
129 | | - dx, dy, de_rel, reco_PhotonID, reco_IsNotE, reco_IsNotH = _eval_smearing(X_res).T |
130 | | - |
131 | | - sigma_x = np.sqrt(np.exp(-1.65 * np.log(e) + 17.0)) |
132 | | - sigma_y = np.sqrt(np.exp(-1.65 * np.log(e) + 17.0)) |
133 | | - sigma_e = np.sqrt(np.exp(0.89 * np.log(e) + 5.1)) |
134 | | - |
135 | | - #IPython.embed() |
136 | | - |
137 | | - |
138 | | - clusters = pd.DataFrame(dict( |
139 | | - mask=mask & eff_as_photon, |
140 | | - event_id=event_id, |
141 | | - type=np.full_like(mcparticle_id, 4), |
142 | | - calocluster_id = mcparticle_id, |
143 | | - center_x = ecal_x,# + dx, #np.random.normal(ecal_x, sigma_x), |
144 | | - center_y = ecal_y,# + dy, #np.random.normal(ecal_y, sigma_y), |
145 | | - z = ecal_z, |
146 | | - # energy = e * (1. + de_rel), |
147 | | - energy = np.random.normal(e, sigma_e), |
148 | | - cov_xx = sigma_x * sigma_x, |
149 | | - cov_yy = sigma_y * sigma_y, |
150 | | - cov_ee = sigma_e * sigma_e, |
151 | | - )).query("mask").drop(columns=['mask']) |
152 | | - |
153 | | - clusters.to_sql("Cluster", db, if_exists='replace') |
154 | | - |
155 | | - cluster_info = pd.concat([ |
156 | | - pd.DataFrame(dict( |
157 | | - mask=eff_as_photon, |
158 | | - event_id=event_id, |
159 | | - calocluster_id=mcparticle_id, |
160 | | - info_key=np.full_like(mcparticle_id, info_key), |
161 | | - info_value=info_value |
162 | | - )).query("mask").drop(columns=['mask']) |
163 | | - for info_key, info_value in [ |
164 | | - (383, reco_IsNotH), |
165 | | - (382, reco_IsNotE), |
166 | | - (380, reco_PhotonID), |
167 | | - # These magic numbers come from https://lhcb-doxygen.web.cern.ch/lhcb-doxygen/davinci/v50r5/d2/d57/class_l_h_cb_1_1_proto_particle.html |
168 | | - ] |
169 | | - ]).sort_values('calocluster_id', ignore_index=True) |
170 | | - |
171 | | - cluster_info.to_sql("ClusterInfo", db, if_exists='replace') |
| 138 | + eff_as_photon = np.ones(len(X_eff), dtype=bool) |
| 139 | + eff_as_photon_from_pi0 = np.zeros(len(X_eff), dtype=bool) |
| 140 | + |
| 141 | + if self.resolution_model not in [None, "None", "none", "null"]: |
| 142 | + X_res = np.c_[ecal_x, ecal_y, np.log(e/1e3), tx, ty, ovx, ovy, ovz, np.zeros_like(ecal_x), eff_as_photon, eff_as_photon_from_pi0] |
| 143 | + dx, dy, de_rel, reco_PhotonID, reco_IsNotE, reco_IsNotH = self._eval_smearing(X_res).T |
| 144 | + |
| 145 | + sigma_x = np.sqrt(np.exp(-1.65 * np.log(e) + 17.0)) |
| 146 | + sigma_y = np.sqrt(np.exp(-1.65 * np.log(e) + 17.0)) |
| 147 | + sigma_e = np.sqrt(np.exp(0.89 * np.log(e) + 5.1)) |
| 148 | + else: |
| 149 | + dx, dy, de_rel, reco_PhotonID, reco_IsNotE, reco_IsNotH = np.zeros((6, len(e))) |
| 150 | + sigma_x, sigma_y, sigma_e = np.zeros((3, len(e))) |
| 151 | + |
| 152 | + #IPython.embed() |
| 153 | + |
| 154 | + clusters = pd.DataFrame(dict( |
| 155 | + mask=mask & eff_as_photon, |
| 156 | + event_id=event_id, |
| 157 | + type=np.full_like(mcparticle_id, 4), |
| 158 | + calocluster_id = mcparticle_id, |
| 159 | + center_x = ecal_x,# + dx, #np.random.normal(ecal_x, sigma_x), |
| 160 | + center_y = ecal_y,# + dy, #np.random.normal(ecal_y, sigma_y), |
| 161 | + z = ecal_z, |
| 162 | + # energy = e * (1. + de_rel), |
| 163 | + energy = np.random.normal(e, sigma_e), |
| 164 | + cov_xx = sigma_x * sigma_x, |
| 165 | + cov_yy = sigma_y * sigma_y, |
| 166 | + cov_ee = sigma_e * sigma_e, |
| 167 | + )).query("mask").drop(columns=['mask']) |
| 168 | + |
| 169 | + clusters.to_sql("Cluster", db, if_exists='replace') |
| 170 | + |
| 171 | + cluster_info = pd.concat([ |
| 172 | + pd.DataFrame(dict( |
| 173 | + mask=eff_as_photon, |
| 174 | + event_id=event_id, |
| 175 | + calocluster_id=mcparticle_id, |
| 176 | + info_key=np.full_like(mcparticle_id, info_key), |
| 177 | + info_value=info_value |
| 178 | + )).query("mask").drop(columns=['mask']) |
| 179 | + for info_key, info_value in [ |
| 180 | + (383, reco_IsNotH), |
| 181 | + (382, reco_IsNotE), |
| 182 | + (380, reco_PhotonID), |
| 183 | + # These magic numbers come from https://lhcb-doxygen.web.cern.ch/lhcb-doxygen/davinci/v50r5/d2/d57/class_l_h_cb_1_1_proto_particle.html |
| 184 | + ] |
| 185 | + ]).sort_values('calocluster_id', ignore_index=True) |
| 186 | + |
| 187 | + cluster_info.to_sql("ClusterInfo", db, if_exists='replace') |
172 | 188 |
|
0 commit comments