Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 30 additions & 3 deletions columnflow/production/cms/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def pdf_weights(
_raise_unknown_action("outlier_log_mode", outlier_log_mode, ("none", "info", "debug", "warning"))

# check for the correct amount of weights
n_weights = ak.num(events.LHEPdfWeight, axis=1)
n_weights = ak.num(ak.drop_none(ak.nan_to_none(events.LHEPdfWeight)), axis=1)
invalid_mask = (n_weights != 101) & (n_weights != 103)

# handle invalid number of weights when configured to raise
Expand Down Expand Up @@ -120,7 +120,7 @@ def pdf_weights(
frac = ak.sum(invalid_mask) / len(events) * 100
logger.warning(
"the number of LHEPdfWeights is expected to be 101 or 103, but also found values "
f"'{bad_values}' in dataset {self.dataset_inst.name}, will set pdf weights to 1 for "
f"'{bad_values}' in dataset {self.dataset_inst.name}, will set pdf weights to 0 for "
f"these events ({frac:.2f}%)",
)

Expand Down Expand Up @@ -205,7 +205,34 @@ def pdf_weights(
}[outlier_log_mode]
msg_func(msg)

# handle invalid values
if ak.any(invalid_mask) & ~ak.all(invalid_mask):
# catch events where the number of weights is unexpected
occurances = ak.sum(invalid_mask)
frac = occurances / len(stddev) * 100
msg = (
f"in dataset {self.dataset_inst.name}, there are {occurances} ({frac:.2f}%) "
"events where the number of (non Nan) weights is unexpected"
)

if outlier_action == "remove":
# set all pdf weights to 0 for events with an unexpected number of (non-NaN) weights
events = set_ak_column_f32(events, "pdf_weight", ak.where(invalid_mask, 0, events.pdf_weight))
events = set_ak_column_f32(events, "pdf_weight_up", ak.where(invalid_mask, 0, events.pdf_weight_up))
events = set_ak_column_f32(events, "pdf_weight_down", ak.where(invalid_mask, 0, events.pdf_weight_down))

msg += "; the nominal/up/down pdf_weight columns have been set to 0 for these events"
elif outlier_action == "raise":
raise Exception(msg)

msg_func = {
"none": lambda msg: None,
"info": logger.info,
"warning": logger.warning,
"debug": logger.debug,
}[outlier_log_mode]
msg_func(msg)


invalid_pdf_weight = (pdf_weight_nominal == 0)
if ak.any(invalid_pdf_weight):
# set all pdf weights to 0 when the nominal pdf weight is 0
Expand Down
32 changes: 32 additions & 0 deletions columnflow/production/cms/scale.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,21 @@ def murmuf_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
weights[non_zero_mask] = murf_weights[:, indices[index_name]]
events = set_ak_column_f32(events, column, weights)

# check for events in which all LHEScaleWeight values are NaN and set their weights to 0
has_nan_values = ak.all(ak.is_none(ak.nan_to_none(events.LHEScaleWeight), axis=-1), axis=-1)
if ak.any(has_nan_values):
logger.warning(
f"All values of LHEScaleWeights are Nan in {ak.sum(has_nan_values)} events. "
r"Saving zeros for '{murmuf,mur,muf}_weight' of these events",
)
for postfix in ["", "_up", "_down"]:
events = set_ak_column_f32(events, f"murmuf_weight{postfix}", ak.where(
has_nan_values, 0, events[f"murmuf_weight{postfix}"]))
events = set_ak_column_f32(events, f"mur_weight{postfix}", ak.where(
has_nan_values, 0, events[f"mur_weight{postfix}"]))
events = set_ak_column_f32(events, f"muf_weight{postfix}", ak.where(
has_nan_values, 0, events[f"muf_weight{postfix}"]))

return events


Expand All @@ -167,6 +182,8 @@ def murmuf_envelope_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Ar
Resources:
- https://cms-nanoaod-integration.web.cern.ch/integration/master/mc94X_doc.html
"""

# count the available LHEScaleWeight entries per event
# NOTE(review): NaN values are not removed before counting here - confirm whether they should be
n_weights = ak.num(events.LHEScaleWeight, axis=1)

# in rare cases, some events might have 0 weights
Expand Down Expand Up @@ -218,6 +235,21 @@ def murmuf_envelope_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Ar
events = set_ak_column_f32(events, "murmuf_envelope_weight_down", ak.min(murf_weights, axis=1))
events = set_ak_column_f32(events, "murmuf_envelope_weight_up", ak.max(murf_weights, axis=1))

# check for events in which all LHEScaleWeight values are NaN and set their envelope weights to 0
has_nan_values = ak.all(ak.is_none(ak.nan_to_none(events.LHEScaleWeight), axis=-1), axis=-1)
if ak.any(has_nan_values):
logger.warning(
f"All values of LHEScaleWeights are Nan in {ak.sum(has_nan_values)} events. "
r"Saving zeros for '{murmuf,mur,muf}_weight' of these events",
)
for postfix in ["", "_up", "_down"]:
events = set_ak_column_f32(events, "murmuf_envelope_weight", ak.where(
has_nan_values, 0, events.murmuf_envelope_weight))
events = set_ak_column_f32(events, "murmuf_envelope_weight_down", ak.where(
has_nan_values, 0, events.murmuf_envelope_weight_down))
events = set_ak_column_f32(events, "murmuf_envelope_weight_up", ak.where(
has_nan_values, 0, events.murmuf_envelope_weight_up))

return events


Expand Down
Loading