Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/changes/2203.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add schema for CORSIKA limits tables.
1 change: 1 addition & 0 deletions docs/changes/2219.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add glob-pattern collection, multi-output-path support, and zenith/azimuth/NSB-encoded plot filenames to `simtools-production-derive-corsika-limits`.
2 changes: 0 additions & 2 deletions src/simtools/applications/production_derive_corsika_limits.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,6 @@
+===========================+===========+========+==============================================+
| production_index | int64 | | Production index for multi-production runs. |
+---------------------------+-----------+--------+----------------------------------------------+
| event_data_file | string | | Input event-data pattern for this row. |
+---------------------------+-----------+--------+----------------------------------------------+
| primary_particle | string | | Particle type (e.g., gamma, proton). |
+---------------------------+-----------+--------+----------------------------------------------+
| array_name | string | | Array name (custom or as defined in |
Expand Down
87 changes: 40 additions & 47 deletions src/simtools/production_configuration/derive_corsika_limits.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import numpy as np
from astropy.table import Column, Table

from simtools.constants import SCHEMA_PATH
from simtools.data_model.metadata_collector import MetadataCollector
from simtools.io import ascii_handler, io_handler
from simtools.job_execution.process_pool import process_pool_map_ordered
Expand All @@ -23,36 +24,38 @@

_logger = logging.getLogger(__name__)

FILE_INFO_KEYS = ("primary_particle", "zenith", "azimuth", "nsb_level")
BROAD_RANGE_FILE_INFO_KEYS = {
"br_energy_min": "energy_min",
"br_energy_max": "energy_max",
"br_core_scatter_max": "core_scatter_max",
"br_viewcone_max": "viewcone_max",
}
COLUMN_DESCRIPTIONS = {
"br_energy_min": "Energy min from broad-range simulations.",
"br_energy_max": "Energy max from broad-range simulations.",
"br_core_scatter_max": "Core scatter max from broad-range simulations.",
"br_viewcone_max": "Viewcone max from broad-range simulations.",
}
CORSIKA_LIMITS_TABLE_SCHEMA_FILE = SCHEMA_PATH / "corsika_limits_table.schema.yml"


def _load_output_table_configuration_from_schema(schema_file):
    """
    Read the output-table layout from a schema file.

    Parameters
    ----------
    schema_file : str or Path
        Schema file handed to ``ascii_handler.collect_data_from_file``.

    Returns
    -------
    tuple
        ``(result_columns, column_descriptions, file_info_columns)`` where
        ``result_columns`` is the list of column names in schema order,
        ``column_descriptions`` maps column name to description (columns
        without a description are left out), and ``file_info_columns`` maps
        column name to its ``file_info_key`` (columns without one are left out).

    Raises
    ------
    KeyError
        If the schema has no (or an empty) ``data`` entry, or if the first
        ``data`` entry has no (or an empty) ``table_columns`` entry.
    """
    schema = ascii_handler.collect_data_from_file(file_name=schema_file)

    data_section = schema.get("data", [])
    if not data_section:
        raise KeyError(f"No 'data' entry found in schema {schema_file}")

    # Only the first data entry describes the output table.
    columns = data_section[0].get("table_columns", [])
    if not columns:
        raise KeyError(f"No 'table_columns' entry found in schema {schema_file}")

    names = []
    descriptions = {}
    file_info = {}
    for column in columns:
        name = column["name"]
        names.append(name)

        description = column.get("description")
        if description is not None:
            descriptions[name] = description

        info_key = column.get("file_info_key")
        if info_key is not None:
            file_info[name] = info_key

    return names, descriptions, file_info


RESULT_COLUMNS, COLUMN_DESCRIPTIONS, FILE_INFO_COLUMNS = (
_load_output_table_configuration_from_schema(CORSIKA_LIMITS_TABLE_SCHEMA_FILE)
)
LOSS_AXES = ("core_distance", "angular_distance")
RESULT_COLUMNS = [
"production_index",
"event_data_file",
"primary_particle",
"array_name",
"zenith",
"azimuth",
"nsb_level",
"lower_energy_limit",
"upper_radius_limit",
"viewcone_radius",
"br_energy_min",
"br_energy_max",
"br_core_scatter_max",
"br_viewcone_max",
]


def _normalize_event_data_file(event_data_file):
Expand Down Expand Up @@ -96,27 +99,18 @@ def _get_production_directory_name(production_pattern, existing_names=None):
str
Safe directory name (e.g., "production_prod_a_events").
"""
pattern_path = Path(production_pattern)
parts = []

if pattern_path.parent.name and pattern_path.parent.name != ".":
parts.append(pattern_path.parent.name)
if pattern_path.stem:
parts.append(pattern_path.stem)

readable_name = "_".join(parts) if parts else "production"
readable_name = re.sub(r"[^A-Za-z0-9]+", "_", readable_name)
readable_name = readable_name.strip("_")
readable_name = re.sub(r"_+", "_", readable_name)

if not readable_name:
readable_name = "production"
def _sanitize(name):
name = re.sub(r"[^A-Za-z0-9]+", "_", name)
return re.sub(r"_+", "_", name).strip("_")

pattern_path = Path(production_pattern)
parent_name = _sanitize(pattern_path.parent.name) if pattern_path.parent.name != "." else ""
readable_name = parent_name or _sanitize(pattern_path.stem) or "production"
base_name = f"production_{readable_name}"

if existing_names is None or base_name not in existing_names:
return base_name

return f"{base_name}_{get_uuid()}"


Expand Down Expand Up @@ -408,11 +402,10 @@ def _process_file(
differential_loss_bins_per_decade,
)
)
limits.update({key: histograms.file_info.get(key) for key in FILE_INFO_KEYS})
limits.update(
{
output_key: histograms.file_info.get(file_info_key)
for output_key, file_info_key in BROAD_RANGE_FILE_INFO_KEYS.items()
column_name: histograms.file_info.get(file_info_key)
for column_name, file_info_key in FILE_INFO_COLUMNS.items()
}
)

Expand Down
112 changes: 87 additions & 25 deletions src/simtools/runners/simtools_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,22 +100,64 @@ def run_applications(args_dict):


def _copy_collection_files(configurations, collection_config):
"""Copy listed files from application output paths to collection output path."""
if not collection_config:
return
"""Copy listed files from application output paths to one or more collection output paths.

Parameters
----------
configurations : list[dict]
Application configurations from the workflow config file.
collection_config : dict or list[dict] or None
A single collection entry (``{output_path, files}``) or a list of such
entries. ``None`` or an empty value is silently ignored.

output_path = collection_config.get("output_path")
files = collection_config.get("files") or []
if output_path is None or not files:
Raises
------
FileExistsError
When two different source files would produce the same basename in an
output directory.
"""
if not collection_config:
return
if isinstance(collection_config, dict):
collection_config = [collection_config]
Comment on lines +121 to +122

source_directories = _collect_source_directories(configurations)
collection_output_path = Path(output_path)
collection_output_path.mkdir(parents=True, exist_ok=True)
for entry in collection_config:
output_path = entry.get("output_path")
files = entry.get("files") or []
if output_path is None or not files:
continue
collection_output_path = Path(output_path)
collection_output_path.mkdir(parents=True, exist_ok=True)
for pattern in files:
_copy_pattern_files(pattern, source_directories, collection_output_path)


def _copy_pattern_files(pattern, source_directories, destination):
    """
    Copy all files matching *pattern* from source directories into *destination*.

    Files are looked up with ``_find_collection_files`` and copied with
    ``shutil.copy2`` under their own basename.

    Parameters
    ----------
    pattern : str
        Filename or glob pattern to search for.
    source_directories : list[Path]
        Directories to search.
    destination : Path
        Target directory (must already exist).

    Raises
    ------
    FileExistsError
        When a source file would overwrite a different file with the same name.
    """
    for source_file in _find_collection_files(pattern, source_directories):
        dest = destination / source_file.name
        if dest.exists():
            if dest.resolve() == source_file.resolve():
                # The match is the destination file itself (e.g. the collection
                # directory lies inside a searched source directory). Copying a
                # file onto itself makes shutil.copy2 raise SameFileError, so
                # there is nothing to do for this file.
                continue
            raise FileExistsError(
                f"Filename collision in collection: '{source_file.name}' would be "
                f"overwritten by '{source_file}'. Ensure output files have unique names."
            )
        shutil.copy2(source_file, dest)


def _collect_source_directories(configurations):
Expand All @@ -130,33 +172,53 @@ def _collect_source_directories(configurations):
return source_directories


def _find_collection_file(file_name, source_directories):
"""Find a named file in the list of source directories.
def _find_collection_files(pattern, source_directories):
"""Find files matching a name or glob pattern in the list of source directories.

For literal filenames (no wildcard characters), the existing exact-match
semantics are preserved: the first directory that contains the file wins and
a :exc:`FileNotFoundError` is raised when no match is found.

For glob patterns (containing ``*``, ``?``, or ``[``), all source
directories are searched recursively and all matching regular files are
returned in sorted order. A :obj:`logging.WARNING` is emitted when a glob
pattern yields no matches (rather than raising an error, because some
patterns are legitimately optional).

Parameters
----------
file_name : str
File name to locate.
source_directories : list
Directories to search in order.
pattern : str
Filename or glob pattern (e.g. ``"result.ecsv"`` or
``"angular_distance_vs_energy_*.png"``).
source_directories : list[Path]
Directories to search.

Returns
-------
Path
Path to the found file.
list[Path]
Matched files. Empty list only when *pattern* is a glob and no files
match.

Raises
------
FileNotFoundError
If the file is not found in any source directory.
If *pattern* is a literal filename and is not found in any source
directory.
"""
is_glob = any(c in pattern for c in ("*", "?", "["))
if is_glob:
matched = sorted(f for d in source_directories for f in d.rglob(pattern) if f.is_file())
if not matched:
logger.warning(
f"No files matched collection pattern '{pattern}' in {source_directories}."
)
return matched

for source_directory in source_directories:
candidate = source_directory / file_name
candidate = source_directory / pattern
if candidate.exists():
return candidate
raise FileNotFoundError(
f"Could not find collection file '{file_name}' in {source_directories}."
)
return [candidate]
raise FileNotFoundError(f"Could not find collection file '{pattern}' in {source_directories}.")


def _append_metadata_file(model_parameter_metadata_files, metadata_file):
Expand Down
86 changes: 86 additions & 0 deletions src/simtools/schemas/corsika_limits_table.schema.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
%YAML 1.2
---
title: Schema for production_derive_corsika_limits output
schema_version: 0.1.0
meta_schema: simpipe-schema
meta_schema_url: https://raw.githubusercontent.com/gammasim/simtools/main/src/simtools/schemas/model_parameter_and_data_schema.metaschema.yml
meta_schema_version: 0.1.0
name: production_derive_corsika_limits
description: |-
Derived CORSIKA limits for energy threshold, core distance, and viewcone radius.
data:
- type: data_table
table_columns:
- name: production_index
description: Production index for multi-production runs.
required: true
unit: dimensionless
type: int64
- name: primary_particle
description: Primary particle type.
required: true
unit: dimensionless
type: string
file_info_key: primary_particle
- name: array_name
description: Array layout or array identifier.
required: true
unit: dimensionless
type: string
- name: zenith
description: Zenith angle of the array pointing direction.
required: true
unit: deg
type: float64
file_info_key: zenith
- name: azimuth
description: Azimuth angle of the array pointing direction.
required: true
unit: deg
type: float64
file_info_key: azimuth
- name: nsb_level
description: Night sky background level.
required: true
unit: dimensionless
type: float64
file_info_key: nsb_level
- name: lower_energy_limit
description: Derived lower energy limit.
required: true
unit: TeV
type: float64
- name: upper_radius_limit
description: Derived upper core distance limit.
required: true
unit: m
type: float64
- name: viewcone_radius
description: Derived viewcone radius limit.
required: true
unit: deg
type: float64
- name: br_energy_min
description: Broad-range simulation minimum energy.
required: false
unit: TeV
type: float64
file_info_key: energy_min
- name: br_energy_max
description: Broad-range simulation maximum energy.
required: false
unit: TeV
type: float64
file_info_key: energy_max
- name: br_core_scatter_max
description: Broad-range simulation core scatter maximum.
required: false
unit: m
type: float64
file_info_key: core_scatter_max
- name: br_viewcone_max
description: Broad-range simulation viewcone maximum.
required: false
unit: deg
type: float64
file_info_key: viewcone_max
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,9 @@ definitions:
type: array
items:
$ref: '#/definitions/TableColumn'
file_info_key:
type: string
description: "key in event-file metadata used to populate this table column"
required:
- description
- name
Expand Down
Loading
Loading