diff --git a/.github/conda/bld.bat b/.github/conda/bld.bat
deleted file mode 100644
index 89481145..00000000
--- a/.github/conda/bld.bat
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-$PYTHON setup.py install --single-version-externally-managed --record=record.txt
diff --git a/.github/conda/build.sh b/.github/conda/build.sh
deleted file mode 100644
index 89481145..00000000
--- a/.github/conda/build.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-$PYTHON setup.py install --single-version-externally-managed --record=record.txt
diff --git a/.github/conda/conda_build_config.yaml b/.github/conda/conda_build_config.yaml
deleted file mode 100644
index 49777cb0..00000000
--- a/.github/conda/conda_build_config.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-python:
-  - 3.13
-  - 3.12
-  - 3.11
-  - 3.10
-
-
diff --git a/.github/conda/meta.yaml b/.github/conda/meta.yaml
deleted file mode 100644
index b78f32b9..00000000
--- a/.github/conda/meta.yaml
+++ /dev/null
@@ -1,56 +0,0 @@
-{% set data = load_setup_py_data() %}
-
-package:
-  name: ctlearn
-  version: {{ data.get('version') }}
-source:
-  path: ../..
-
-build:
-  #noarch: generic
-  number: 0
-
-requirements:
-  build:
-    - python #==3.12
-    - numpy >=1.20
-    - setuptools
-    - astropy
-    - scipy
-    - jupyter
-    - ctapipe ==0.20.0
-    - pytables >=3.8
-    - pandas
-  host:
-    - python #==3.12
-    - numpy >=1.20
-    - astropy
-    - setuptools
-    - scipy
-    - jupyter
-    - ctapipe ==0.20.0
-    - pytables >=3.8
-    - pandas
-  run:
-    - python #==3.12
-    - numpy >=1.20
-    - jupyter
-    - setuptools
-    - astropy
-    - scipy
-    - ctapipe ==0.20.0
-    - pytables >=3.8
-    - pandas
-
-test:
-  imports:
-    - ctlearn
-about:
-  home: https://github.com/ctlearn-project/ctlearn/
-  license: BSD3-Clause
-  license_file: LICENSE
-  summary: Deep Learning for IACT Event Reconstruction.
-extra:
-  recipe-maintainers:
-    - TjarkMiener
-    - nietootein
diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml
index c2c8c558..da00acbf 100644
--- a/.github/workflows/python-package-conda.yml
+++ b/.github/workflows/python-package-conda.yml
@@ -1,4 +1,3 @@
-
 name: CI
 
 on:
@@ -15,39 +14,61 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-22.04]
-        pyv: [ '3.10','3.11', '3.12', '3.13']
-      max-parallel: 5
+        python-version: ['3.12', '3.13', '3.14']
+        dl1dh-version: ['latest', 'nightly']
+      max-parallel: 6
     runs-on: ${{ matrix.os }}
+    continue-on-error: ${{ matrix.dl1dh-version == 'nightly' || matrix.python-version == '3.14' }}
     steps:
-    - uses: actions/checkout@v4
-    - name: Set up Python ${{ matrix.pyv }}
-      run: |
-        # Install Miniconda
-        wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
-        bash miniconda.sh -b -p $HOME/miniconda
-        echo "$HOME/miniconda/bin" >> $GITHUB_PATH
-        source $HOME/miniconda/bin/activate
-        # Install Mamba via conda (since we don't have mamba yet)
-        $HOME/miniconda/bin/conda config --add channels conda-forge
-        $HOME/miniconda/bin/conda install -y mamba=2.0.8
-        mamba install -y python=${{ matrix.pyv }}
-    - name: Add MKL_THREADING_LAYER variable
-      run: echo "MKL_THREADING_LAYER=GNU" >> $GITHUB_ENV
-    - name: Install dependencies with Mamba
-      run: |
-        source $HOME/miniconda/bin/activate
-        mamba env update --file environment.yml --name base
-    - name: Lint with flake8
-      run: |
-        source $HOME/miniconda/bin/activate
-        mamba install flake8
-        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
-    - name: Install with pip
-      run: |
-        pip install -e .
-    - name: Test with pytest
-      run: |
-        source $HOME/miniconda/bin/activate
-        mamba install pytest
-        pytest
+    - uses: actions/checkout@v4
+
+    - name: Set up Miniconda
+      run: |
+        wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
+        bash miniconda.sh -b -p $HOME/miniconda
+        echo "$HOME/miniconda/bin" >> $GITHUB_PATH
+        source $HOME/miniconda/etc/profile.d/conda.sh
+        conda config --add channels conda-forge
+        conda install -y mamba
+
+    - name: Create Python environment
+      run: |
+        source $HOME/miniconda/etc/profile.d/conda.sh
+        mamba create -y -n ctlearn python==${{ matrix.python-version }} -c conda-forge
+        conda activate ctlearn
+
+    - name: Install dependencies
+      run: |
+        source $HOME/miniconda/etc/profile.d/conda.sh
+        conda activate ctlearn
+        sudo apt-get update
+        sudo apt-get install -y git
+        pip install --upgrade pip
+        pip install pylint pylint-exit anybadge eventio pytest flake8
+        if [ "${{ matrix.dl1dh-version }}" = "nightly" ]; then
+          pip install git+https://github.com/cta-observatory/dl1-data-handler.git
+        else
+          pip install dl1-data-handler
+        fi
+
+    - name: Add MKL_THREADING_LAYER variable
+      run: echo "MKL_THREADING_LAYER=GNU" >> $GITHUB_ENV
+
+    - name: Lint with flake8
+      run: |
+        source $HOME/miniconda/etc/profile.d/conda.sh
+        conda activate ctlearn
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+
+    - name: Install package with pip
+      run: |
+        source $HOME/miniconda/etc/profile.d/conda.sh
+        conda activate ctlearn
+        pip install -e .
+
+    - name: Run pytest
+      run: |
+        source $HOME/miniconda/etc/profile.d/conda.sh
+        conda activate ctlearn
+        pytest
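For debugging CI failures, the job above can be replayed locally. A minimal sketch under the same assumptions as the workflow (a Miniconda install at $HOME/miniconda, the same package set as the install step):

    # Local replay of the CI job; the environment name "ctlearn-ci" is arbitrary.
    source "$HOME/miniconda/etc/profile.d/conda.sh"
    mamba create -y -n ctlearn-ci python=3.12
    conda activate ctlearn-ci
    pip install pytest flake8 dl1-data-handler
    # nightly leg of the matrix instead:
    #   pip install git+https://github.com/cta-observatory/dl1-data-handler.git
    pip install -e .
    pytest
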
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 646613b1..984895f4 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -2,13 +2,17 @@ name: Release CD
 
 on:
   release:
-     types: [published]
+    types: [published]
   workflow_dispatch:
+  pull_request:
+    paths:
+      - '.github/workflows/**'
+
 jobs:
   pypi-publish:
     name: Publish release to PyPI
     environment:
-       name: pypi
+      name: pypi
       url: https://pypi.org/project/ctlearn/
     permissions:
       id-token: write
@@ -20,64 +24,29 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-22.04]
-        pyv: ['3.10']
+        pyv: ['3.12']
       max-parallel: 5
     runs-on: ${{ matrix.os }}
+
     steps:
-    - uses: actions/checkout@v4
-    - name: Set up Python ${{ matrix.pyv }}
-      run: |
-        conda install -y python=${{ matrix.pyv }}
-
-    - name: Add conda to system path
-      run: |
-        #$CONDA is an environment variable pointing to the root of the miniconda directory
-        echo $CONDA/bin >> $GITHUB_PATH
-
-    - name: Install dependencies
-      run: |
-        conda env update --file environment.yml --name base
-
-    - name: Build package
-      run: |
-        python --version
-        pip install -U build
-        python -m build
-    - name: Publish package distributions to PyPI
-      uses: pypa/gh-action-pypi-publish@release/v1
-
-  condapublish:
-    strategy:
-      matrix:
-        os: [ubuntu-22.04]
-        pyv: ["3.10"]
-      max-parallel: 5
-    runs-on: ${{ matrix.os }}
-    permissions:
-      id-token: write
-      contents: write
-    steps:
-    - uses: actions/checkout@v4
-      with:
-        fetch-depth: 0
-
-    - name: Set up Python ${{ matrix.pyv }}
-      run: |
-        conda install -y python=${{ matrix.pyv }}
-
-    - name: Add conda to system path
-      run: |
-        # $CONDA is an environment variable pointing to the root of the miniconda directory
-        echo $CONDA/bin >> $GITHUB_PATH
-
-    - name: Install dependencies
-      run: |
-        conda env update --file environment.yml --name base
-        sudo apt-get install python3-numpy
-
-    - name: publish-to-conda
-      uses: fcakyon/conda-publish-action@v1.3
-      with:
-        subdir: '.github/conda'
-        anacondatoken: ${{ secrets.ANACONDA_TOKEN }}
+    - uses: actions/checkout@v4
+    - name: Set up micromamba (Python ${{ matrix.pyv }})
+      uses: mamba-org/setup-micromamba@v1
+      with:
+        environment-name: ctlearn
+        create-args: >-
+          python=${{ matrix.pyv }}
+          pip
+        cache-environment: true
+
+    - name: Build package
+      shell: micromamba-shell {0}
+      run: |
+        python --version
+        pip install -U build
+        python -m build
+
+    - name: Publish package distributions to PyPI
+      if: github.event_name == 'release'
+      uses: pypa/gh-action-pypi-publish@release/v1
\ No newline at end of file
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index 9d5aecee..71fb5123 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -6,7 +6,7 @@ sphinx:
 build:
   os: ubuntu-22.04
   tools:
-    python: "3.10"
+    python: "3.12"
 
 python:
   install:
diff --git a/README.rst b/README.rst
index 81556e43..f69dbb25 100644
--- a/README.rst
+++ b/README.rst
@@ -28,22 +28,22 @@ CTLearn is a package under active development to run deep learning models to ana
 
-Installation for users
-----------------------
+Installation
+------------
 
-Download and install `Anaconda `_\ , or, for a minimal installation, `Miniconda `_.
-The following command will set up a conda virtual environment, add the
-necessary package channels, and install CTLearn specified version and its dependencies:
+First, create and activate a fresh conda environment:
 
 .. code-block:: bash
 
-   CTLEARN_VER=0.10.3
-   wget https://raw.githubusercontent.com/ctlearn-project/ctlearn/v$CTLEARN_VER/environment.yml
-   conda env create -n [ENVIRONMENT_NAME] -f environment.yml
-   conda activate [ENVIRONMENT_NAME]
-   pip install ctlearn==$CTLEARN_VER
-   ctlearn -h
+   mamba create -n ctlearn -c conda-forge python==3.12 llvmlite
+   mamba activate ctlearn
+
+The latest version of this package can be installed as a pip package:
+
+.. code-block:: bash
+
+   pip install ctlearn
 
-This should automatically install all dependencies (NOTE: this may take some time, as by default MKL is included as a dependency of NumPy and it is very large).
 
 See the documentation for further information like `installation instructions for developers `_, `package usage `_, and `dependencies `_ among other topics.
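A quick post-install smoke test (hypothetical session; it assumes the `ctlearn` console entry point that the old instructions verified with, and a conventional `__version__` attribute):

    mamba activate ctlearn
    ctlearn -h
    python -c "import ctlearn; print(ctlearn.__version__)"
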
""" from ctapipe.tools.process import ProcessorTool output = dl1_tmp_path / "gamma.dl1.h5" + argv = [ + f"--input={gamma_simtel_path}", + f"--output={output}", + "--write-images", + "--SimTelEventSource.focal_length_choice=EQUIVALENT", + ] + assert run_tool(ProcessorTool(), argv=argv, cwd=dl1_tmp_path) == 0 + return output + +@pytest.fixture(scope="session") +def dl1_proton_file(dl1_tmp_path, proton_simtel_path): + """ + DL1 file containing both images and parameters from a proton simulation set. + """ + from ctapipe.tools.process import ProcessorTool + output = dl1_tmp_path / "proton.dl1.h5" argv = [ - f"--input={prod5_gamma_simtel_path}", + f"--input={proton_simtel_path}", f"--output={output}", "--write-images", - "--DataWriter.Contact.name=αℓℓ the äüöß", + "--SimTelEventSource.focal_length_choice=EQUIVALENT", ] assert run_tool(ProcessorTool(), argv=argv, cwd=dl1_tmp_path) == 0 return output @pytest.fixture(scope="session") -def r1_gamma_file(r1_tmp_path, prod5_gamma_simtel_path): +def r1_gamma_file(r1_tmp_path, gamma_simtel_path): """ R1 file containing both waveforms and parameters from a gamma simulation set. """ @@ -50,10 +73,64 @@ def r1_gamma_file(r1_tmp_path, prod5_gamma_simtel_path): output = r1_tmp_path / "gamma.r1.h5" argv = [ - f"--input={prod5_gamma_simtel_path}", + f"--input={gamma_simtel_path}", f"--output={output}", f"--DataWriter.write_r1_waveforms=True", - "--DataWriter.Contact.name=αℓℓ the äüöß", + "--SimTelEventSource.focal_length_choice=EQUIVALENT", ] assert run_tool(ProcessorTool(), argv=argv, cwd=r1_tmp_path) == 0 - return output \ No newline at end of file + return output + +@pytest.fixture(scope="session") +def ctlearn_trained_dl1_models(dl1_gamma_file, dl1_proton_file, tmp_path_factory): + """ + Test training CTLearn model using the DL1 gamma and proton files for all reconstruction tasks. + Each test run gets its own isolated temp directories. 
+ """ + tmp_path = tmp_path_factory.mktemp("ctlearn_models") + + # Temporary directories for signal and background + signal_dir = tmp_path / "gamma_dl1" + signal_dir.mkdir(parents=True, exist_ok=True) + + background_dir = tmp_path / "proton_dl1" + background_dir.mkdir(parents=True, exist_ok=True) + + # Hardcopy DL1 gamma file to the signal directory + shutil.copy(dl1_gamma_file, signal_dir) + # Hardcopy DL1 proton file to the background directory + shutil.copy(dl1_proton_file, background_dir) + + ctlearn_trained_dl1_models = {} + for reco_task in ["type", "energy", "cameradirection"]: + # Output directory for trained model + output_dir = tmp_path / f"ctlearn_{reco_task}" + + # Build command-line arguments + allowed_tels = [7, 13, 15, 16, 17, 19] + argv = [ + f"--signal={signal_dir}", + "--pattern-signal=*.dl1.h5", + f"--output={output_dir}", + f"--reco={reco_task}", + "--TrainCTLearnModel.n_epochs=1", + "--TrainCTLearnModel.batch_size=2", + "--DLImageReader.focal_length_choice=EQUIVALENT", + f"--DLImageReader.allowed_tels={allowed_tels}", + ] + + # Include background only for classification task + if reco_task == "type": + argv.extend([ + f"--background={background_dir}", + "--pattern-background=*.dl1.h5", + "--DLImageReader.enforce_subarray_equality=False", + ]) + + # Run training + assert run_tool(TrainCTLearnModel(), argv=argv, cwd=tmp_path) == 0 + + ctlearn_trained_dl1_models[reco_task] = output_dir / "ctlearn_model.keras" + # Check that the trained model exists + assert ctlearn_trained_dl1_models[reco_task].exists() + return ctlearn_trained_dl1_models \ No newline at end of file diff --git a/ctlearn/core/loader.py b/ctlearn/core/loader.py index 5b723f9c..92fd96c2 100644 --- a/ctlearn/core/loader.py +++ b/ctlearn/core/loader.py @@ -155,7 +155,7 @@ def _get_mono_item(self, batch): """ # Retrieve the telescope images and store in the features dictionary labels = {} - features = {"input": batch["features"].data} + features = batch["features"].data if "type" in self.tasks: labels["type"] = to_categorical( batch["true_shower_primary_class"].data, @@ -186,9 +186,6 @@ def _get_mono_item(self, batch): ), axis=1, ) - # Temp fix for supporting keras2 & keras3 - if int(keras.__version__.split(".")[0]) >= 3: - features = features["input"] return features, labels def _get_stereo_item(self, batch): @@ -303,13 +300,10 @@ def _get_stereo_item(self, batch): ) # Store the fatures in the features dictionary if "features" in batch.colnames: - features = {"input": np.array(features)} + features = np.array(features) # TDOO: Add support for both feature vectors if "mono_feature_vectors" in batch.colnames: - features = {"input": np.array(mono_feature_vectors)} + features = np.array(mono_feature_vectors) if "stereo_feature_vectors" in batch.colnames: - features = {"input": np.array(stereo_feature_vectors)} - # Temp fix for supporting keras2 & keras3 - if int(keras.__version__.split(".")[0]) >= 3: - features = features["input"] + features = np.array(stereo_feature_vectors) return features, labels diff --git a/ctlearn/core/model.py b/ctlearn/core/model.py index 07c79d46..bfac48c2 100644 --- a/ctlearn/core/model.py +++ b/ctlearn/core/model.py @@ -288,7 +288,7 @@ def _build_backbone(self, input_shape): """ # Define the input layer from the input shape - network_input = keras.Input(shape=input_shape, name="input") + network_input = keras.Input(shape=input_shape) # Get model arcihtecture parameters for the backbone filters_list = [layer["filters"] for layer in self.architecture] kernel_sizes = [layer["kernel_size"] 
diff --git a/ctlearn/core/model.py b/ctlearn/core/model.py
index 07c79d46..bfac48c2 100644
--- a/ctlearn/core/model.py
+++ b/ctlearn/core/model.py
@@ -288,7 +288,7 @@ def _build_backbone(self, input_shape):
         """
         # Define the input layer from the input shape
-        network_input = keras.Input(shape=input_shape, name="input")
+        network_input = keras.Input(shape=input_shape)
         # Get model arcihtecture parameters for the backbone
         filters_list = [layer["filters"] for layer in self.architecture]
         kernel_sizes = [layer["kernel_size"] for layer in self.architecture]
@@ -472,7 +472,7 @@ def _build_backbone(self, input_shape):
             Keras input layer object for the backbone model.
         """
         # Define the input layer from the input shape
-        network_input = keras.Input(shape=input_shape, name="input")
+        network_input = keras.Input(shape=input_shape)
         # Apply initial padding if specified
         if self.init_padding > 0:
             network_input = keras.layers.ZeroPadding2D(
@@ -880,7 +880,7 @@ def _build_backbone(self, input_shape):
         """
         # Define the input layer from the input shape
-        network_input = keras.Input(shape=input_shape, name="input")
+        network_input = keras.Input(shape=input_shape)
         # Set the backbone model to be trainable or not
         for layer in self.model.layers:
             if layer.name.endswith("_block"):
diff --git a/ctlearn/core/tests/test_loader.py b/ctlearn/core/tests/test_loader.py
index 97f87172..7fe71900 100644
--- a/ctlearn/core/tests/test_loader.py
+++ b/ctlearn/core/tests/test_loader.py
@@ -1,17 +1,17 @@
-import pytest
 from traitlets.config.loader import Config
 
 from dl1_data_handler.reader import DLImageReader
 from ctlearn.core.loader import DLDataLoader
 
-def test_data_loader(dl1_tmp_path, dl1_gamma_file):
+def test_data_loader(dl1_gamma_file):
     """check"""
     # Create a configuration suitable for the test
     config = Config(
         {
             "DLImageReader": {
                 "allowed_tels": [4],
+                "focal_length_choice": "EQUIVALENT",
             },
         }
     )
@@ -34,4 +34,4 @@ def test_data_loader(dl1_gamma_file):
         and "skydirection" in labels
     )
     # Check the shape of the features
-    assert features["input"].shape == (1, 110, 110, 2)
+    assert features.shape == (1, 110, 110, 2)
diff --git a/ctlearn/tools/__init__.py b/ctlearn/tools/__init__.py
index 996469a1..d54df32d 100644
--- a/ctlearn/tools/__init__.py
+++ b/ctlearn/tools/__init__.py
@@ -1,2 +1,13 @@
 """ctlearn command line tools.
 """
+
+from .train_model import TrainCTLearnModel
+from .predict_LST1 import LST1PredictionTool
+from .predict_model import MonoPredictCTLearnModel, StereoPredictCTLearnModel
+
+__all__ = [
+    "TrainCTLearnModel",
+    "LST1PredictionTool",
+    "MonoPredictCTLearnModel",
+    "StereoPredictCTLearnModel"
+]
\ No newline at end of file
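With the re-exports above, the Tool classes are importable at package level, which is what the new `conftest.py` relies on. A short sketch:

    # The package-level re-exports make the Tool classes directly importable.
    from ctapipe.core import run_tool
    from ctlearn.tools import TrainCTLearnModel, MonoPredictCTLearnModel

    # ctapipe Tools can be driven programmatically, mirroring the test fixtures:
    tool = TrainCTLearnModel()
    # run_tool(tool, argv=[...]) returns the exit code, as used in conftest.py.
    print(type(tool).__name__)
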
diff --git a/ctlearn/tools/predict_LST1.py b/ctlearn/tools/predict_LST1.py
index e27e68c6..a4751d9a 100644
--- a/ctlearn/tools/predict_LST1.py
+++ b/ctlearn/tools/predict_LST1.py
@@ -120,7 +120,7 @@ class LST1PredictionTool(Tool):
     load_type_model_from = Path(
         default_value=None,
         help=(
-            "Path to a Keras model file (Keras3) or directory (Keras2) "
+            "Path to a Keras model file (Keras3) "
             "for the classification of the primary particle type."
         ),
         allow_none=True,
@@ -132,7 +132,7 @@ class LST1PredictionTool(Tool):
     load_energy_model_from = Path(
         default_value=None,
         help=(
-            "Path to a Keras model file (Keras3) or directory (Keras2) "
+            "Path to a Keras model file (Keras3) "
            "for the regression of the primary particle energy."
         ),
         allow_none=True,
@@ -144,7 +144,7 @@ class LST1PredictionTool(Tool):
     load_cameradirection_model_from = Path(
         default_value=None,
         help=(
-            "Path to a Keras model file (Keras3) or directory (Keras2) "
+            "Path to a Keras model file (Keras3) "
             "for the regression of the primary particle arrival direction "
             "based on the camera coordinate offsets."
         ),
@@ -514,9 +514,6 @@ def start(self):
                 image = get_unmapped_image(event, self.channels, self.transforms)
                 data.append(self.image_mapper.map_image(image))
-            input_data = {"input": np.array(data)}
-            # Temp fix for supporting keras2 & keras3
-            if int(keras.__version__.split(".")[0]) >= 3:
-                input_data = input_data["input"]
+            input_data = np.array(data)
 
             event_id.extend(dl1_table["event_id"].data)
             tel_azimuth.extend(dl1_table["tel_az"].data)
diff --git a/ctlearn/tools/predict_model.py b/ctlearn/tools/predict_model.py
index 0eaa8b23..d0f3649e 100644
--- a/ctlearn/tools/predict_model.py
+++ b/ctlearn/tools/predict_model.py
@@ -6,6 +6,7 @@
 import pathlib
 import numpy as np
 import os
+import tables
 import tensorflow as tf
 import keras
 
@@ -43,7 +44,50 @@
     classes_with_traits,
 )
 from ctapipe.monitoring.interpolation import PointingInterpolator
+from ctapipe.instrument import SubarrayDescription
 from ctapipe.io import read_table, write_table, HDF5Merger
+from ctapipe.io.hdf5dataformat import (
+    DL0_TEL_POINTING_GROUP,
+    DL1_CAMERA_COEFFICIENTS_GROUP,
+    DL1_COLUMN_NAMES,
+    DL1_IMAGE_STATISTICS_TABLE,
+    DL1_PIXEL_STATISTICS_GROUP,
+    DL1_SUBARRAY_GROUP,
+    DL1_SUBARRAY_POINTING_GROUP,
+    DL1_SUBARRAY_TRIGGER_TABLE,
+    DL1_TEL_GROUP,
+    DL1_TEL_CALIBRATION_GROUP,
+    DL1_TEL_ILLUMINATOR_THROUGHPUT_GROUP,
+    DL1_TEL_IMAGES_GROUP,
+    DL1_TEL_MUON_GROUP,
+    DL1_TEL_MUON_THROUGHPUT_GROUP,
+    DL1_TEL_OPTICAL_PSF_GROUP,
+    DL1_TEL_PARAMETERS_GROUP,
+    DL1_TEL_POINTING_GROUP,
+    DL1_TEL_TRIGGER_TABLE,
+    DL2_EVENT_STATISTICS_GROUP,
+    DL2_SUBARRAY_CROSS_CALIBRATION_GROUP,
+    DL2_SUBARRAY_INTER_CALIBRATION_GROUP,
+    DL2_TEL_GROUP,
+    FIXED_POINTING_GROUP,
+    OBSERVATION_BLOCK_TABLE,
+    R0_TEL_GROUP,
+    R1_TEL_GROUP,
+    SCHEDULING_BLOCK_TABLE,
+    SHOWER_DISTRIBUTION_TABLE,
+    SIMULATION_GROUP,
+    SIMULATION_IMAGES_GROUP,
+    SIMULATION_IMPACT_GROUP,
+    SIMULATION_PARAMETERS_GROUP,
+    SIMULATION_RUN_TABLE,
+    SIMULATION_SHOWER_TABLE,
+    DL2_TEL_PARTICLETYPE_GROUP,
+    DL2_TEL_ENERGY_GROUP,
+    DL2_TEL_GEOMETRY_GROUP,
+    DL2_SUBARRAY_PARTICLETYPE_GROUP,
+    DL2_SUBARRAY_ENERGY_GROUP,
+    DL2_SUBARRAY_GEOMETRY_GROUP,
+)
 from ctapipe.reco.reconstructor import ReconstructionProperty
 from ctapipe.reco.stereo_combination import StereoCombiner
 from ctapipe.reco.utils import add_defaults_and_meta
@@ -54,24 +98,30 @@
 )
 from ctlearn.core.loader import DLDataLoader
 
-SIMULATION_CONFIG_TABLE = "/configuration/simulation/run"
-FIXED_POINTING_GROUP = "/configuration/telescope/pointing"
-POINTING_GROUP = "/dl1/monitoring/telescope/pointing"
-SUBARRAY_POINTING_GROUP = "/dl1/monitoring/subarray/pointing"
-DL1_TELESCOPE_GROUP = "/dl1/event/telescope"
-DL1_SUBARRAY_GROUP = "/dl1/event/subarray"
-DL2_SUBARRAY_GROUP = "/dl2/event/subarray"
-DL2_TELESCOPE_GROUP = "/dl2/event/telescope"
+# Convenient constants for column names and table keys
+CONFIG_INSTRUMENT_SUBARRAY_LAYOUT = "/configuration/instrument/subarray/layout"
+CONFIG_INSTRUMENT_TEL = "/configuration/instrument/telescope"
+CONFIG_INSTRUMENT_TEL_CAMERA = "/configuration/instrument/telescope/camera"
 SUBARRAY_EVENT_KEYS = ["obs_id", "event_id"]
-TELESCOPE_EVENT_KEYS = ["obs_id", "event_id", "tel_id"]
-
-__all__ = [
-    "PredictCTLearnModel",
-    "MonoPredictCTLearnModel",
-    "StereoPredictCTLearnModel",
+TEL_EVENT_KEYS = ["obs_id", "event_id", "tel_id"]
+TEL_ITER_GROUPS = [
+    R0_TEL_GROUP,
+    R1_TEL_GROUP,
+    FIXED_POINTING_GROUP,
+    DL0_TEL_POINTING_GROUP,
+    DL1_TEL_POINTING_GROUP,
+    DL1_TEL_CALIBRATION_GROUP,
+    DL1_TEL_ILLUMINATOR_THROUGHPUT_GROUP,
+    DL1_TEL_MUON_THROUGHPUT_GROUP,
+    DL1_TEL_OPTICAL_PSF_GROUP,
+    DL1_TEL_PARAMETERS_GROUP,
+    DL1_TEL_IMAGES_GROUP,
+    DL1_TEL_MUON_GROUP,
+    SIMULATION_IMAGES_GROUP,
+    SIMULATION_IMPACT_GROUP,
+    SIMULATION_PARAMETERS_GROUP,
 ]
-
 class PredictCTLearnModel(Tool):
     """
     Base tool to predict the gammaness, energy and arrival direction from R1/DL1 data using CTLearn models.
@@ -88,8 +138,6 @@ class PredictCTLearnModel(Tool):
     ----------
     input_url : pathlib.Path
         Input ctapipe HDF5 files including pixel-wise image or waveform data.
-    use_HDF5Merger : bool
-        Set whether to use the HDF5Merger component to copy the selected tables from the input file to the output file.
     dl1_features : bool
        Set whether to include the dl1 feature vectors in the output file.
     dl2_telescope : bool
@@ -107,19 +155,17 @@ class PredictCTLearnModel(Tool):
     prefix : str
         Name of the reconstruction algorithm used to generate the dl2 data.
     load_type_model_from : pathlib.Path
-        Path to a Keras model file (Keras3) or directory (Keras2) for the classification of the primary particle type.
+        Path to a Keras model file (Keras3) for the classification of the primary particle type.
     load_energy_model_from : pathlib.Path
-        Path to a Keras model file (Keras3) or directory (Keras2) for the regression of the primary particle energy.
+        Path to a Keras model file (Keras3) for the regression of the primary particle energy.
     load_cameradirection_model_from : pathlib.Path
-        Path to a Keras model file (Keras3) or directory (Keras2) for the regression
+        Path to a Keras model file (Keras3) for the regression
         of the primary particle arrival direction based on camera coordinate offsets.
     load_skydirection_model_from : pathlib.Path
-        Path to a Keras model file (Keras3) or directory (Keras2) for the regression
+        Path to a Keras model file (Keras3) for the regression
         of the primary particle arrival direction based on spherical coordinate offsets.
     output_path : pathlib.Path
         Output path to save the dl2 prediction results.
-    overwrite_tables : bool
-        Overwrite the table in the output file if it exists.
     keras_verbose : int
         Verbosity mode of Keras during the prediction.
     strategy : tf.distribute.Strategy
@@ -141,7 +187,7 @@ class PredictCTLearnModel(Tool):
         Finish the tool.
     _predict_with_model(model_path)
         Load and predict with a CTLearn model.
-    _predict_classification(example_identifiers)
+    _predict_particletype(example_identifiers)
         Predict the classification of the primary particle type.
     _predict_energy(example_identifiers)
         Predict the energy of the primary particle.
@@ -157,7 +203,7 @@ class PredictCTLearnModel(Tool):
         Create a table with NaNs for missing predictions.
     _store_pointing(all_identifiers)
         Store the telescope pointing table from to the output file.
-    _create_feature_vectors_table(example_identifiers, nonexample_identifiers, classification_feature_vectors, energy_feature_vectors, direction_feature_vectors)
+    _create_feature_vectors_table(example_identifiers, nonexample_identifiers, particletype_feature_vectors, energy_feature_vectors, direction_feature_vectors)
         Create the table for the DL1 feature vectors.
     """
 
@@ -169,16 +215,6 @@ class PredictCTLearnModel(Tool):
     input_url = Path(
         default_value=None,
         allow_none=True,
         directory_ok=False,
         file_ok=True,
     ).tag(config=True)
 
-    use_HDF5Merger = Bool(
-        default_value=True,
-        allow_none=False,
-        help=(
-            "Set whether to use the HDF5Merger component to copy the selected tables "
-            "from the input file to the output file. CAUTION: This can only be used "
-            "if the output file not exists."
-        ),
-    ).tag(config=True)
-
     dl1_features = Bool(
         default_value=False,
         allow_none=False,
@@ -228,7 +264,7 @@ class PredictCTLearnModel(Tool):
     load_type_model_from = Path(
         default_value=None,
         help=(
-            "Path to a Keras model file (Keras3) or directory (Keras2) for the classification "
+            "Path to a Keras model file (Keras3) for the classification "
             "of the primary particle type."
         ),
         allow_none=True,
@@ -240,7 +276,7 @@ class PredictCTLearnModel(Tool):
     load_energy_model_from = Path(
         default_value=None,
         help=(
-            "Path to a Keras model file (Keras3) or directory (Keras2) for the regression "
+            "Path to a Keras model file (Keras3) for the regression "
            "of the primary particle energy."
         ),
         allow_none=True,
@@ -252,7 +288,7 @@ class PredictCTLearnModel(Tool):
     load_cameradirection_model_from = Path(
         default_value=None,
         help=(
-            "Path to a Keras model file (Keras3) or directory (Keras2) for the regression "
+            "Path to a Keras model file (Keras3) for the regression "
             "of the primary particle arrival direction based on camera coordinate offsets."
         ),
         allow_none=True,
@@ -264,7 +300,7 @@ class PredictCTLearnModel(Tool):
     load_skydirection_model_from = Path(
         default_value=None,
         help=(
-            "Path to a Keras model file (Keras3) or directory (Keras2) for the regression "
+            "Path to a Keras model file (Keras3) for the regression "
             "of the primary particle arrival direction based on spherical coordinate offsets."
         ),
         allow_none=True,
@@ -285,12 +321,6 @@ class PredictCTLearnModel(Tool):
         help="Output path to save the dl2 prediction results",
     ).tag(config=True)
 
-    overwrite_tables = Bool(
-        default_value=True,
-        allow_none=False,
-        help="Overwrite the table in the output file if it exists",
-    ).tag(config=True)
-
     keras_verbose = Int(
         default_value=1,
         min=0,
@@ -315,6 +345,12 @@ class PredictCTLearnModel(Tool):
     }
 
     flags = {
+        **flag(
+            "overwrite",
+            "HDF5Merger.overwrite",
+            "Overwrite the output file if it exists",
+            "Do not overwrite the output file if it exists",
+        ),
         **flag(
             "dl1-features",
             "PredictCTLearnModel.dl1_features",
@@ -333,12 +369,6 @@ class PredictCTLearnModel(Tool):
             "Include dl2 telescope-event-wise data in the output file",
             "Exclude dl2 telescope-event-wise data in the output file",
         ),
-        **flag(
-            "use-HDF5Merger",
-            "PredictCTLearnModel.use_HDF5Merger",
-            "Copy data using the HDF5Merger component (CAUTION: This can not be used if the output file already exists)",
-            "Do not copy data using the HDF5Merger component",
-        ),
         **flag(
             "r0-waveforms",
             "HDF5Merger.r0_waveforms",
@@ -380,22 +410,10 @@ class PredictCTLearnModel(Tool):
     classes = classes_with_traits(DLDataReader)
 
     def setup(self):
-        # Check if the ctapipe HDF5Merger component is enabled
-        if self.use_HDF5Merger:
-            if os.path.exists(self.output_path):
-                raise ToolConfigurationError(
-                    f"The output file '{self.output_path}' already exists. Please use "
-                    "'--no-use-HDF5Merger' to disable the usage of the HDF5Merger component."
-                )
-            # Copy selected tables from the input file to the output file
-            self.log.info("Copying to output destination.")
-            with HDF5Merger(self.output_path, parent=self) as merger:
-                merger(self.input_url)
-        else:
-            self.log.info(
-                "No copy to output destination, since the usage of the HDF5Merger component is disabled."
-            )
-
+        # Copy selected tables from the input file to the output file
+        self.log.info("Copying to output destination.")
+        with HDF5Merger(self.output_path, parent=self) as merger:
+            merger(self.input_url)
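+        # Note (sketch): overwriting an existing output file is now governed by
+        # the HDF5Merger component, exposed above as the --overwrite flag, e.g.
+        # (illustrative paths; entry point as in the class docstring below):
+        #   ctlearn-predict-mono-model --input_url input.dl1.h5 \
+        #       --type_model ctlearn_model.keras --output output.dl2.h5 --overwrite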
         # Create a MirroredStrategy.
         self.strategy = tf.distribute.MirroredStrategy()
         atexit.register(self.strategy._extended._collective_ops._lock.locked)  # type: ignore
@@ -421,10 +439,197 @@ class PredictCTLearnModel(Tool):
         self.last_batch_size = len(self.indices) % (
             self.batch_size * self.strategy.num_replicas_in_sync
         )
+        # Ensure subarray consistency in the output file
+        self._ensure_subarray_consistency()
 
     def finish(self):
         self.log.info("Tool is shutting down")
 
+    def _ensure_subarray_consistency(self):
+        """
+        Align subarray metadata and trigger tables with the selected telescopes.
+
+        When only a subset of telescopes is processed, overwrite the output file's
+        SubarrayDescription and trim the DL1 trigger tables to keep events that
+        involve the selected telescopes. Also rebuild the subarray trigger table
+        with the corresponding telescope participation masks.
+        """
+
+        input_subarray = SubarrayDescription.from_hdf(
+            self.input_url,
+            focal_length_choice=self.dl1dh_reader.focal_length_choice,
+        )
+        if input_subarray == self.dl1dh_reader.subarray:
+            return
+
+        self.dl1dh_reader.subarray.to_hdf(self.output_path, overwrite=True)
+        self.log.info("SubarrayDescription was stored in '%s'", self.output_path)
+
+        tel_trigger_table = read_table(
+            self.output_path,
+            DL1_TEL_TRIGGER_TABLE,
+        )
+        mask = np.isin(tel_trigger_table["tel_id"], self.dl1dh_reader.tel_ids)
+        tel_trigger_table = tel_trigger_table[mask]
+        tel_trigger_table.sort(TEL_EVENT_KEYS)
+
+        write_table(
+            tel_trigger_table,
+            self.output_path,
+            DL1_TEL_TRIGGER_TABLE,
+            overwrite=True,
+        )
+
+        subarray_trigger_table = tel_trigger_table.copy()
+        subarray_trigger_table.keep_columns(
+            SUBARRAY_EVENT_KEYS + ["time", "event_type"]
+        )
+        subarray_trigger_table = unique(
+            subarray_trigger_table, keys=SUBARRAY_EVENT_KEYS
+        )
+
+        tel_trigger_groups = tel_trigger_table.group_by(SUBARRAY_EVENT_KEYS)
+        tel_with_trigger = []
+        for tel_trigger in tel_trigger_groups.groups:
+            tel_with_trigger_mask = np.zeros(
+                len(self.dl1dh_reader.tel_ids), dtype=bool
+            )
+            tel_with_trigger_mask[
+                self.dl1dh_reader.subarray.tel_ids_to_indices(
+                    tel_trigger["tel_id"]
+                )
+            ] = True
+            tel_with_trigger.append(tel_with_trigger_mask)
+
+        subarray_trigger_table.add_column(
+            tel_with_trigger, index=-2, name="tels_with_trigger"
+        )
+
+        write_table(
+            subarray_trigger_table,
+            self.output_path,
+            DL1_SUBARRAY_TRIGGER_TABLE,
+            overwrite=True,
+        )
+        # Update the simulation shower table to keep only events present in the subarray trigger table
+        subarray_trigger_table.keep_columns(SUBARRAY_EVENT_KEYS)
+        sim_shower_table = read_table(
+            self.output_path,
+            SIMULATION_SHOWER_TABLE,
+        )
+        sim_shower_table = join(
+            sim_shower_table,
+            subarray_trigger_table,
+            keys=SUBARRAY_EVENT_KEYS,
+            join_type="right"
+        )
+        sim_shower_table.sort(SUBARRAY_EVENT_KEYS)
+        write_table(
+            sim_shower_table,
+            self.output_path,
+            SIMULATION_SHOWER_TABLE,
+            overwrite=True,
+        )
+        # Delete telescope-specific tables for unselected telescopes
+        self._delete_unselected_telescope_tables()
+
+    def _delete_unselected_telescope_tables(self):
+        """
+        Delete telescope-specific tables for unselected telescopes from the output file.
+
+        Iterates through all telescope-related groups in the HDF5 file and removes
+        tables corresponding to telescopes that are not in the selected telescope list.
+        This ensures the output file only contains data for the telescopes that were
+        processed. The camera configuration tables are also pruned based on the camera indices.
+ """ + # Open the HDF5 file to prune the unselected telescope tables and camera configurations + with tables.open_file(self.output_path, mode="r+") as h5_file: + + def prune_group(group, valid_ids): + for table in group._f_iter_nodes("Table"): + idx = int(table._v_name.split("_")[-1]) + if idx not in valid_ids: + table._f_remove() + + # Telescope-specific tables + tel_ids = set(self.dl1dh_reader.tel_ids) + for group_name in TEL_ITER_GROUPS: + group = getattr(h5_file.root, group_name, None) + if group is not None: + prune_group(group, tel_ids) + + #Camera configuration tables + layout_node = getattr(h5_file.root, CONFIG_INSTRUMENT_SUBARRAY_LAYOUT, None) + camera_group = getattr(h5_file.root, CONFIG_INSTRUMENT_TEL_CAMERA, None) + if not (layout_node and camera_group): + return + # layout can be either a Table or a Group containing a Table + layout_table = ( + layout_node + if isinstance(layout_node, tables.Table) + else next(layout_node._f_iter_nodes("Table")) + ) + camera_indices = set(layout_table.col("camera_index")) + prune_group(camera_group, camera_indices) + + def _create_nan_table(self, nonexample_identifiers, columns, shapes): + """ + Create a table with NaNs for missing predictions. + + This method creates a table with NaNs for missing predictions for the non-example identifiers. + In stereo mode, the table also a column for the valid telescopes is added with all False values. + + Parameters: + ----------- + nonexample_identifiers : astropy.table.Table + Table containing the non-example identifiers. + columns : list of str + List of column names to create in the table. + shapes : list of shapes + List of shapes for the columns to create in the table. + + Returns: + -------- + nan_table : astropy.table.Table + Table containing NaNs for missing predictions. + """ + # Create a table with NaNs for missing predictions + nan_table = nonexample_identifiers.copy() + for column_name, shape in zip(columns, shapes): + nan_table.add_column(np.full(shape, np.nan), name=column_name) + # Add that no telescope is valid for the non-example identifiers in stereo mode + if self.dl1dh_reader.mode == "stereo": + nan_table.add_column( + np.zeros( + (len(nonexample_identifiers), len(self.dl1dh_reader.tel_ids)), + dtype=bool, + ), + name=f"{self.prefix}_telescopes", + ) + return nan_table + + def deduplicate_first_valid( + self, + table: Table, + keys=('obs_id', 'event_id'), + valid_col='CTLearn_is_valid', + ): + """ + Return a deduplicated Astropy Table. + + For each group defined by `keys`, keep the first row where + `valid_col` is True. If none are valid, keep the first row. + """ + + t = table.copy() + + t.sort( + list(keys) + [valid_col], + reverse=[False] * len(keys) + [True] + ) + + return unique(t, keys=list(keys), keep='first') + def _predict_with_model(self, model_path): """ Load and predict with a CTLearn model. @@ -435,7 +640,7 @@ def _predict_with_model(self, model_path): Parameters ---------- model_path : str - Path to a Keras model file (Keras3) or directory (Keras2). + Path to a Keras model file (Keras3). 
     def _predict_with_model(self, model_path):
         """
         Load and predict with a CTLearn model.
@@ -435,7 +640,7 @@ def _predict_with_model(self, model_path):
         Parameters
         ----------
         model_path : str
-            Path to a Keras model file (Keras3) or directory (Keras2).
+            Path to a Keras model file (Keras3).
 
         Returns
         -------
@@ -488,9 +693,18 @@ def _predict_with_model(self, model_path):
                     x = layer(x)
                 head = keras.Model(inputs=backbone_output_shape, outputs=x)
                 # Apply the backbone model with the data loader to retrieve the feature vectors
-                feature_vectors = backbone_model.predict(
-                    data_loader, verbose=self.keras_verbose
-                )
+                try:
+                    feature_vectors = backbone_model.predict(
+                        data_loader, verbose=self.keras_verbose
+                    )
+                except ValueError as err:
+                    if str(err).startswith("Input 0 of layer"):
+                        raise ToolConfigurationError(
+                            "Model input shape does not match the prediction data. "
+                            "This is usually caused by selecting the wrong telescope_id. "
+                            "Please ensure the telescope configuration matches the one used for training."
+                        ) from err
+                    raise
                 # Apply the head model with the feature vectors to retrieve the prediction
                 predict_data = Table(
                     {
@@ -522,7 +736,16 @@ def _predict_with_model(self, model_path):
                 )
             else:
                 # Predict the data using the loaded model
-                predict_data = model.predict(data_loader, verbose=self.keras_verbose)
+                try:
+                    predict_data = model.predict(data_loader, verbose=self.keras_verbose)
+                except ValueError as err:
+                    if str(err).startswith("Input 0 of layer"):
+                        raise ToolConfigurationError(
+                            "Model input shape does not match the prediction data. "
+                            "This is usually caused by selecting the wrong telescope_id. "
+                            "Please ensure the telescope configuration matches the one used for training."
+                        ) from err
+                    raise
             # Create a astropy table with the prediction results
             # The classification task has a softmax layer as the last layer
             # which returns the probabilities for each class in an array, while
@@ -546,7 +769,7 @@ def _predict_with_model(self, model_path):
             predict_data = vstack([predict_data, predict_data_last_batch])
         return predict_data, feature_vectors
 
-    def _predict_classification(self, example_identifiers):
+    def _predict_particletype(self, example_identifiers):
         """
         Predict the classification of the primary particle type.
@@ -556,7 +779,7 @@ def _predict_particletype(self, example_identifiers):
         Parameters:
         -----------
-        classification_table : astropy.table.Table
+        particletype_table : astropy.table.Table
             Table containing the example identifiers with an additional column
             for the predicted classification score ('gammaness').
         feature_vectors : np.ndarray
@@ -570,11 +793,11 @@ def _predict_particletype(self, example_identifiers):
             self.load_type_model_from
         )
         # Create prediction table and add the predicted classification score ('gammaness')
-        classification_table = example_identifiers.copy()
-        classification_table.add_column(
+        particletype_table = example_identifiers.copy()
+        particletype_table.add_column(
             predict_data["type"].T[1], name=f"{self.prefix}_tel_prediction"
         )
-        return classification_table, feature_vectors
+        return particletype_table, feature_vectors
 
     def _predict_energy(self, example_identifiers):
         """
         Predict the energy of the primary particle.
@@ -819,64 +1042,6 @@ def _transform_spher_coord_offsets_to_sky(self, table) -> Table:
         )
         return table
 
-    def _create_nan_table(self, nonexample_identifiers, columns, shapes):
-        """
-        Create a table with NaNs for missing predictions.
-
-        This method creates a table with NaNs for missing predictions for the non-example identifiers.
-        In stereo mode, the table also a column for the valid telescopes is added with all False values.
-
-        Parameters:
-        -----------
-        nonexample_identifiers : astropy.table.Table
-            Table containing the non-example identifiers.
-        columns : list of str
-            List of column names to create in the table.
-        shapes : list of shapes
-            List of shapes for the columns to create in the table.
-
-        Returns:
-        --------
-        nan_table : astropy.table.Table
-            Table containing NaNs for missing predictions.
-        """
-        # Create a table with NaNs for missing predictions
-        nan_table = nonexample_identifiers.copy()
-        for column_name, shape in zip(columns, shapes):
-            nan_table.add_column(np.full(shape, np.nan), name=column_name)
-        # Add that no telescope is valid for the non-example identifiers in stereo mode
-        if self.dl1dh_reader.mode == "stereo":
-            nan_table.add_column(
-                np.zeros(
-                    (len(nonexample_identifiers), len(self.dl1dh_reader.tel_ids)),
-                    dtype=bool,
-                ),
-                name=f"{self.prefix}_telescopes",
-            )
-        return nan_table
-
-    def deduplicate_first_valid(
-        self,
-        table: Table,
-        keys=('obs_id', 'event_id'),
-        valid_col='CTLearn_is_valid',
-    ):
-        """
-        Return a deduplicated Astropy Table.
-
-        For each group defined by `keys`, keep the first row where
-        `valid_col` is True. If none are valid, keep the first row.
-        """
-
-        t = table.copy()
-
-        t.sort(
-            list(keys) + [valid_col],
-            reverse=[False] * len(keys) + [True]
-        )
-
-        return unique(t, keys=list(keys), keep='first')
-
     def _store_pointing(self, all_identifiers):
         """
         Store the telescope pointing table from to the output file.
@@ -918,13 +1083,12 @@ def _store_pointing(self, all_identifiers):
                 write_table(
                     tel_pointing_table,
                     self.output_path,
-                    f"{POINTING_GROUP}/tel_{tel_id:03d}",
-                    overwrite=self.overwrite_tables,
+                    f"{DL1_TEL_POINTING_GROUP}/tel_{tel_id:03d}",
                 )
                 self.log.info(
                     "DL1 telescope pointing table was stored in '%s' under '%s'",
                     self.output_path,
-                    f"{POINTING_GROUP}/tel_{tel_id:03d}",
+                    f"{DL1_TEL_POINTING_GROUP}/tel_{tel_id:03d}",
                 )
             pointing_info = vstack(pointing_info)
         if self.dl1dh_reader.mode == "stereo":
@@ -953,13 +1117,12 @@ def _store_pointing(self, all_identifiers):
             write_table(
                 pointing_table,
                 self.output_path,
-                f"{SUBARRAY_POINTING_GROUP}",
-                overwrite=self.overwrite_tables,
+                DL1_SUBARRAY_POINTING_GROUP,
             )
             self.log.info(
                 "DL1 subarray pointing table was stored in '%s' under '%s'",
                 self.output_path,
-                f"{SUBARRAY_POINTING_GROUP}",
+                DL1_SUBARRAY_POINTING_GROUP,
             )
         return pointing_info
 
@@ -967,7 +1130,7 @@ class PredictCTLearnModel(Tool):
     def _create_feature_vectors_table(
         self,
         example_identifiers,
         nonexample_identifiers=None,
-        classification_feature_vectors=None,
+        particletype_feature_vectors=None,
         energy_feature_vectors=None,
         direction_feature_vectors=None,
     ):
@@ -984,8 +1147,8 @@ def _create_feature_vectors_table(
             Table containing the example identifiers.
         nonexample_identifiers : astropy.table.Table or None
             Table containing the non-example identifiers to fill the NaNs.
-        classification_feature_vectors : np.ndarray or None
-            Array containing the classification feature vectors.
+        particletype_feature_vectors : np.ndarray or None
+            Array containing the particletype feature vectors.
         energy_feature_vectors : np.ndarray or None
             Array containing the energy feature vectors.
         direction_feature_vectors : np.ndarray or None
@@ -999,20 +1162,20 @@ def _create_feature_vectors_table(
         # Create the feature vector table
         feature_vector_table = example_identifiers.copy()
         columns_list, shapes_list = [], []
-        if classification_feature_vectors is not None:
+        if particletype_feature_vectors is not None:
             is_valid_col = ~np.isnan(
-                np.min(classification_feature_vectors, axis=1), dtype=bool
+                np.min(particletype_feature_vectors, axis=1), dtype=bool
             )
             feature_vector_table.add_column(
-                classification_feature_vectors,
-                name=f"{self.prefix}_tel_classification_feature_vectors",
+                particletype_feature_vectors,
+                name=f"{self.prefix}_tel_particletype_feature_vectors",
             )
             if nonexample_identifiers is not None:
-                columns_list.append(f"{self.prefix}_tel_classification_feature_vectors")
+                columns_list.append(f"{self.prefix}_tel_particletype_feature_vectors")
                 shapes_list.append(
                     (
                         len(nonexample_identifiers),
-                        classification_feature_vectors.shape[1],
+                        particletype_feature_vectors.shape[1],
                     )
                 )
         if energy_feature_vectors is not None:
@@ -1108,11 +1271,9 @@ class MonoPredictCTLearnModel(PredictCTLearnModel):
         --energy_model="/path/to/your/mono/energy/ctlearn_model.cpk" \\
         --cameradirection_model="/path/to/your/mono/cameradirection/ctlearn_model.cpk" \\
         --dl1-features \\
-        --use-HDF5Merger \\
         --no-dl1-images \\
         --no-true-images \\
         --output output.dl2.h5 \\
-        --PredictCTLearnModel.overwrite_tables=True \\
 
     To predict from pixel-wise waveform data in mono mode using trained CTLearn models:
     > ctlearn-predict-mono-model \\
@@ -1123,13 +1284,11 @@ class MonoPredictCTLearnModel(PredictCTLearnModel):
         --type_model="/path/to/your/mono_waveform/type/ctlearn_model.cpk" \\
         --energy_model="/path/to/your/mono_waveform/energy/ctlearn_model.cpk" \\
         --cameradirection_model="/path/to/your/mono_waveform/cameradirection/ctlearn_model.cpk" \\
-        --use-HDF5Merger \\
         --no-r0-waveforms \\
         --no-r1-waveforms \\
        --no-dl1-images \\
         --no-true-images \\
         --output output.dl2.h5 \\
-        --PredictCTLearnModel.overwrite_tables=True \\
     """
 
     stereo_combiner_cls = ComponentName(
@@ -1142,11 +1301,14 @@ def start(self):
         self.log.info("Processing the telescope pointings...")
         # Retrieve the IDs from the dl1dh for the prediction tables
         example_identifiers = self.dl1dh_reader.example_identifiers.copy()
-        example_identifiers.keep_columns(TELESCOPE_EVENT_KEYS)
-        all_identifiers = self.dl1dh_reader.tel_trigger_table.copy()
-        all_identifiers.keep_columns(TELESCOPE_EVENT_KEYS + ["time"])
+        example_identifiers.keep_columns(TEL_EVENT_KEYS)
+        all_identifiers = read_table(
+            self.output_path,
+            DL1_TEL_TRIGGER_TABLE,
+        )
+        all_identifiers.keep_columns(TEL_EVENT_KEYS + ["time"])
         nonexample_identifiers = setdiff(
-            all_identifiers, example_identifiers, keys=TELESCOPE_EVENT_KEYS
+            all_identifiers, example_identifiers, keys=TEL_EVENT_KEYS
         )
         nonexample_identifiers.remove_column("time")
         # Pointing table for the mono mode for MC simulation
@@ -1158,111 +1320,111 @@ def start(self):
             pointing_info = super()._store_pointing(all_identifiers)
 
         self.log.info("Starting the prediction...")
-        classification_feature_vectors = None
+        particletype_feature_vectors = None
         if self.load_type_model_from is not None:
-            self.type_stereo_combiner = StereoCombiner.from_name(
-                self.stereo_combiner_cls,
-                prefix=self.prefix,
-                property=ReconstructionProperty.PARTICLE_TYPE,
-                parent=self,
+            # Predict the type of the primary particle
+            particletype_table, particletype_feature_vectors = (
+                super()._predict_particletype(example_identifiers)
             )
-            # Predict the energy of the primary particle
-            classification_table, classification_feature_vectors = (
-                super()._predict_classification(example_identifiers)
+            # Produce output table with NaNs for missing predictions
+            if len(nonexample_identifiers) > 0:
+                nan_table = super()._create_nan_table(
+                    nonexample_identifiers,
+                    columns=[f"{self.prefix}_tel_prediction"],
+                    shapes=[(len(nonexample_identifiers),)],
+                )
+                particletype_table = vstack([particletype_table, nan_table])
+            # Add is_valid column to the particle type table
+            particletype_table.add_column(
+                ~np.isnan(
+                    particletype_table[f"{self.prefix}_tel_prediction"].data,
+                    dtype=bool,
+                ),
+                name=f"{self.prefix}_tel_is_valid",
+            )
+            # Add the default values and meta data to the table
+            add_defaults_and_meta(
+                particletype_table,
+                ParticleClassificationContainer,
+                prefix=self.prefix,
+                add_tel_prefix=True,
             )
             if self.dl2_telescope:
-                # Produce output table with NaNs for missing predictions
-                if len(nonexample_identifiers) > 0:
-                    nan_table = super()._create_nan_table(
-                        nonexample_identifiers,
-                        columns=[f"{self.prefix}_tel_prediction"],
-                        shapes=[(len(nonexample_identifiers),)],
-                    )
-                    classification_table = vstack([classification_table, nan_table])
-                # Add is_valid column to the energy table
-                classification_table.add_column(
-                    ~np.isnan(
-                        classification_table[f"{self.prefix}_tel_prediction"].data,
-                        dtype=bool,
-                    ),
-                    name=f"{self.prefix}_tel_is_valid",
-                )
-                # Add the default values and meta data to the table
-                add_defaults_and_meta(
-                    classification_table,
-                    ParticleClassificationContainer,
-                    prefix=self.prefix,
-                    add_tel_prefix=True,
-                )
                 for tel_id in self.dl1dh_reader.selected_telescopes[
                     self.dl1dh_reader.tel_type
                 ]:
                     # Retrieve the example identifiers for the selected telescope
-                    telescope_mask = classification_table["tel_id"] == tel_id
-                    classification_tel_table = classification_table[telescope_mask]
-                    classification_tel_table.sort(TELESCOPE_EVENT_KEYS)
+                    telescope_mask = particletype_table["tel_id"] == tel_id
+                    particletype_tel_table = particletype_table[telescope_mask]
+                    particletype_tel_table.sort(TEL_EVENT_KEYS)
                     # Save the prediction to the output file for the selected telescope
                     write_table(
-                        classification_tel_table,
+                        particletype_tel_table,
                         self.output_path,
-                        f"{DL2_TELESCOPE_GROUP}/classification/{self.prefix}/tel_{tel_id:03d}",
-                        overwrite=self.overwrite_tables,
+                        f"{DL2_TEL_PARTICLETYPE_GROUP}/{self.prefix}/tel_{tel_id:03d}",
                     )
                     self.log.info(
                         "DL2 prediction data was stored in '%s' under '%s'",
                         self.output_path,
-                        f"{DL2_TELESCOPE_GROUP}/classification/{self.prefix}/tel_{tel_id:03d}",
+                        f"{DL2_TEL_PARTICLETYPE_GROUP}/{self.prefix}/tel_{tel_id:03d}",
                     )
+
             if self.dl2_subarray:
                 self.log.info("Processing and storing the subarray type prediction...")
-                # If only one telescope is used, copy the classification table
+                # If only one telescope is used, copy the particletype table
                 # and modify it to subarray format
                 if len(self.dl1dh_reader.tel_ids) == 1:
-                    classification_subarray_table = classification_table.copy()
+                    particletype_subarray_table = particletype_table.copy()
                     telescope_mask = (
-                        classification_subarray_table["tel_id"]
+                        particletype_subarray_table["tel_id"]
                         == self.dl1dh_reader.tel_ids[0]
                     )
-                    classification_subarray_table = classification_subarray_table[
+                    particletype_subarray_table = particletype_subarray_table[
                         telescope_mask
                     ]
-                    classification_subarray_table.remove_column("tel_id")
-                    for colname in classification_subarray_table.colnames:
+                    particletype_subarray_table.remove_column("tel_id")
+                    for colname in particletype_subarray_table.colnames:
                         if "_tel_" in colname:
-                            classification_subarray_table.rename_column(
+                            particletype_subarray_table.rename_column(
                                 colname, colname.replace("_tel", "")
                             )
-                    classification_subarray_table.add_column(
+                    particletype_subarray_table.add_column(
                         [
                             [val]
-                            for val in classification_subarray_table[
+                            for val in particletype_subarray_table[
                                 f"{self.prefix}_is_valid"
                             ]
                         ],
                         name=f"{self.prefix}_telescopes",
                     )
                 else:
+                    self.type_stereo_combiner = StereoCombiner.from_name(
+                        self.stereo_combiner_cls,
+                        prefix=self.prefix,
+                        property=ReconstructionProperty.PARTICLE_TYPE,
+                        parent=self,
+                    )
                     # Combine the telescope predictions to the subarray prediction using the stereo combiner
-                    classification_subarray_table = (
-                        self.type_stereo_combiner.predict_table(classification_table)
+                    particletype_subarray_table = (
+                        self.type_stereo_combiner.predict_table(particletype_table)
                     )
                     # TODO: Remove temporary fix once the stereo combiner returns correct table
                     # Check if the table has to be converted to a boolean mask
                     if (
-                        classification_subarray_table[f"{self.prefix}_telescopes"].dtype
+                        particletype_subarray_table[f"{self.prefix}_telescopes"].dtype
                         != np.bool_
                     ):
                         # Create boolean mask for telescopes that participate in the stereo reconstruction combination
                         reco_telescopes = np.zeros(
                             (
-                                len(classification_subarray_table),
+                                len(particletype_subarray_table),
                                 len(self.dl1dh_reader.tel_ids),
                             ),
                             dtype=bool,
                         )
                         # Loop over the table and set the boolean mask for the telescopes
                         for index, tel_id_mask in enumerate(
-                            classification_subarray_table[f"{self.prefix}_telescopes"]
+                            particletype_subarray_table[f"{self.prefix}_telescopes"]
                         ):
                             if not tel_id_mask:
                                 continue
@@ -1273,88 +1435,80 @@ def start(self):
                             ] = True
                         # Overwrite the column with the boolean mask with fix length
-                        classification_subarray_table[f"{self.prefix}_telescopes"] = (
+                        particletype_subarray_table[f"{self.prefix}_telescopes"] = (
                             reco_telescopes
                         )
-                # Deduplicate the subarray classification table to have only one entry per event
-                classification_subarray_table = super().deduplicate_first_valid(
-                    table=classification_subarray_table,
+                # Deduplicate the subarray particletype table to have only one entry per event
+                particletype_subarray_table = super().deduplicate_first_valid(
+                    table=particletype_subarray_table,
                     keys=SUBARRAY_EVENT_KEYS,
                     valid_col=f"{self.prefix}_is_valid",
                 )
-                # Sort the subarray classification table
-                classification_subarray_table.sort(SUBARRAY_EVENT_KEYS)
+                # Sort the subarray particletype table
+                particletype_subarray_table.sort(SUBARRAY_EVENT_KEYS)
                 # Save the prediction to the output file
                 write_table(
-                    classification_subarray_table,
+                    particletype_subarray_table,
                     self.output_path,
-                    f"{DL2_SUBARRAY_GROUP}/classification/{self.prefix}",
-                    overwrite=self.overwrite_tables,
+                    f"{DL2_SUBARRAY_PARTICLETYPE_GROUP}/{self.prefix}",
                 )
                 self.log.info(
                     "DL2 prediction data was stored in '%s' under '%s'",
                     self.output_path,
-                    f"{DL2_SUBARRAY_GROUP}/classification/{self.prefix}",
+                    f"{DL2_SUBARRAY_PARTICLETYPE_GROUP}/{self.prefix}",
                )
 
         energy_feature_vectors = None
         if self.load_energy_model_from is not None:
-            self.energy_stereo_combiner = StereoCombiner.from_name(
-                self.stereo_combiner_cls,
-                prefix=self.prefix,
-                property=ReconstructionProperty.ENERGY,
-                parent=self,
-            )
             # Predict the energy of the primary particle
             energy_table, energy_feature_vectors = super()._predict_energy(
                 example_identifiers
             )
-            if self.dl2_telescope:
-                # Produce output table with NaNs for missing predictions
-                if len(nonexample_identifiers) > 0:
-                    nan_table = super()._create_nan_table(
-                        nonexample_identifiers,
-                        columns=[f"{self.prefix}_tel_energy"],
-                        shapes=[(len(nonexample_identifiers),)],
-                    )
-                    energy_table = vstack([energy_table, nan_table])
-                # Add is_valid column to the energy table
-                energy_table.add_column(
-                    ~np.isnan(
-                        energy_table[f"{self.prefix}_tel_energy"].data, dtype=bool
-                    ),
-                    name=f"{self.prefix}_tel_is_valid",
-                )
-                # Add the default values and meta data to the table
-                add_defaults_and_meta(
-                    energy_table,
-                    ReconstructedEnergyContainer,
-                    prefix=self.prefix,
-                    add_tel_prefix=True,
+            # Produce output table with NaNs for missing predictions
+            if len(nonexample_identifiers) > 0:
+                nan_table = super()._create_nan_table(
+                    nonexample_identifiers,
+                    columns=[f"{self.prefix}_tel_energy"],
+                    shapes=[(len(nonexample_identifiers),)],
                 )
+                energy_table = vstack([energy_table, nan_table])
+            # Add is_valid column to the energy table
+            energy_table.add_column(
+                ~np.isnan(
+                    energy_table[f"{self.prefix}_tel_energy"].data, dtype=bool
+                ),
+                name=f"{self.prefix}_tel_is_valid",
+            )
+            # Add the default values and meta data to the table
+            add_defaults_and_meta(
+                energy_table,
+                ReconstructedEnergyContainer,
+                prefix=self.prefix,
+                add_tel_prefix=True,
+            )
+            if self.dl2_telescope:
                 for tel_id in self.dl1dh_reader.selected_telescopes[
                     self.dl1dh_reader.tel_type
                ]:
                     # Retrieve the example identifiers for the selected telescope
                     telescope_mask = energy_table["tel_id"] == tel_id
                     energy_tel_table = energy_table[telescope_mask]
-                    energy_tel_table.sort(TELESCOPE_EVENT_KEYS)
+                    energy_tel_table.sort(TEL_EVENT_KEYS)
                     # Save the prediction to the output file
                     write_table(
                         energy_tel_table,
                         self.output_path,
-                        f"{DL2_TELESCOPE_GROUP}/energy/{self.prefix}/tel_{tel_id:03d}",
-                        overwrite=self.overwrite_tables,
+                        f"{DL2_TEL_ENERGY_GROUP}/{self.prefix}/tel_{tel_id:03d}",
                     )
                     self.log.info(
                         "DL2 prediction data was stored in '%s' under '%s'",
                         self.output_path,
-                        f"{DL2_TELESCOPE_GROUP}/energy/{self.prefix}/tel_{tel_id:03d}",
+                        f"{DL2_TEL_ENERGY_GROUP}/{self.prefix}/tel_{tel_id:03d}",
                     )
             if self.dl2_subarray:
                 self.log.info(
                     "Processing and storing the subarray energy prediction..."
                 )
-                # If only one telescope is used, copy the classification table
+                # If only one telescope is used, copy the energy table
                 # and modify it to subarray format
                 if len(self.dl1dh_reader.tel_ids) == 1:
                     energy_subarray_table = energy_table.copy()
@@ -1376,6 +1530,12 @@ def start(self):
                         name=f"{self.prefix}_telescopes",
                     )
                 else:
+                    self.energy_stereo_combiner = StereoCombiner.from_name(
+                        self.stereo_combiner_cls,
+                        prefix=self.prefix,
+                        property=ReconstructionProperty.ENERGY,
+                        parent=self,
+                    )
                     # Combine the telescope predictions to the subarray prediction using the stereo combiner
                     energy_subarray_table = self.energy_stereo_combiner.predict_table(
                         energy_table
@@ -1410,7 +1570,7 @@ def start(self):
                         energy_subarray_table[f"{self.prefix}_telescopes"] = (
                             reco_telescopes
                         )
-                # Deduplicate the subarray classification table to have only one entry per event
+                # Deduplicate the subarray energy table to have only one entry per event
                 energy_subarray_table = super().deduplicate_first_valid(
                     table=energy_subarray_table,
                     keys=SUBARRAY_EVENT_KEYS,
@@ -1422,86 +1582,78 @@ def start(self):
                 write_table(
                     energy_subarray_table,
                     self.output_path,
-                    f"{DL2_SUBARRAY_GROUP}/energy/{self.prefix}",
-                    overwrite=self.overwrite_tables,
+                    f"{DL2_SUBARRAY_ENERGY_GROUP}/{self.prefix}",
                 )
                 self.log.info(
                     "DL2 prediction data was stored in '%s' under '%s'",
                     self.output_path,
-                    f"{DL2_SUBARRAY_GROUP}/energy/{self.prefix}",
+                    f"{DL2_SUBARRAY_ENERGY_GROUP}/{self.prefix}",
                 )
 
         direction_feature_vectors = None
         if self.load_cameradirection_model_from is not None:
-            self.geometry_stereo_combiner = StereoCombiner.from_name(
-                self.stereo_combiner_cls,
-                prefix=self.prefix,
-                property=ReconstructionProperty.GEOMETRY,
-                parent=self,
-            )
             # Join the prediction table with the telescope pointing table
             example_identifiers = join(
                 left=example_identifiers,
                 right=pointing_info,
-                keys=TELESCOPE_EVENT_KEYS,
+                keys=TEL_EVENT_KEYS,
             )
             # Predict the arrival direction of the primary particle
             direction_table, direction_feature_vectors = (
                 super()._predict_cameradirection(example_identifiers)
             )
             direction_tel_tables = []
-            if self.dl2_telescope:
-                for tel_id in self.dl1dh_reader.selected_telescopes[
-                    self.dl1dh_reader.tel_type
-                ]:
-                    # Retrieve the example identifiers for the selected telescope
-                    telescope_mask = direction_table["tel_id"] == tel_id
-                    direction_tel_table = direction_table[telescope_mask]
-                    direction_tel_table = super()._transform_cam_coord_offsets_to_sky(
-                        direction_tel_table
-                    )
-                    # Produce output table with NaNs for missing predictions
-                    nan_telescope_mask = nonexample_identifiers["tel_id"] == tel_id
-                    nonexample_identifiers_tel = nonexample_identifiers[
-                        nan_telescope_mask
-                    ]
-                    if len(nonexample_identifiers_tel) > 0:
-                        nan_table = super()._create_nan_table(
-                            nonexample_identifiers_tel,
-                            columns=[f"{self.prefix}_tel_alt", f"{self.prefix}_tel_az"],
-                            shapes=[
-                                (len(nonexample_identifiers_tel),),
-                                (len(nonexample_identifiers_tel),),
-                            ],
-                        )
-                        direction_tel_table = vstack([direction_tel_table, nan_table])
-                    direction_tel_table.sort(TELESCOPE_EVENT_KEYS)
-                    # Add is_valid column to the direction table
-                    direction_tel_table.add_column(
-                        ~np.isnan(
-                            direction_tel_table[f"{self.prefix}_tel_alt"].data,
-                            dtype=bool,
-                        ),
-                        name=f"{self.prefix}_tel_is_valid",
-                    )
-                    # Add the default values and meta data to the table
-                    add_defaults_and_meta(
-                        direction_tel_table,
-                        ReconstructedGeometryContainer,
-                        prefix=self.prefix,
-                        add_tel_prefix=True,
-                    )
-                    direction_tel_tables.append(direction_tel_table)
+            for tel_id in self.dl1dh_reader.selected_telescopes[
+                self.dl1dh_reader.tel_type
+            ]:
+                # Retrieve the example identifiers for the selected telescope
+                telescope_mask = direction_table["tel_id"] == tel_id
+                direction_tel_table = direction_table[telescope_mask]
+                direction_tel_table = super()._transform_cam_coord_offsets_to_sky(
+                    direction_tel_table
+                )
+                # Produce output table with NaNs for missing predictions
+                nan_telescope_mask = nonexample_identifiers["tel_id"] == tel_id
+                nonexample_identifiers_tel = nonexample_identifiers[
+                    nan_telescope_mask
+                ]
+                if len(nonexample_identifiers_tel) > 0:
+                    nan_table = super()._create_nan_table(
+                        nonexample_identifiers_tel,
+                        columns=[f"{self.prefix}_tel_alt", f"{self.prefix}_tel_az"],
+                        shapes=[
+                            (len(nonexample_identifiers_tel),),
+                            (len(nonexample_identifiers_tel),),
+                        ],
                     )
+                    direction_tel_table = vstack([direction_tel_table, nan_table])
+                direction_tel_table.sort(TEL_EVENT_KEYS)
+                # Add is_valid column to the direction table
+                direction_tel_table.add_column(
+                    ~np.isnan(
+                        direction_tel_table[f"{self.prefix}_tel_alt"].data,
+                        dtype=bool,
+                    ),
+                    name=f"{self.prefix}_tel_is_valid",
+                )
+                # Add the default values and meta data to the table
+                add_defaults_and_meta(
+                    direction_tel_table,
+                    ReconstructedGeometryContainer,
+                    prefix=self.prefix,
+                    add_tel_prefix=True,
+                )
+                direction_tel_tables.append(direction_tel_table)
+                if self.dl2_telescope:
                     # Save the prediction to the output file
                     write_table(
                         direction_tel_table,
                         self.output_path,
-                        f"{DL2_TELESCOPE_GROUP}/geometry/{self.prefix}/tel_{tel_id:03d}",
-                        overwrite=self.overwrite_tables,
+                        f"{DL2_TEL_GEOMETRY_GROUP}/{self.prefix}/tel_{tel_id:03d}",
                     )
                     self.log.info(
                         "DL2 prediction data was stored in '%s' under '%s'",
                         self.output_path,
-                        f"{DL2_TELESCOPE_GROUP}/geometry/{self.prefix}/tel_{tel_id:03d}",
+                        f"{DL2_TEL_GEOMETRY_GROUP}/{self.prefix}/tel_{tel_id:03d}",
                     )
             if self.dl2_subarray:
                 self.log.info(
@@ -1510,7 +1662,7 @@ def start(self):
                 # Stack the telescope tables to the subarray table
                 direction_tel_tables = vstack(direction_tel_tables)
                 # Sort the table by the telescope event keys
-                direction_tel_tables.sort(TELESCOPE_EVENT_KEYS)
+                direction_tel_tables.sort(TEL_EVENT_KEYS)
                 # If only one telescope is used, copy the classification table
                 # and modify it to subarray format
                 if len(self.dl1dh_reader.tel_ids) == 1:
@@ -1536,6 +1688,12 @@ def start(self):
                     name=f"{self.prefix}_telescopes",
                 )
             else:
+                self.geometry_stereo_combiner = StereoCombiner.from_name(
+                    self.stereo_combiner_cls,
+                    prefix=self.prefix,
+                    property=ReconstructionProperty.GEOMETRY,
+                    parent=self,
+                )
                 # Combine the telescope predictions to the subarray prediction using the stereo combiner
                 direction_subarray_table = (
                     self.geometry_stereo_combiner.predict_table(
@@ -1572,7 +1730,7 @@ def start(self):
                 direction_subarray_table[f"{self.prefix}_telescopes"] = (
                     reco_telescopes
                 )
-            # Deduplicate the subarray classification table to have only one entry per event
+            # Deduplicate the subarray direction table to have only one entry per event
             direction_subarray_table = super().deduplicate_first_valid(
                 table=direction_subarray_table,
                 keys=SUBARRAY_EVENT_KEYS,
@@ -1584,13 +1742,12 @@ def start(self):
             write_table(
                 direction_subarray_table,
                 self.output_path,
-                f"{DL2_SUBARRAY_GROUP}/geometry/{self.prefix}",
-                overwrite=self.overwrite_tables,
+                f"{DL2_SUBARRAY_GEOMETRY_GROUP}/{self.prefix}",
             )
             self.log.info(
                 "DL2 prediction data was stored in '%s' under '%s'",
                 self.output_path,
-                f"{DL2_SUBARRAY_GROUP}/geometry/{self.prefix}",
+                f"{DL2_SUBARRAY_GEOMETRY_GROUP}/{self.prefix}",
             )
         # Create the feature vector table if the DL1 features are enabled
         if self.dl1_features:
@@ -1598,31 +1755,30 @@ def start(self):
             feature_vector_table = super()._create_feature_vectors_table(
                 example_identifiers,
                 nonexample_identifiers,
-                classification_feature_vectors,
+                particletype_feature_vectors,
                 energy_feature_vectors,
                 direction_feature_vectors,
             )
             # Loop over the selected telescopes and store the feature vectors
             # for each telescope in the output file. The feature vectors are stored
-            # in the DL1_TELESCOPE_GROUP/features/{prefix}/tel_{tel_id:03d} table.
+            # in the DL1_TEL_GROUP/features/{prefix}/tel_{tel_id:03d} table.
             for tel_id in self.dl1dh_reader.selected_telescopes[
                 self.dl1dh_reader.tel_type
             ]:
                 # Retrieve the example identifiers for the selected telescope
                 telescope_mask = feature_vector_table["tel_id"] == tel_id
                 feature_vectors_tel_table = feature_vector_table[telescope_mask]
-                feature_vectors_tel_table.sort(TELESCOPE_EVENT_KEYS)
+                feature_vectors_tel_table.sort(TEL_EVENT_KEYS)
                 # Save the prediction to the output file
                 write_table(
                     feature_vectors_tel_table,
                     self.output_path,
-                    f"{DL1_TELESCOPE_GROUP}/features/{self.prefix}/tel_{tel_id:03d}",
-                    overwrite=self.overwrite_tables,
+                    f"{DL1_TEL_GROUP}/features/{self.prefix}/tel_{tel_id:03d}",
                 )
                 self.log.info(
                     "DL1 feature vectors was stored in '%s' under '%s'",
                     self.output_path,
-                    f"{DL1_TELESCOPE_GROUP}/features/{self.prefix}/tel_{tel_id:03d}",
+                    f"{DL1_TEL_GROUP}/features/{self.prefix}/tel_{tel_id:03d}",
                 )

     def _store_mc_telescope_pointing(self, all_identifiers):
@@ -1650,7 +1806,7 @@ def _store_mc_telescope_pointing(self, all_identifiers):
                 keys=["obs_id", "tel_id"],
             )
             # TODO: use keep_order for astropy v7.0.0
-            tel_pointing.sort(TELESCOPE_EVENT_KEYS)
+            tel_pointing.sort(TEL_EVENT_KEYS)
             # Retrieve the example identifiers for the selected telescope
             tel_pointing_table = Table(
                 {
@@ -1662,13 +1818,12 @@ def _store_mc_telescope_pointing(self, all_identifiers):
             write_table(
                 tel_pointing_table,
                 self.output_path,
-                f"{POINTING_GROUP}/tel_{tel_id:03d}",
-                overwrite=self.overwrite_tables,
+                f"{DL1_TEL_POINTING_GROUP}/tel_{tel_id:03d}",
             )
             self.log.info(
                 "DL1 telescope pointing table was stored in '%s' under '%s'",
                 self.output_path,
-                f"{POINTING_GROUP}/tel_{tel_id:03d}",
+                f"{DL1_TEL_POINTING_GROUP}/tel_{tel_id:03d}",
             )
             pointing_info.append(tel_pointing)
         pointing_info = vstack(pointing_info)
@@ -1719,7 +1874,6 @@ class StereoPredictCTLearnModel(PredictCTLearnModel):
         --energy_model="/path/to/your/stereo/energy/ctlearn_model.cpk" \\
         --skydirection_model="/path/to/your/stereo/skydirection/ctlearn_model.cpk" \\
         --output output.dl2.h5 \\
-        --PredictCTLearnModel.overwrite_tables=True \\
     """

     def start(self):
@@ -1727,7 +1881,10 @@ def start(self):
         # Retrieve the IDs from the dl1dh for the prediction tables
         example_identifiers = self.dl1dh_reader.unique_example_identifiers.copy()
         example_identifiers.keep_columns(SUBARRAY_EVENT_KEYS)
-        all_identifiers = self.dl1dh_reader.subarray_trigger_table.copy()
+        all_identifiers = read_table(
+            self.output_path,
+            DL1_TEL_TRIGGER_TABLE,
+        )
         all_identifiers.keep_columns(SUBARRAY_EVENT_KEYS + ["time"])
         nonexample_identifiers = setdiff(
             all_identifiers, example_identifiers, keys=SUBARRAY_EVENT_KEYS
@@ -1756,11 +1913,11 @@ def start(self):
         pointing_info = super()._store_pointing(all_identifiers)

         self.log.info("Starting the prediction...")
-        classification_feature_vectors = None
+        particletype_feature_vectors = None
         if self.load_type_model_from is not None:
             # Predict the classification of the primary particle
-            classification_table, classification_feature_vectors = (
-                super()._predict_classification(example_identifiers)
+            particletype_table, particletype_feature_vectors = (
+                super()._predict_particletype(example_identifiers)
             )
             if self.dl2_subarray:
                 # Produce output table with NaNs for missing predictions
@@ -1770,46 +1927,46 @@ def start(self):
                     columns=[f"{self.prefix}_tel_prediction"],
                     shapes=[(len(nonexample_identifiers),)],
                 )
-                classification_table = vstack([classification_table, nan_table])
-                # Add is_valid column to the classification table
-                classification_table.add_column(
+                particletype_table = vstack([particletype_table, nan_table])
+                # Add is_valid column to the particletype table
+                particletype_table.add_column(
                     ~np.isnan(
-                        classification_table[f"{self.prefix}_tel_prediction"].data,
+                        particletype_table[f"{self.prefix}_tel_prediction"].data,
                         dtype=bool,
                     ),
                     name=f"{self.prefix}_tel_is_valid",
                 )
                 # Rename the columns for the stereo mode
-                classification_table.rename_column(
+                particletype_table.rename_column(
                     f"{self.prefix}_tel_prediction", f"{self.prefix}_prediction"
                 )
-                classification_table.rename_column(
+                particletype_table.rename_column(
                     f"{self.prefix}_tel_is_valid", f"{self.prefix}_is_valid"
                 )
-                # Deduplicate the subarray classification table to have only one entry per event
-                classification_table = super().deduplicate_first_valid(
-                    table=classification_table,
+                # Deduplicate the subarray particletype table to have only one entry per event
+                particletype_table = super().deduplicate_first_valid(
+                    table=particletype_table,
                     keys=SUBARRAY_EVENT_KEYS,
                     valid_col=f"{self.prefix}_is_valid",
                 )
-                classification_table.sort(SUBARRAY_EVENT_KEYS)
+                particletype_table.sort(SUBARRAY_EVENT_KEYS)
                 # Add the default values and meta data to the table
                 add_defaults_and_meta(
-                    classification_table,
+                    particletype_table,
                     ParticleClassificationContainer,
                     prefix=self.prefix,
                 )
                 # Save the prediction to the output file
                 write_table(
-                    classification_table,
+                    particletype_table,
                     self.output_path,
-                    f"{DL2_SUBARRAY_GROUP}/classification/{self.prefix}",
+                    f"{DL2_SUBARRAY_PARTICLETYPE_GROUP}/{self.prefix}",
                     overwrite=self.overwrite_tables,
                 )
                 self.log.info(
                     "DL2 prediction data was stored in '%s' under '%s'",
                     self.output_path,
-                    f"{DL2_SUBARRAY_GROUP}/classification/{self.prefix}",
+                    f"{DL2_SUBARRAY_PARTICLETYPE_GROUP}/{self.prefix}",
                 )

         energy_feature_vectors = None
         if self.load_energy_model_from is not None:
@@ -1857,13 +2014,12 @@ def start(self):
                 write_table(
                     energy_table,
                     self.output_path,
-                    f"{DL2_SUBARRAY_GROUP}/energy/{self.prefix}",
-                    overwrite=self.overwrite_tables,
+                    f"{DL2_SUBARRAY_ENERGY_GROUP}/{self.prefix}",
                 )
                 self.log.info(
                     "DL2 prediction data was stored in '%s' under '%s'",
                     self.output_path,
-                    f"{DL2_SUBARRAY_GROUP}/energy/{self.prefix}",
+                    f"{DL2_SUBARRAY_ENERGY_GROUP}/{self.prefix}",
                 )

         direction_feature_vectors = None
         if self.load_skydirection_model_from is not None:
@@ -1915,13 +2071,12 @@ def start(self):
                 write_table(
                     direction_table,
                     self.output_path,
-                    f"{DL2_SUBARRAY_GROUP}/geometry/{self.prefix}",
-                    overwrite=self.overwrite_tables,
+                    f"{DL2_SUBARRAY_GEOMETRY_GROUP}/{self.prefix}",
                 )
                 self.log.info(
                     "DL2 prediction data was stored in '%s' under '%s'",
                     self.output_path,
-                    f"{DL2_SUBARRAY_GROUP}/geometry/{self.prefix}",
+                    f"{DL2_SUBARRAY_GEOMETRY_GROUP}/{self.prefix}",
                 )

         # Create the feature vector table if the DL1 features are enabled
@@ -1930,17 +2085,17 @@ def start(self):
             feature_vector_table = super()._create_feature_vectors_table(
                 example_identifiers,
                 nonexample_identifiers,
-                classification_feature_vectors,
+                particletype_feature_vectors,
                 energy_feature_vectors,
                 direction_feature_vectors,
             )
             # Loop over the selected telescopes and store the feature vectors
             # for each telescope in the output file. The feature vectors are stored
-            # in the DL1_TELESCOPE_GROUP/features/{prefix}/tel_{tel_id:03d} table.
+            # in the DL1_TEL_GROUP/features/{prefix}/tel_{tel_id:03d} table.
             # Rename the columns for the stereo mode
             feature_vector_table.rename_column(
-                f"{self.prefix}_tel_classification_feature_vectors",
-                f"{self.prefix}_classification_feature_vectors",
+                f"{self.prefix}_tel_particletype_feature_vectors",
+                f"{self.prefix}_particletype_feature_vectors",
             )
             feature_vector_table.rename_column(
                 f"{self.prefix}_tel_energy_feature_vectors",
@@ -1959,7 +2114,6 @@ def start(self):
                 feature_vector_table,
                 self.output_path,
                 f"{DL1_SUBARRAY_GROUP}/features/{self.prefix}",
-                overwrite=self.overwrite_tables,
             )
             self.log.info(
                 "DL1 feature vectors was stored in '%s' under '%s'",
@@ -1979,7 +2133,7 @@ def _store_mc_subarray_pointing(self, all_identifiers):
         # Read the subarray pointing table
         pointing_info = read_table(
             self.input_url,
-            f"{SIMULATION_CONFIG_TABLE}",
+            SIMULATION_RUN_TABLE,
         )
         # Assuming min_az = max_az and min_alt = max_alt
         pointing_info.keep_columns(["obs_id", "min_az", "min_alt"])
@@ -2007,13 +2161,12 @@ def _store_mc_subarray_pointing(self, all_identifiers):
         write_table(
             pointing_table,
             self.output_path,
-            f"{SUBARRAY_POINTING_GROUP}",
-            overwrite=self.overwrite_tables,
+            DL1_SUBARRAY_POINTING_GROUP,
         )
         self.log.info(
             "DL1 subarray pointing table was stored in '%s' under '%s'",
             self.output_path,
-            f"{SUBARRAY_POINTING_GROUP}",
+            DL1_SUBARRAY_POINTING_GROUP,
         )
         return pointing_info
diff --git a/ctlearn/tools/tests/test_predict_model.py b/ctlearn/tools/tests/test_predict_model.py
new file mode 100644
index 00000000..23f32352
--- /dev/null
+++ b/ctlearn/tools/tests/test_predict_model.py
@@ -0,0 +1,99 @@
+import shutil
+import numpy as np
+import pytest
+
+from ctapipe.core import run_tool
+from ctapipe.io import TableLoader
+from ctlearn.tools import MonoPredictCTLearnModel
+
+# Columns that should be present in the output DL2 file
+REQUIRED_COLUMNS = [
+    "event_id",
+    "obs_id",
+    "CTLearn_alt",
+    "true_alt",
+    "CTLearn_az",
+    "true_az",
+    "CTLearn_prediction",
+    "true_shower_primary_id",
+    "CTLearn_energy",
+    "true_energy",
+    "CTLearn_is_valid",
+    "CTLearn_telescopes",
+    "tels_with_trigger"
+]
+
+
+@pytest.mark.parametrize("dl2_tel_flag", ["dl2-telescope", "no-dl2-telescope"])
+def test_predict_model(tmp_path, ctlearn_trained_dl1_models, dl1_gamma_file, dl2_tel_flag):
+    """
+    Test prediction with the trained CTLearn models on the DL1 gamma file for all reconstruction tasks.
+    Each test run gets its own isolated temp directories.
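+    The trained models are provided by the ctlearn_trained_dl1_models fixture and
+    the resulting DL2 output is validated with ctapipe's TableLoader.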
+ """ + + model_dir = tmp_path / "trained_models" + model_dir.mkdir(parents=True, exist_ok=True) + + dl2_dir = tmp_path / "dl2_output" + dl2_dir.mkdir(parents=True, exist_ok=True) + + # Hardcopy the trained models to the model directory + for reco_task in ["type", "energy", "cameradirection"]: + shutil.copy(ctlearn_trained_dl1_models[f"{reco_task}"], model_dir / f"ctlearn_model_{reco_task}.keras") + model_file = model_dir / f"ctlearn_model_{reco_task}.keras" + assert model_file.exists(), f"Trained model file not found for {reco_task}" + + # Build command-line arguments + argv = [ + f"--input_url={dl1_gamma_file}", + "--PredictCTLearnModel.batch_size=4", + "--DLImageReader.focal_length_choice=EQUIVALENT", + f"--PredictCTLearnModel.load_type_model_from={model_dir}/ctlearn_model_type.keras", + f"--PredictCTLearnModel.load_energy_model_from={model_dir}/ctlearn_model_energy.keras", + f"--PredictCTLearnModel.load_cameradirection_model_from={model_dir}/ctlearn_model_cameradirection.keras", + "--no-dl1-images", + "--no-true-images", + f"--{dl2_tel_flag}", + ] + # Test with different allowed telescope lists (single and multiple telescope IDs) + allowed_tels_dict = {"single_tel_id" : [7], "multiple_tel_ids": [7, 13, 15, 16, 17, 19]} + for name, allowed_tels in allowed_tels_dict.items(): + output_file = dl2_dir / f"gamma_{dl2_tel_flag}_{name}.dl2.h5" + # Run Prediction tool + assert run_tool( + MonoPredictCTLearnModel(), + argv = argv + [ + f"--output={output_file}", + f"--DLImageReader.allowed_tels={allowed_tels}", + ], + cwd=tmp_path + ) == 0 + + # Check that the output DL2 file was created + assert output_file.exists(), "Output DL2 file not created" + # Check that the created DL2 file can be read with the TableLoader + with TableLoader(output_file, pointing=True, focal_length_choice="EQUIVALENT") as loader: + # Check telescope-wise data + tel_events = loader.read_telescope_events_by_id(telescopes=allowed_tels, dl1_parameters=True, dl2=True) + for tel_id in allowed_tels: + assert len(tel_events[tel_id]) > 0 + for col in REQUIRED_COLUMNS + [ + "tel_id", + "hillas_intensity", + "leakage_pixels_width_2", + "telescope_pointing_azimuth", + "telescope_pointing_altitude", + ]: + assert col in tel_events[tel_id].colnames, f"{col} missing in DL2 file {output_file.name}" + assert tel_events[tel_id][col][0] is not np.nan, f"{col} has NaN values in DL2 file {output_file.name}" + # Check subarray-wise data + subarray_events = loader.read_subarray_events(start=0, stop=2, dl2=True) + assert len(subarray_events) > 0 + for col in REQUIRED_COLUMNS: + assert col in subarray_events.colnames, f"{col} missing in DL2 file {output_file.name}" + assert subarray_events[col][0] is not np.nan, f"{col} has NaN values in DL2 file {output_file.name}" \ No newline at end of file diff --git a/ctlearn/tools/tests/test_train_model.py b/ctlearn/tools/tests/test_train_model.py new file mode 100644 index 00000000..b32c5256 --- /dev/null +++ b/ctlearn/tools/tests/test_train_model.py @@ -0,0 +1,75 @@ +import pandas as pd +import pytest +import shutil + +from ctapipe.core import run_tool +from ctlearn.tools import TrainCTLearnModel + +@pytest.mark.parametrize("reco_task", ["type", "energy", "cameradirection"]) +def test_train_ctlearn_model(reco_task, dl1_gamma_file, dl1_proton_file, tmp_path): + """ + Test training CTLearn model using the DL1 gamma and proton files for all reconstruction tasks. + Each test run gets its own isolated temp directories. 
+ """ + # Temporary directories for signal and background + signal_dir = tmp_path / "gamma_dl1" + signal_dir.mkdir(parents=True, exist_ok=True) + + background_dir = tmp_path / "proton_dl1" + background_dir.mkdir(parents=True, exist_ok=True) + + # Hardcopy DL1 gamma file to the signal directory + shutil.copy(dl1_gamma_file, signal_dir) + # Hardcopy DL1 proton file to the background directory + shutil.copy(dl1_proton_file, background_dir) + + # Output directory for trained model + output_dir = tmp_path / f"ctlearn_{reco_task}" + allowed_tels = [7, 13, 15, 16, 17, 19] + + # Build command-line arguments + argv = [ + f"--signal={signal_dir}", + "--pattern-signal=*.dl1.h5", + f"--output={output_dir}", + f"--reco={reco_task}", + "--TrainCTLearnModel.n_epochs=2", + "--TrainCTLearnModel.batch_size=4", + "--DLImageReader.focal_length_choice=EQUIVALENT", + f"--DLImageReader.allowed_tels={allowed_tels}" + ] + + # Include background only for classification task + if reco_task == "type": + argv.extend([ + f"--background={background_dir}", + "--pattern-background=*.dl1.h5", + "--DLImageReader.enforce_subarray_equality=False", + ]) + + # Run training + assert run_tool(TrainCTLearnModel(), argv=argv, cwd=tmp_path) == 0 + + # --- Additional checks --- + # Check that the trained model exists + model_file = output_dir / "ctlearn_model.keras" + assert model_file.exists(), f"Trained model file not found for {reco_task}" + # Check training_log.csv exists + log_file = output_dir / "training_log.csv" + assert log_file.exists(), f"Training log file not found for {reco_task}" + # Read CSV and verify number of epochs + log_df = pd.read_csv(log_file) + num_epochs_logged = log_df.shape[0] + assert num_epochs_logged == 2, f"Expected two epochs, found {num_epochs_logged} for {reco_task}" + # Check that val_loss column exists + assert "val_loss" in log_df.columns, ( + f"'val_loss' column missing in training_log.csv for {reco_task}" + ) + val_loss = log_df["val_loss"].dropna() + assert not val_loss.empty, ( + f"'val_loss' column is empty for {reco_task}" + ) + assert ((val_loss >= 0.0) & (val_loss <= 1.0)).all(), ( + f"'val_loss' values out of range [0.0, 1.0] for {reco_task}: " + f"{val_loss.tolist()}" + ) \ No newline at end of file diff --git a/ctlearn/tools/train_model.py b/ctlearn/tools/train_model.py index 2ce4ced5..4c440350 100644 --- a/ctlearn/tools/train_model.py +++ b/ctlearn/tools/train_model.py @@ -335,11 +335,7 @@ def setup(self): monitor = "val_loss" monitor_mode = "min" # Model checkpoint callback - # Temp fix for supporting keras2 & keras3 - if int(keras.__version__.split(".")[0]) >= 3: - model_path = f"{self.output_dir}/ctlearn_model.keras" - else: - model_path = f"{self.output_dir}/ctlearn_model.cpk" + model_path = f"{self.output_dir}/ctlearn_model.keras" model_checkpoint_callback = keras.callbacks.ModelCheckpoint( filepath=model_path, monitor=monitor, diff --git a/environment.yml b/environment.yml deleted file mode 100644 index 20a49207..00000000 --- a/environment.yml +++ /dev/null @@ -1,23 +0,0 @@ -# conda env create -f environment.yml -name: ctlearn -channels: - - anaconda - - conda-forge -dependencies: - - python=3.10 - - astropy - - setuptools - - numpy - - pandas - - pip - - pytables - - tables - - c-blosc2=2.13 - - pyyaml - - scikit-learn - - ctapipe - - pip: - - numba - - tensorflow>=2.14,<2.15 - - dl1_data_handler>=0.14.5,<0.15 - - pydot diff --git a/pyproject.toml b/pyproject.toml index 434bd27a..c361c71f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,16 +20,15 @@ classifiers = [ "Topic 
:: Scientific/Engineering :: Astronomy", "Topic :: Scientific/Engineering :: Physics", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", ] -requires-python = ">=3.10" +requires-python = ">=3.12" dependencies = [ - "dl1_data_handler>=0.14.5, <0.15", + "dl1_data_handler>=0.14.5", "astropy", "numpy", "pandas", @@ -37,10 +36,10 @@ dependencies = [ "pyyaml", "scikit-learn", "numba", - "tensorflow>=2.14,<2.15", + "tensorflow>=2.16", "pydot", "setuptools", - "ctapipe>=0.22.0, <0.26", + "ctapipe[all]>=0.28", ] dynamic = ["version"]