From 2d55059884eb3f67dbb25527827a9231de84ff66 Mon Sep 17 00:00:00 2001 From: Lea Vauchier Date: Tue, 23 Sep 2025 16:32:23 +0200 Subject: [PATCH] Add cog when running with gpao --- CHANGELOG.md | 2 + Dockerfile | 3 + altianalysis/run_difference_with_gpao.py | 90 +++++++++++++++++++++--- test/test_run_difference_with_gpao.py | 88 ++++++++++++++++++++--- 4 files changed, 163 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5726ab7..84b6642 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ # master +- feature: option to generate a COG when running with gpao + # 1.1.0 - refactor: rename python files to use english words diff --git a/Dockerfile b/Dockerfile index f7c1e29..a8ba173 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,6 +12,9 @@ COPY --from=mamba_pdal /opt/conda/envs/altianalysis/lib/ /opt/conda/envs/altiana COPY --from=mamba_pdal /opt/conda/envs/altianalysis/ssl /opt/conda/envs/altianalysis/ssl COPY --from=mamba_pdal /opt/conda/envs/altianalysis/share/proj/proj.db /opt/conda/envs/altianalysis/share/proj/proj.db +# install gdal command line tools +COPY --from=mamba_pdal /opt/conda/envs/altianalysis/bin/*gdal* /opt/conda/envs/altianalysis/bin/ + ENV PATH=$PATH:/opt/conda/envs/altianalysis/bin/ ENV PROJ_LIB=/opt/conda/envs/altianalysis/share/proj/ diff --git a/altianalysis/run_difference_with_gpao.py b/altianalysis/run_difference_with_gpao.py index 60311dd..be519a8 100644 --- a/altianalysis/run_difference_with_gpao.py +++ b/altianalysis/run_difference_with_gpao.py @@ -28,7 +28,7 @@ def get_tile_names(folder: Path) -> List[str]: def create_one_job_one_difference(store: Store, dir_in: Path, input_file: str, output: Path): job_name = f"difference_{input_file}" command = f""" -docker run -t --rm --userns=host --shm-size=2gb +docker run -t --rm --userns=host -v {store.to_unix(dir_in)}:/input -v {store.to_unix(output)}:/output ghcr.io/ignf/altianalysis:{__version__} @@ -40,7 +40,7 @@ def create_one_job_one_difference(store: Store, 
dir_in: Path, input_file: str, o return job -def create_gpao_project( +def create_main_gpao_project( dtms_lhd: Path, out: Path, store: Store, @@ -70,6 +70,52 @@ def create_gpao_project( return Project(project_name, jobs) +def create_cog_gpao_project( + input_dir: str, + output_dir: str, + store: Store, + project_name=str, + output_filename: str = "cog.tif", + resampling: str = "CUBIC", +) -> Project: + job_name = "create_cog" + command = f""" +docker run --rm --userns=host +-v {store.to_unix(input_dir)}:/input +-v {store.to_unix(output_dir)}:/output +ghcr.io/ignf/altianalysis:{__version__} +bash -c 'ls -d /input/*.tif > cog_input_files.txt && +gdalbuildvrt -input_file_list cog_input_files.txt vrt_output.vrt && +gdal_translate \ + --config GDAL_DISABLE_READDIR_ON_OPEN TRUE \ + -co BIGTIFF=YES \ + -co RESAMPLING={resampling} \ + -co COMPRESS=LZW \ + -co PREDICTOR=YES \ + -of COG \ + vrt_output.vrt \ + /output/{output_filename}' +""" + job = Job(job_name, command, tags=["docker"]) + project = Project(project_name, [job]) + + return project + + +def create_gpao_projects( + dtms_lhd: Path, out: Path, store: Store, project_name: str, cog_filename: str +) -> List[Project]: + project_main = create_main_gpao_project(dtms_lhd, out, store, project_name) + projects = [project_main] + if cog_filename: + project_cog = create_cog_gpao_project( + out, out, output_filename=cog_filename, store=store, project_name=f"{project_name}_cog" + ) + project_cog.add_dependency(project_main) + projects.append(project_cog) + return projects + + def compute_on_gpao( dtms_lhd: Path, out: Path, @@ -77,7 +123,21 @@ def compute_on_gpao( local_store_path: Path, runner_store_path: PurePosixPath, project_name: str, + cog_filename: str = "", ): + """Compute difference on all tif files of a folder, and optionally create a COG of the created difference files + using GPAO for parallelization. 
+ If created, the output cog is saved in the same folder as the individual tif outputs + + Args: + dtms_lhd (Path): folder containing the reference dtms + out (Path): folder in which to save the output tiles + gpao_hostname (str): hostname of the gpao server + local_store_path (Path): path on your computer to a common store between local computer and gpao runners + runner_store_path (PurePosixPath): path on the gpao runners to a common store between local computer and gpao + project_name (str): name to give to the main gpao project + cog_filename (str): name of the cog file to create (no cog is generated if cog_filename is empty) + """ logging.debug(f"Use GPAO server: {gpao_hostname}") @@ -85,8 +145,9 @@ def compute_on_gpao( logging.debug(f"Local store path ({local_store_path}) converted to client store path ({runner_store_path})") - project = create_gpao_project(dtms_lhd, out, store, project_name) - builder = Builder([project]) + projects = create_gpao_projects(dtms_lhd, out, store, project_name, cog_filename) + + builder = Builder(projects) builder.save_as_json(out / "gpao_project.json") @@ -128,6 +189,14 @@ def parse_args(): ) parser.add_argument("-g", "--gpao_hostname", type=str, help="Hostname du serveur GPAO", default="localhost") parser.add_argument("-p", "--project_name", type=str, default="altianalysis", help="Nom de projet pour la GPAO") + parser.add_argument( + "-c", + "--cog_filename", + type=str, + help="Nom du fichier cog sauvé dans le même dossier que les sorties individuelles. 
" + "Pas de cog généré si non renseigné", + default="", + ) return parser.parse_args() @@ -138,10 +207,11 @@ def parse_args(): args = parse_args() compute_on_gpao( - args.dtm_lhd_dir, - args.out, - args.gpao_hostname, - args.local_store_path, - args.runner_store_path, - args.project_name, + dtms_lhd=args.dtm_lhd_dir, + out=args.out, + gpao_hostname=args.gpao_hostname, + local_store_path=args.local_store_path, + runner_store_path=args.runner_store_path, + project_name=args.project_name, + cog_filename=args.cog_filename, ) diff --git a/test/test_run_difference_with_gpao.py b/test/test_run_difference_with_gpao.py index b0aa244..9850eb1 100644 --- a/test/test_run_difference_with_gpao.py +++ b/test/test_run_difference_with_gpao.py @@ -1,4 +1,3 @@ -import json import os import shutil import test.utils as tu @@ -23,26 +22,85 @@ def setup_module(module): os.makedirs(TMP_PATH) -def test_create_gpao_project(): +def test_create_main_gpao_project(): # No need to create the output dir, this test does not run the gpao projects - output_dir = TMP_PATH / "create_gpao_project" + output_dir = TMP_PATH / "create_main_gpao_project" dtm_lidar_lhds = Path("./data/lhd_dir_gpao") project_name = "test_create_gpao_project_difference_with_dem_rge_alti" - project = run_difference_with_gpao.create_gpao_project(dtm_lidar_lhds, output_dir, STORE, project_name) + project = run_difference_with_gpao.create_main_gpao_project(dtm_lidar_lhds, output_dir, STORE, project_name) assert project is not None - project_json = json.loads(project.to_json()) + assert len(project.jobs) == 5 - assert len(project_json["jobs"]) == 5 + assert project.name.startswith(project_name) - assert project_json["name"].startswith(project_name) + +def test_create_gpao_projects_with_cog(): + # No need to create the output dir, this test does not run the gpao projects + output_dir = TMP_PATH / "create_gpao_projects_with_cog" + dtm_lidar_lhds = Path("./data/lhd_dir_gpao") + cog_filename = "cog.tif" + project_name = 
"test_create_gpao_project_difference_with_dem_rge_alti" + projects = run_difference_with_gpao.create_gpao_projects( + dtm_lidar_lhds, output_dir, STORE, project_name, cog_filename + ) + + assert len(projects) == 2 + assert len(projects[0].jobs) == 5 + assert len(projects[1].jobs) == 1 + + +def test_create_gpao_projects_without_cog(): + # No need to create the output dir, this test does not run the gpao projects + output_dir = TMP_PATH / "create_gpao_projects_without_cog" + dtm_lidar_lhds = Path("./data/lhd_dir_gpao") + project_name = "test_create_gpao_project_difference_with_dem_rge_alti" + projects = run_difference_with_gpao.create_gpao_projects(dtm_lidar_lhds, output_dir, STORE, project_name, "") + + assert len(projects) == 1 + assert len(projects[0].jobs) == 5 + + +@pytest.mark.gpao +def test_gpao_run_with_cog(): + dtm_lidar_lhds = "./data/lhd_dir_gpao" + output_dir = TMP_PATH / "gpao_run_with_cog" + output_dir.mkdir() + cog_filename = "cog.tif" + project_name = "test_run_altianalysis_gpao" + + gpao_hostname = os.environ.get("GPAO_API_URL", "localhost") + url_api = f"http://{gpao_hostname}:8080/api/" + + runner_store_path = Path(dtm_lidar_lhds).resolve() + local_store_path = Path("data/lhd_dir_gpao").resolve() + + run_difference_with_gpao.compute_on_gpao( + Path(dtm_lidar_lhds), + Path(output_dir), + gpao_hostname, + local_store_path, + runner_store_path, + project_name, + cog_filename=cog_filename, + ) + + if gpao_hostname == "localhost": + tu.execute_gpao_client(tags="docker", num_thread=4) + wait_running_job(url_api, project_name, delay_second=1, delay_log_second=10) + + assert ( + len([f for f in os.listdir(output_dir) if os.path.splitext(f)[-1] == ".tif"]) == 6 + ) # 5 individual output files + cog + + assert (output_dir / cog_filename).is_file() @pytest.mark.gpao -def test_gpao_run(): +def test_gpao_run_without_cog(): dtm_lidar_lhds = "./data/lhd_dir_gpao" - output_dir = TMP_PATH / "gpao_run" + output_dir = TMP_PATH / "gpao_run_without_cog" 
output_dir.mkdir() project_name = "test_run_altianalysis_gpao" @@ -53,9 +111,19 @@ def test_gpao_run(): local_store_path = Path("data/lhd_dir_gpao").resolve() run_difference_with_gpao.compute_on_gpao( - Path(dtm_lidar_lhds), Path(output_dir), gpao_hostname, local_store_path, runner_store_path, project_name + Path(dtm_lidar_lhds), + Path(output_dir), + gpao_hostname, + local_store_path, + runner_store_path, + project_name, + cog_filename="", ) if gpao_hostname == "localhost": tu.execute_gpao_client(tags="docker", num_thread=4) wait_running_job(url_api, project_name, delay_second=1, delay_log_second=10) + + assert ( + len([f for f in os.listdir(output_dir) if os.path.splitext(f)[-1] == ".tif"]) == 5 + ) # 5 individual output files + no cog