diff --git a/src/pinefarm/cli/run.py b/src/pinefarm/cli/run.py index 58feb78..3713916 100644 --- a/src/pinefarm/cli/run.py +++ b/src/pinefarm/cli/run.py @@ -190,18 +190,18 @@ def run_dataset(runner): # collect results in the output pineappl grid runner.generate_pineappl() - - table.print_table( - table.convolute_grid( - runner.grid, runner.pdf, integrated=isinstance(runner, mg5.Mg5) - ), - runner.results(), - runner.dest, - ) - - # TODO: annotate_version should be a post-processing step - # however at the moment only works in 1-grid cases - runner.annotate_versions() + if runner._print_comparison: + table.print_table( + table.convolute_grid( + runner.grid, runner.pdf, integrated=isinstance(runner, mg5.Mg5) + ), + runner.results(), + runner.dest, + ) + + # TODO: annotate_version should be a post-processing step + # however at the moment only works in 1-grid cases + runner.annotate_versions() runner.postprocess() diff --git a/src/pinefarm/external/__init__.py b/src/pinefarm/external/__init__.py index 3423915..d8c8828 100644 --- a/src/pinefarm/external/__init__.py +++ b/src/pinefarm/external/__init__.py @@ -59,4 +59,9 @@ def decide_external_tool(dsname: str): return mg5.Mg5, "blue" + if (configs["paths"]["runcards"] / dsname / "ploughshare_link.txt").exists(): + from . 
import plough + + return plough.Plough, "purple" + raise ValueError(f"pinefarm could not discover the tool to use for {dsname}") diff --git a/src/pinefarm/external/interface.py b/src/pinefarm/external/interface.py index 708f15d..de8b3fe 100644 --- a/src/pinefarm/external/interface.py +++ b/src/pinefarm/external/interface.py @@ -34,12 +34,22 @@ class External(abc.ABC): kind = None def __init__( - self, name, theory, pdf, timestamp=None, runcards_path=None, output_folder=None + self, + name, + theory, + pdf, + timestamp=None, + runcards_path=None, + output_folder=None, + print_comparison=True, + postrun_without_grids=False, ): self.name = name self.theory = theory self.pdf = pdf self.timestamp = timestamp + self._print_comparison = print_comparison + self._postrun_without_grids = postrun_without_grids if runcards_path is None: self._runcards_path = configs.configs["paths"]["runcards"] else: @@ -179,7 +189,7 @@ def postprocess(self): else: grids = list(self.dest.glob("*.pineappl*")) - if not grids: + if not grids and not self._postrun_without_grids: raise ValueError("Tried to run postprocessing in a folder with no grids?") os.environ["PINECARD"] = self.source.as_posix() @@ -199,7 +209,7 @@ def postprocess(self): entries = {} if metadata.exists(): for line in metadata.read_text().splitlines(): - k, v = line.split("=") + k, v = line.split("=", 1) entries[k] = v for ext in ["*.pineappl.lz4", "*.pineappl"]: diff --git a/src/pinefarm/external/plough.py b/src/pinefarm/external/plough.py new file mode 100644 index 0000000..cc175b1 --- /dev/null +++ b/src/pinefarm/external/plough.py @@ -0,0 +1,81 @@ +"""Download grids + convert them to pineappl format.""" + +import os +import shutil +import tarfile +import urllib.request + +import pineappl +import requests + +from .. import table +from . 
import interface  # tail of the relative-import statement begun at the end of the preceding line

# Name of the runcard file that holds the ploughshare download URL.
PLOUGHSHARE_LINK_FILENAME = "ploughshare_link.txt"
# Sub-directory of the extracted archive that contains the grid files.
GRIDS_FROM_PS = "grids"


class Plough(interface.External):
    """Interface to download grids directly from ploughshare.

    The runcard folder must contain a ``ploughshare_link.txt`` file holding
    the URL of the archive to download.  The archive is fetched into the
    output folder, unpacked, and every file found in its ``grids``
    sub-directory is moved to the output folder as ``grid_<NNN>.<extension>``.

    The comparison table is disabled (``print_comparison=False``) and
    postprocessing is allowed to start without any pineappl grid
    (``postrun_without_grids=True``), because the conversion to pineappl
    happens in ``postrun.sh``.
    """

    def __init__(self, pinecard, theorycard, *args, **kwargs):
        """Read the download link and derive the local file names.

        Parameters
        ----------
        pinecard :
            dataset name, forwarded to ``interface.External``
        theorycard :
            theory card, forwarded to ``interface.External``
        *args, **kwargs :
            forwarded unchanged to ``interface.External``

        Raises
        ------
        ValueError
            if the link file does not yield a file name (empty file, or a
            link ending with ``/``)
        """
        super().__init__(
            pinecard,
            theorycard,
            *args,
            print_comparison=False,
            postrun_without_grids=True,
            **kwargs,
        )
        self.ps_link = self.source / PLOUGHSHARE_LINK_FILENAME
        # Text files usually end with a newline: strip it, otherwise it ends
        # up both in the requested URL and in the tarball file name.
        self.link = self.ps_link.read_text().strip()

        self.filename = self.link.rsplit("/", 1)[-1]
        if not self.filename:
            raise ValueError(
                f"{self.ps_link} does not contain a link to a downloadable file"
            )
        # Only the last suffix is dropped ("name.tgz" -> "name").
        # NOTE(review): presumably the archive always unpacks into a folder
        # with this name -- confirm for multi-suffix archives (".tar.gz").
        self.dir_name = self.filename.rsplit(".", 1)[0]
        self.tarball = self.dest / self.filename

    def run(self):
        """Download and extract the .tgz file."""
        print("Downloading from ploughshare...")
        self.download_to_dest()
        print("Extracting files...")
        self.extract_tarball()
        print(f"Grids successfully extracted to {self.dest}")

    def results(self):
        """Do nothing: there are no reference results to compare with."""
        return None

    def collect_versions(self):
        """No additional programs involved."""
        return {}

    def generate_pineappl(self):
        """Do nothing: grids are converted in postrun.sh."""
        return None

    def download_to_dest(self):
        """Download the file to the output folder.

        Raises
        ------
        FileNotFoundError
            if the download fails (the original error is chained as the
            cause), or if it appears to succeed but the tarball is missing
        """
        # Keep the ``try`` limited to the network call, so that the
        # "missing file" error below is not caught and wrapped a second time.
        try:
            urllib.request.urlretrieve(self.link, self.tarball)
        except Exception as e:
            raise FileNotFoundError(f"{self.tarball} could not be downloaded!") from e

        if not self.tarball.exists():
            raise FileNotFoundError(
                f"{self.tarball} not found but the download didn't seem to fail?"
            )
        print(f"Grids successfully downloaded to {self.tarball}")

    def extract_tarball(self):
        """Extract the contents and move the grids to the output folder.

        Every file in ``<dest>/<dir_name>/grids`` is expected to be named
        ``<name>.<id>.<extension>``; it is renamed to
        ``<dest>/grid_<NNN>.<extension>``, where ``NNN`` are the last three
        characters of ``<id>``.  The extracted folder and the tarball are
        removed afterwards.

        Raises
        ------
        ValueError
            if a grid file name does not have three dot-separated parts
        """
        with tarfile.open(self.tarball, "r:*") as tf:
            # The archive comes from the network: when the interpreter
            # supports extraction filters, refuse members that would be
            # written outside of the destination folder.
            if hasattr(tarfile, "data_filter"):
                tf.extractall(self.dest, filter="data")
            else:
                # NOTE(security): unfiltered extraction of a downloaded
                # archive; only reached on interpreters without filters.
                tf.extractall(self.dest)

        self.grids_dir = self.dest / self.dir_name / GRIDS_FROM_PS
        for grid in sorted(os.listdir(self.grids_dir)):
            parts = grid.split(".", 2)
            if len(parts) != 3:
                raise ValueError(
                    f"Unexpected grid file name {grid!r}:"
                    " expected '<name>.<id>.<extension>'"
                )
            _, grid_id, extension = parts
            grid_num = grid_id[-3:]
            os.rename(self.grids_dir / grid, self.dest / f"grid_{grid_num}.{extension}")

        # Clean up everything but the renamed grids.
        shutil.rmtree(self.dest / self.dir_name)
        self.tarball.unlink()