From 3167c613f12eb8757e648fd91ee741e1d11a2814 Mon Sep 17 00:00:00 2001 From: Andrew Pietraszkiewicz Date: Tue, 14 Apr 2026 17:13:45 +0100 Subject: [PATCH 01/13] initial implementation of ploughshare download --- src/pinefarm/external/__init__.py | 5 ++ src/pinefarm/external/interface.py | 2 +- src/pinefarm/external/plough.py | 80 ++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 src/pinefarm/external/plough.py diff --git a/src/pinefarm/external/__init__.py b/src/pinefarm/external/__init__.py index 3423915..af7af91 100644 --- a/src/pinefarm/external/__init__.py +++ b/src/pinefarm/external/__init__.py @@ -58,5 +58,10 @@ def decide_external_tool(dsname: str): from . import mg5 # pylint: disable=import-outside-toplevel return mg5.Mg5, "blue" + + if (configs["paths"]["runcards"] / dsname / "ploughshare_link.txt").exists(): + from . import plough + + return plough.Plough, "purple" raise ValueError(f"pinefarm could not discover the tool to use for {dsname}") diff --git a/src/pinefarm/external/interface.py b/src/pinefarm/external/interface.py index 708f15d..6c3c063 100644 --- a/src/pinefarm/external/interface.py +++ b/src/pinefarm/external/interface.py @@ -199,7 +199,7 @@ def postprocess(self): entries = {} if metadata.exists(): for line in metadata.read_text().splitlines(): - k, v = line.split("=") + k, v = line.split("=", 1) entries[k] = v for ext in ["*.pineappl.lz4", "*.pineappl"]: diff --git a/src/pinefarm/external/plough.py b/src/pinefarm/external/plough.py new file mode 100644 index 0000000..ead5524 --- /dev/null +++ b/src/pinefarm/external/plough.py @@ -0,0 +1,80 @@ +from . import interface +from .. import table +import requests +import shutil +import tarfile +import subprocess +import os +import pineappl + +''' +Download grids + convert them to pineappl format +''' + +class Plough(interface.External): + + def __init__(self, pinecard, theorycard, *args, **kwargs): + super().__init__(pinecard, theorycard, *args, **kwargs) + self.ps_link = self.source/"ploughshare_link.txt" + with open(self.ps_link) as ps_link: + self.link = ps_link.readline() + + self.filename = self.link.rsplit('/')[-1] + self.foldername = self.filename.rsplit('.', 1)[0] + self.tarball = self.dest/self.filename + self.processor = self.source/"process_grids.sh" + self.run() + self.generate_pineappl() + self.timestamp = 0 + + def run(self): + ''' + Download and extract the .tgz file + ''' + print("Downloading from ploughshare...") + self.download_to_dest() + print(f"Grids successfully downloaded to {self.tarball}") + print("Extracting files...") + self.extract_tarball() + print(f"Grids successfully extracted to {self.foldername}") + + def results(self): + pass + + def collect_versions(self): + return {} + + def generate_pineappl(self): + print("Grid conversion started...") + # the grids are converted and processed here + os.environ["PS_DIR"] = str(self.gridsfolder) + # note that filename is also foldername + os.environ["FILENAME"] = str(self.foldername) + if os.access(self.processor, os.X_OK): + shutil.copy2(self.processor, self.dest) + subprocess.run("./process_grids.sh", cwd=self.dest, check=True) + (self.dest/"process_grids.sh").unlink() + else: + raise ValueError(f"Grid conversion file present but not executable: {self.processor}") + self.grids = [] + for g in self.dest.glob("*.pineappl.lz4"): + self.grids.append(g) + + def download_to_dest(self): + ''' + Download the file and move it to the output folder + ''' + with requests.get(self.link, stream=True) as r: + r.raise_for_status() + with (self.dest/self.filename).open("wb") as f: + for chunk in r.iter_content(chunk_size=1024*1024): + if chunk: + f.write(chunk) + + def extract_tarball(self): + ''' + extract the contents + ''' + with tarfile.open(self.tarball, "r:*") as tf: + tf.extractall(self.dest) + self.gridsfolder = self.dest/self.foldername/"grids" \ No newline at end of file From 1c1e0a50ba5581226351671cb61f66b0e8eb3646 Mon Sep 17 00:00:00 2001 From: Andrew Pietraszkiewicz Date: Fri, 17 Apr 2026 10:13:06 +0100 Subject: [PATCH 02/13] Update src/pinefarm/external/plough.py Co-authored-by: Felix Hekhorn --- src/pinefarm/external/plough.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/pinefarm/external/plough.py b/src/pinefarm/external/plough.py index ead5524..5dfbe89 100644 --- a/src/pinefarm/external/plough.py +++ b/src/pinefarm/external/plough.py @@ -16,8 +16,7 @@ class Plough(interface.External): def __init__(self, pinecard, theorycard, *args, **kwargs): super().__init__(pinecard, theorycard, *args, **kwargs) self.ps_link = self.source/"ploughshare_link.txt" - with open(self.ps_link) as ps_link: - self.link = ps_link.readline() + self.link = self.ps_link.read_text() self.filename = self.link.rsplit('/')[-1] self.foldername = self.filename.rsplit('.', 1)[0] From 38cba0c4568ab043595d08fe0d13dd60606e25e8 Mon Sep 17 00:00:00 2001 From: Andrew Pietraszkiewicz Date: Sat, 18 Apr 2026 12:27:25 +0200 Subject: [PATCH 03/13] Update plough.py --- src/pinefarm/external/plough.py | 38 +++++++++++++++++---------------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/src/pinefarm/external/plough.py b/src/pinefarm/external/plough.py index 5dfbe89..97d47a5 100644 --- a/src/pinefarm/external/plough.py +++ b/src/pinefarm/external/plough.py @@ -1,27 +1,34 @@ +''' +Download grids + convert them to pineappl format +''' + from . import interface from .. import table import requests +import urllib.request import shutil import tarfile import subprocess import os import pineappl -''' -Download grids + convert them to pineappl format -''' +PLOUGHSHARE_LINK_FILENAME = "ploughshare_link.txt" +GRIDS_PROCESSOR = "process_grids.sh" +GRIDS_TMP = "grids" + class Plough(interface.External): def __init__(self, pinecard, theorycard, *args, **kwargs): super().__init__(pinecard, theorycard, *args, **kwargs) - self.ps_link = self.source/"ploughshare_link.txt" - self.link = self.ps_link.read_text() + self.ps_link = self.source/PLOUGHSHARE_LINK_FILENAME + with open(self.ps_link) as ps_link: + self.link = ps_link.readline() self.filename = self.link.rsplit('/')[-1] - self.foldername = self.filename.rsplit('.', 1)[0] + self.dir_name = self.filename.rsplit('.', 1)[0] self.tarball = self.dest/self.filename - self.processor = self.source/"process_grids.sh" + self.processor = self.source/GRIDS_PROCESSOR self.run() self.generate_pineappl() self.timestamp = 0 @@ -35,7 +42,7 @@ def run(self): print(f"Grids successfully downloaded to {self.tarball}") print("Extracting files...") self.extract_tarball() - print(f"Grids successfully extracted to {self.foldername}") + print(f"Grids successfully extracted to {self.dir_name}") def results(self): pass @@ -46,9 +53,9 @@ def collect_versions(self): def generate_pineappl(self): print("Grid conversion started...") # the grids are converted and processed here - os.environ["PS_DIR"] = str(self.gridsfolder) - # note that filename is also foldername - os.environ["FILENAME"] = str(self.foldername) + os.environ["PS_DIR"] = str(self.grids_dir) + # note that filename is also dir_name + os.environ["FILENAME"] = str(self.dir_name) if os.access(self.processor, os.X_OK): shutil.copy2(self.processor, self.dest) subprocess.run("./process_grids.sh", cwd=self.dest, check=True) @@ -63,12 +70,7 @@ def download_to_dest(self): ''' Download the file and move it to the output folder ''' - with requests.get(self.link, stream=True) as r: - r.raise_for_status() - with (self.dest/self.filename).open("wb") as f: - for chunk in r.iter_content(chunk_size=1024*1024): - if chunk: - f.write(chunk) + urllib.request.urlretrieve(self.link, self.dest/self.filename) def extract_tarball(self): ''' @@ -76,4 +78,4 @@ def extract_tarball(self): ''' with tarfile.open(self.tarball, "r:*") as tf: tf.extractall(self.dest) - self.gridsfolder = self.dest/self.foldername/"grids" \ No newline at end of file + self.grids_dir = self.dest/self.dir_name/GRIDS_TMP \ No newline at end of file From 1e6c3a5554208775d43480f9b904e6e4884c964e Mon Sep 17 00:00:00 2001 From: Andrew Pietraszkiewicz Date: Tue, 14 Apr 2026 17:13:45 +0100 Subject: [PATCH 04/13] initial implementation of ploughshare download --- src/pinefarm/external/__init__.py | 5 ++ src/pinefarm/external/interface.py | 2 +- src/pinefarm/external/plough.py | 80 ++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 src/pinefarm/external/plough.py diff --git a/src/pinefarm/external/__init__.py b/src/pinefarm/external/__init__.py index 3423915..af7af91 100644 --- a/src/pinefarm/external/__init__.py +++ b/src/pinefarm/external/__init__.py @@ -58,5 +58,10 @@ def decide_external_tool(dsname: str): from . import mg5 # pylint: disable=import-outside-toplevel return mg5.Mg5, "blue" + + if (configs["paths"]["runcards"] / dsname / "ploughshare_link.txt").exists(): + from . import plough + + return plough.Plough, "purple" raise ValueError(f"pinefarm could not discover the tool to use for {dsname}") diff --git a/src/pinefarm/external/interface.py b/src/pinefarm/external/interface.py index 708f15d..6c3c063 100644 --- a/src/pinefarm/external/interface.py +++ b/src/pinefarm/external/interface.py @@ -199,7 +199,7 @@ def postprocess(self): entries = {} if metadata.exists(): for line in metadata.read_text().splitlines(): - k, v = line.split("=") + k, v = line.split("=", 1) entries[k] = v for ext in ["*.pineappl.lz4", "*.pineappl"]: diff --git a/src/pinefarm/external/plough.py b/src/pinefarm/external/plough.py new file mode 100644 index 0000000..ead5524 --- /dev/null +++ b/src/pinefarm/external/plough.py @@ -0,0 +1,80 @@ +from . import interface +from .. import table +import requests +import shutil +import tarfile +import subprocess +import os +import pineappl + +''' +Download grids + convert them to pineappl format +''' + +class Plough(interface.External): + + def __init__(self, pinecard, theorycard, *args, **kwargs): + super().__init__(pinecard, theorycard, *args, **kwargs) + self.ps_link = self.source/"ploughshare_link.txt" + with open(self.ps_link) as ps_link: + self.link = ps_link.readline() + + self.filename = self.link.rsplit('/')[-1] + self.foldername = self.filename.rsplit('.', 1)[0] + self.tarball = self.dest/self.filename + self.processor = self.source/"process_grids.sh" + self.run() + self.generate_pineappl() + self.timestamp = 0 + + def run(self): + ''' + Download and extract the .tgz file + ''' + print("Downloading from ploughshare...") + self.download_to_dest() + print(f"Grids successfully downloaded to {self.tarball}") + print("Extracting files...") + self.extract_tarball() + print(f"Grids successfully extracted to {self.foldername}") + + def results(self): + pass + + def collect_versions(self): + return {} + + def generate_pineappl(self): + print("Grid conversion started...") + # the grids are converted and processed here + os.environ["PS_DIR"] = str(self.gridsfolder) + # note that filename is also foldername + os.environ["FILENAME"] = str(self.foldername) + if os.access(self.processor, os.X_OK): + shutil.copy2(self.processor, self.dest) + subprocess.run("./process_grids.sh", cwd=self.dest, check=True) + (self.dest/"process_grids.sh").unlink() + else: + raise ValueError(f"Grid conversion file present but not executable: {self.processor}") + self.grids = [] + for g in self.dest.glob("*.pineappl.lz4"): + self.grids.append(g) + + def download_to_dest(self): + ''' + Download the file and move it to the output folder + ''' + with requests.get(self.link, stream=True) as r: + r.raise_for_status() + with (self.dest/self.filename).open("wb") as f: + for chunk in r.iter_content(chunk_size=1024*1024): + if chunk: + f.write(chunk) + + def extract_tarball(self): + ''' + extract the contents + ''' + with tarfile.open(self.tarball, "r:*") as tf: + tf.extractall(self.dest) + self.gridsfolder = self.dest/self.foldername/"grids" \ No newline at end of file From b9284adad0d14d6ec170da48110652b1f5274162 Mon Sep 17 00:00:00 2001 From: Andrew Pietraszkiewicz Date: Fri, 17 Apr 2026 10:13:06 +0100 Subject: [PATCH 05/13] Update src/pinefarm/external/plough.py Co-authored-by: Felix Hekhorn --- src/pinefarm/external/plough.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/pinefarm/external/plough.py b/src/pinefarm/external/plough.py index ead5524..5dfbe89 100644 --- a/src/pinefarm/external/plough.py +++ b/src/pinefarm/external/plough.py @@ -16,8 +16,7 @@ class Plough(interface.External): def __init__(self, pinecard, theorycard, *args, **kwargs): super().__init__(pinecard, theorycard, *args, **kwargs) self.ps_link = self.source/"ploughshare_link.txt" - with open(self.ps_link) as ps_link: - self.link = ps_link.readline() + self.link = self.ps_link.read_text() self.filename = self.link.rsplit('/')[-1] self.foldername = self.filename.rsplit('.', 1)[0] From 991ede84808292e0d167e093e3bbfed022744728 Mon Sep 17 00:00:00 2001 From: Andrew Pietraszkiewicz Date: Sat, 18 Apr 2026 12:27:25 +0200 Subject: [PATCH 06/13] Update plough.py --- src/pinefarm/external/plough.py | 38 +++++++++++++++++---------------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/src/pinefarm/external/plough.py b/src/pinefarm/external/plough.py index 5dfbe89..97d47a5 100644 --- a/src/pinefarm/external/plough.py +++ b/src/pinefarm/external/plough.py @@ -1,27 +1,34 @@ +''' +Download grids + convert them to pineappl format +''' + from . import interface from .. import table import requests +import urllib.request import shutil import tarfile import subprocess import os import pineappl -''' -Download grids + convert them to pineappl format -''' +PLOUGHSHARE_LINK_FILENAME = "ploughshare_link.txt" +GRIDS_PROCESSOR = "process_grids.sh" +GRIDS_TMP = "grids" + class Plough(interface.External): def __init__(self, pinecard, theorycard, *args, **kwargs): super().__init__(pinecard, theorycard, *args, **kwargs) - self.ps_link = self.source/"ploughshare_link.txt" - self.link = self.ps_link.read_text() + self.ps_link = self.source/PLOUGHSHARE_LINK_FILENAME + with open(self.ps_link) as ps_link: + self.link = ps_link.readline() self.filename = self.link.rsplit('/')[-1] - self.foldername = self.filename.rsplit('.', 1)[0] + self.dir_name = self.filename.rsplit('.', 1)[0] self.tarball = self.dest/self.filename - self.processor = self.source/"process_grids.sh" + self.processor = self.source/GRIDS_PROCESSOR self.run() self.generate_pineappl() self.timestamp = 0 @@ -35,7 +42,7 @@ def run(self): print(f"Grids successfully downloaded to {self.tarball}") print("Extracting files...") self.extract_tarball() - print(f"Grids successfully extracted to {self.foldername}") + print(f"Grids successfully extracted to {self.dir_name}") def results(self): pass @@ -46,9 +53,9 @@ def collect_versions(self): def generate_pineappl(self): print("Grid conversion started...") # the grids are converted and processed here - os.environ["PS_DIR"] = str(self.gridsfolder) - # note that filename is also foldername - os.environ["FILENAME"] = str(self.foldername) + os.environ["PS_DIR"] = str(self.grids_dir) + # note that filename is also dir_name + os.environ["FILENAME"] = str(self.dir_name) if os.access(self.processor, os.X_OK): shutil.copy2(self.processor, self.dest) subprocess.run("./process_grids.sh", cwd=self.dest, check=True) @@ -63,12 +70,7 @@ def download_to_dest(self): ''' Download the file and move it to the output folder ''' - with requests.get(self.link, stream=True) as r: - r.raise_for_status() - with (self.dest/self.filename).open("wb") as f: - for chunk in r.iter_content(chunk_size=1024*1024): - if chunk: - f.write(chunk) + urllib.request.urlretrieve(self.link, self.dest/self.filename) def extract_tarball(self): ''' @@ -76,4 +78,4 @@ def extract_tarball(self): ''' with tarfile.open(self.tarball, "r:*") as tf: tf.extractall(self.dest) - self.gridsfolder = self.dest/self.foldername/"grids" \ No newline at end of file + self.grids_dir = self.dest/self.dir_name/GRIDS_TMP \ No newline at end of file From a96b92634b8761524baea7a94f7010759a117c8a Mon Sep 17 00:00:00 2001 From: Andrew Pietraszkiewicz Date: Sat, 18 Apr 2026 13:23:34 +0200 Subject: [PATCH 07/13] trying to pass pre-commit --- src/pinefarm/external/plough.py | 62 ++++++++++++++++----------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/src/pinefarm/external/plough.py b/src/pinefarm/external/plough.py index 97d47a5..fd6a2d0 100644 --- a/src/pinefarm/external/plough.py +++ b/src/pinefarm/external/plough.py @@ -1,16 +1,16 @@ -''' -Download grids + convert them to pineappl format -''' +"""Download grids + convert them to pineappl format.""" -from . import interface -from .. import table -import requests -import urllib.request +import os import shutil -import tarfile import subprocess -import os +import tarfile +import urllib.request + import pineappl +import requests + +from .. import table +from . import interface PLOUGHSHARE_LINK_FILENAME = "ploughshare_link.txt" GRIDS_PROCESSOR = "process_grids.sh" @@ -18,25 +18,24 @@ class Plough(interface.External): + """Interface provider.""" def __init__(self, pinecard, theorycard, *args, **kwargs): super().__init__(pinecard, theorycard, *args, **kwargs) - self.ps_link = self.source/PLOUGHSHARE_LINK_FILENAME + self.ps_link = self.source / PLOUGHSHARE_LINK_FILENAME with open(self.ps_link) as ps_link: self.link = ps_link.readline() - - self.filename = self.link.rsplit('/')[-1] - self.dir_name = self.filename.rsplit('.', 1)[0] - self.tarball = self.dest/self.filename - self.processor = self.source/GRIDS_PROCESSOR + + self.filename = self.link.rsplit("/")[-1] + self.dir_name = self.filename.rsplit(".", 1)[0] + self.tarball = self.dest / self.filename + self.processor = self.source / GRIDS_PROCESSOR self.run() self.generate_pineappl() self.timestamp = 0 - + def run(self): - ''' - Download and extract the .tgz file - ''' + """Download and extract the .tgz file.""" print("Downloading from ploughshare...") self.download_to_dest() print(f"Grids successfully downloaded to {self.tarball}") @@ -45,12 +44,15 @@ def run(self): print(f"Grids successfully extracted to {self.dir_name}") def results(self): + """Results are collected and compared at the pineappl (script) level.""" pass def collect_versions(self): + """No additional programs involved.""" return {} - + def generate_pineappl(self): + """Converts donwloaded grids into pineappl format.""" print("Grid conversion started...") # the grids are converted and processed here os.environ["PS_DIR"] = str(self.grids_dir) @@ -59,23 +61,21 @@ def generate_pineappl(self): if os.access(self.processor, os.X_OK): shutil.copy2(self.processor, self.dest) subprocess.run("./process_grids.sh", cwd=self.dest, check=True) - (self.dest/"process_grids.sh").unlink() + (self.dest / "process_grids.sh").unlink() else: - raise ValueError(f"Grid conversion file present but not executable: {self.processor}") + raise ValueError( + f"Grid conversion file present but not executable: {self.processor}" + ) self.grids = [] for g in self.dest.glob("*.pineappl.lz4"): self.grids.append(g) - + def download_to_dest(self): - ''' - Download the file and move it to the output folder - ''' - urllib.request.urlretrieve(self.link, self.dest/self.filename) + """Download the file and move it to the output folder.""" + urllib.request.urlretrieve(self.link, self.dest / self.filename) def extract_tarball(self): - ''' - extract the contents - ''' + """Extract the contents.""" with tarfile.open(self.tarball, "r:*") as tf: tf.extractall(self.dest) - self.grids_dir = self.dest/self.dir_name/GRIDS_TMP \ No newline at end of file + self.grids_dir = self.dest / self.dir_name / GRIDS_TMP From 18161471e7b59884b25d00ecdb8bffb9e8d3bf79 Mon Sep 17 00:00:00 2001 From: Andrew Pietraszkiewicz Date: Sat, 18 Apr 2026 13:34:18 +0200 Subject: [PATCH 08/13] fix whitespaces in __init__.py --- src/pinefarm/external/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pinefarm/external/__init__.py b/src/pinefarm/external/__init__.py index af7af91..d8c8828 100644 --- a/src/pinefarm/external/__init__.py +++ b/src/pinefarm/external/__init__.py @@ -58,7 +58,7 @@ def decide_external_tool(dsname: str): from . import mg5 # pylint: disable=import-outside-toplevel return mg5.Mg5, "blue" - + if (configs["paths"]["runcards"] / dsname / "ploughshare_link.txt").exists(): from . import plough From 8c96007fa53c925c39cfb2fbf1f4badc7e68f7e4 Mon Sep 17 00:00:00 2001 From: Andrew Pietraszkiewicz Date: Mon, 20 Apr 2026 11:14:42 +0100 Subject: [PATCH 09/13] Update src/pinefarm/external/plough.py Co-authored-by: Felix Hekhorn --- src/pinefarm/external/plough.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/pinefarm/external/plough.py b/src/pinefarm/external/plough.py index fd6a2d0..352bc17 100644 --- a/src/pinefarm/external/plough.py +++ b/src/pinefarm/external/plough.py @@ -23,8 +23,7 @@ class Plough(interface.External): def __init__(self, pinecard, theorycard, *args, **kwargs): super().__init__(pinecard, theorycard, *args, **kwargs) self.ps_link = self.source / PLOUGHSHARE_LINK_FILENAME - with open(self.ps_link) as ps_link: - self.link = ps_link.readline() + self.link = self.ps_link.read_text() self.filename = self.link.rsplit("/")[-1] self.dir_name = self.filename.rsplit(".", 1)[0] From 1f562c2708f5a033522559905939e68efbd73171 Mon Sep 17 00:00:00 2001 From: Andrew Pietraszkiewicz Date: Tue, 28 Apr 2026 10:35:29 +0200 Subject: [PATCH 10/13] making plough compatible with postprocessing --- src/pinefarm/cli/run.py | 24 +++++++-------- src/pinefarm/external/plough.py | 52 ++++++++++++++++----------------- 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/src/pinefarm/cli/run.py b/src/pinefarm/cli/run.py index 58feb78..07f1b1d 100644 --- a/src/pinefarm/cli/run.py +++ b/src/pinefarm/cli/run.py @@ -190,18 +190,18 @@ def run_dataset(runner): # collect results in the output pineappl grid runner.generate_pineappl() - - table.print_table( - table.convolute_grid( - runner.grid, runner.pdf, integrated=isinstance(runner, mg5.Mg5) - ), - runner.results(), - runner.dest, - ) - - # TODO: annotate_version should be a post-processing step - # however at the moment only works in 1-grid cases - runner.annotate_versions() + if not hasattr(runner, "ps_link"): + table.print_table( + table.convolute_grid( + runner.grid, runner.pdf, integrated=isinstance(runner, mg5.Mg5) + ), + runner.results(), + runner.dest, + ) + + # TODO: annotate_version should be a post-processing step + # however at the moment only works in 1-grid cases + runner.annotate_versions() runner.postprocess() diff --git a/src/pinefarm/external/plough.py b/src/pinefarm/external/plough.py index 352bc17..144eb6d 100644 --- a/src/pinefarm/external/plough.py +++ b/src/pinefarm/external/plough.py @@ -2,7 +2,6 @@ import os import shutil -import subprocess import tarfile import urllib.request @@ -13,7 +12,6 @@ from . import interface PLOUGHSHARE_LINK_FILENAME = "ploughshare_link.txt" -GRIDS_PROCESSOR = "process_grids.sh" GRIDS_TMP = "grids" @@ -28,19 +26,25 @@ def __init__(self, pinecard, theorycard, *args, **kwargs): self.filename = self.link.rsplit("/")[-1] self.dir_name = self.filename.rsplit(".", 1)[0] self.tarball = self.dest / self.filename - self.processor = self.source / GRIDS_PROCESSOR - self.run() - self.generate_pineappl() - self.timestamp = 0 def run(self): """Download and extract the .tgz file.""" print("Downloading from ploughshare...") - self.download_to_dest() + try: + self.download_to_dest() + if self.tarball.exists(): + print(f"Grids successfully downloaded to {self.tarball}") + else: + raise FileNotFoundError( + f"{self.tarball} not found but the download didn't seem to fail?" + ) + except Exception as e: + raise FileNotFoundError(f"{self.tarball} could not be downloaded!") from e print(f"Grids successfully downloaded to {self.tarball}") print("Extracting files...") self.extract_tarball() - print(f"Grids successfully extracted to {self.dir_name}") + print(f"Grids successfully extracted to {self.dest}") + self.cleanup() def results(self): """Results are collected and compared at the pineappl (script) level.""" @@ -51,26 +55,11 @@ def collect_versions(self): return {} def generate_pineappl(self): - """Converts donwloaded grids into pineappl format.""" - print("Grid conversion started...") - # the grids are converted and processed here - os.environ["PS_DIR"] = str(self.grids_dir) - # note that filename is also dir_name - os.environ["FILENAME"] = str(self.dir_name) - if os.access(self.processor, os.X_OK): - shutil.copy2(self.processor, self.dest) - subprocess.run("./process_grids.sh", cwd=self.dest, check=True) - (self.dest / "process_grids.sh").unlink() - else: - raise ValueError( - f"Grid conversion file present but not executable: {self.processor}" - ) - self.grids = [] - for g in self.dest.glob("*.pineappl.lz4"): - self.grids.append(g) + """Grids are converted in postrun.sh.""" + return def download_to_dest(self): - """Download the file and move it to the output folder.""" + """Download the file to the output folder.""" urllib.request.urlretrieve(self.link, self.dest / self.filename) def extract_tarball(self): @@ -78,3 +67,14 @@ def extract_tarball(self): with tarfile.open(self.tarball, "r:*") as tf: tf.extractall(self.dest) self.grids_dir = self.dest / self.dir_name / GRIDS_TMP + grids_list = sorted(os.listdir(self.grids_dir)) + for i, grid in enumerate(grids_list): + extension = grid.split(".", 2)[2] + print(extension) + os.rename(self.grids_dir / grid, self.dest / f"grid_{i}.{extension}") + + def cleanup(self): + """Delete unnecessary files and create tmp.pineappl.lz4 to allow postprocessing.""" + shutil.rmtree(self.dest / self.dir_name) + self.tarball.unlink() + open(self.dest / "tmp.pineappl.lz4", "x") From 75ce0da30f57e9243d4f56c4d77c7f36d4b9f15f Mon Sep 17 00:00:00 2001 From: Andrew Pietraszkiewicz Date: Tue, 28 Apr 2026 17:20:12 +0200 Subject: [PATCH 11/13] add extra attributes --- src/pinefarm/cli/run.py | 2 +- src/pinefarm/external/interface.py | 14 ++++++++++++-- src/pinefarm/external/plough.py | 9 ++------- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/pinefarm/cli/run.py b/src/pinefarm/cli/run.py index 07f1b1d..3713916 100644 --- a/src/pinefarm/cli/run.py +++ b/src/pinefarm/cli/run.py @@ -190,7 +190,7 @@ def run_dataset(runner): # collect results in the output pineappl grid runner.generate_pineappl() - if not hasattr(runner, "ps_link"): + if runner._print_comparison: table.print_table( table.convolute_grid( runner.grid, runner.pdf, integrated=isinstance(runner, mg5.Mg5) diff --git a/src/pinefarm/external/interface.py b/src/pinefarm/external/interface.py index 6c3c063..0e90ace 100644 --- a/src/pinefarm/external/interface.py +++ b/src/pinefarm/external/interface.py @@ -34,12 +34,22 @@ class External(abc.ABC): kind = None def __init__( - self, name, theory, pdf, timestamp=None, runcards_path=None, output_folder=None + self, + name, + theory, + pdf, + timestamp=None, + runcards_path=None, + output_folder=None, + print_comparison=True, + run_without_grids=False, ): self.name = name self.theory = theory self.pdf = pdf self.timestamp = timestamp + self._print_comparison = print_comparison + self._run_without_grids = run_without_grids if runcards_path is None: self._runcards_path = configs.configs["paths"]["runcards"] else: @@ -179,7 +189,7 @@ def postprocess(self): else: grids = list(self.dest.glob("*.pineappl*")) - if not grids: + if not grids and not self._run_without_grids: raise ValueError("Tried to run postprocessing in a folder with no grids?") os.environ["PINECARD"] = self.source.as_posix() diff --git a/src/pinefarm/external/plough.py b/src/pinefarm/external/plough.py index 144eb6d..82385b2 100644 --- a/src/pinefarm/external/plough.py +++ b/src/pinefarm/external/plough.py @@ -26,6 +26,8 @@ def __init__(self, pinecard, theorycard, *args, **kwargs): self.filename = self.link.rsplit("/")[-1] self.dir_name = self.filename.rsplit(".", 1)[0] self.tarball = self.dest / self.filename + self._print_comparison = False + self._run_without_grids = True def run(self): """Download and extract the .tgz file.""" @@ -40,11 +42,9 @@ def run(self): ) except Exception as e: raise FileNotFoundError(f"{self.tarball} could not be downloaded!") from e - print(f"Grids successfully downloaded to {self.tarball}") print("Extracting files...") self.extract_tarball() print(f"Grids successfully extracted to {self.dest}") - self.cleanup() def results(self): """Results are collected and compared at the pineappl (script) level.""" @@ -70,11 +70,6 @@ def extract_tarball(self): grids_list = sorted(os.listdir(self.grids_dir)) for i, grid in enumerate(grids_list): extension = grid.split(".", 2)[2] - print(extension) os.rename(self.grids_dir / grid, self.dest / f"grid_{i}.{extension}") - - def cleanup(self): - """Delete unnecessary files and create tmp.pineappl.lz4 to allow postprocessing.""" shutil.rmtree(self.dest / self.dir_name) self.tarball.unlink() - open(self.dest / "tmp.pineappl.lz4", "x") From a46f03f82f8d969384414108a63ec965a71aae64 Mon Sep 17 00:00:00 2001 From: Andrew Pietraszkiewicz Date: Sun, 3 May 2026 17:15:13 +0200 Subject: [PATCH 12/13] improvements suggested by @scarlehoff --- src/pinefarm/external/interface.py | 6 ++-- src/pinefarm/external/plough.py | 44 +++++++++++++++++------------- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/src/pinefarm/external/interface.py b/src/pinefarm/external/interface.py index 0e90ace..de8b3fe 100644 --- a/src/pinefarm/external/interface.py +++ b/src/pinefarm/external/interface.py @@ -42,14 +42,14 @@ def __init__( runcards_path=None, output_folder=None, print_comparison=True, - run_without_grids=False, + postrun_without_grids=False, ): self.name = name self.theory = theory self.pdf = pdf self.timestamp = timestamp self._print_comparison = print_comparison - self._run_without_grids = run_without_grids + self._postrun_without_grids = postrun_without_grids if runcards_path is None: self._runcards_path = configs.configs["paths"]["runcards"] else: @@ -189,7 +189,7 @@ def postprocess(self): else: grids = list(self.dest.glob("*.pineappl*")) - if not grids and not self._run_without_grids: + if not grids and not self._postrun_without_grids: raise ValueError("Tried to run postprocessing in a folder with no grids?") os.environ["PINECARD"] = self.source.as_posix() diff --git a/src/pinefarm/external/plough.py b/src/pinefarm/external/plough.py index 82385b2..b60df6f 100644 --- a/src/pinefarm/external/plough.py +++ b/src/pinefarm/external/plough.py @@ -16,38 +16,34 @@ class Plough(interface.External): - """Interface provider.""" + """Interface to download grids directly from ploughshare.""" def __init__(self, pinecard, theorycard, *args, **kwargs): - super().__init__(pinecard, theorycard, *args, **kwargs) + super().__init__( + pinecard, + theorycard, + *args, + print_comparison=False, + postrun_without_grids=True, + **kwargs, + ) self.ps_link = self.source / PLOUGHSHARE_LINK_FILENAME self.link = self.ps_link.read_text() self.filename = self.link.rsplit("/")[-1] self.dir_name = self.filename.rsplit(".", 1)[0] self.tarball = self.dest / self.filename - self._print_comparison = False - self._run_without_grids = True def run(self): """Download and extract the .tgz file.""" print("Downloading from ploughshare...") - try: - self.download_to_dest() - if self.tarball.exists(): - print(f"Grids successfully downloaded to {self.tarball}") - else: - raise FileNotFoundError( - f"{self.tarball} not found but the download didn't seem to fail?" - ) - except Exception as e: - raise FileNotFoundError(f"{self.tarball} could not be downloaded!") from e + self.download_to_dest() print("Extracting files...") self.extract_tarball() print(f"Grids successfully extracted to {self.dest}") def results(self): - """Results are collected and compared at the pineappl (script) level.""" + """Do nothing.""" pass def collect_versions(self): @@ -60,7 +56,16 @@ def generate_pineappl(self): def download_to_dest(self): """Download the file to the output folder.""" - urllib.request.urlretrieve(self.link, self.dest / self.filename) + try: + urllib.request.urlretrieve(self.link, self.dest / self.filename) + if self.tarball.exists(): + print(f"Grids successfully downloaded to {self.tarball}") + else: + raise FileNotFoundError( + f"{self.tarball} not found but the download didn't seem to fail?" + ) + except Exception as e: + raise FileNotFoundError(f"{self.tarball} could not be downloaded!") from e def extract_tarball(self): """Extract the contents.""" @@ -68,8 +73,9 @@ def extract_tarball(self): tf.extractall(self.dest) self.grids_dir = self.dest / self.dir_name / GRIDS_TMP grids_list = sorted(os.listdir(self.grids_dir)) - for i, grid in enumerate(grids_list): - extension = grid.split(".", 2)[2] - os.rename(self.grids_dir / grid, self.dest / f"grid_{i}.{extension}") + for grid in grids_list: + grid_num, extension = grid.split(".", 2)[1:] + grid_num = grid_num[-3:] + os.rename(self.grids_dir / grid, self.dest / f"grid_{grid_num}.{extension}") shutil.rmtree(self.dest / self.dir_name) self.tarball.unlink() From ac7925136a4e78c29f5be1dc0c92e22df10fe69a Mon Sep 17 00:00:00 2001 From: Andrew Pietraszkiewicz Date: Sun, 3 May 2026 17:18:50 +0200 Subject: [PATCH 13/13] rename GRIDS_TMP --- src/pinefarm/external/plough.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pinefarm/external/plough.py b/src/pinefarm/external/plough.py index b60df6f..cc175b1 100644 --- a/src/pinefarm/external/plough.py +++ b/src/pinefarm/external/plough.py @@ -12,7 +12,7 @@ from . import interface PLOUGHSHARE_LINK_FILENAME = "ploughshare_link.txt" -GRIDS_TMP = "grids" +GRIDS_FROM_PS = "grids" class Plough(interface.External): @@ -71,7 +71,7 @@ def extract_tarball(self): """Extract the contents.""" with tarfile.open(self.tarball, "r:*") as tf: tf.extractall(self.dest) - self.grids_dir = self.dest / self.dir_name / GRIDS_TMP + self.grids_dir = self.dest / self.dir_name / GRIDS_FROM_PS grids_list = sorted(os.listdir(self.grids_dir)) for grid in grids_list: grid_num, extension = grid.split(".", 2)[1:]