Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
5075547
feat: UploadLogFile command implementation
AcquaDiGiorgio Feb 2, 2026
7a03ef2
chore: improve UploadLogFile tests
AcquaDiGiorgio Feb 4, 2026
fd12496
feat: Change UploadLogFile DataManager Mocks to real DIRAC Classes
AcquaDiGiorgio Feb 11, 2026
8317c9f
chore: Update project name at imports
AcquaDiGiorgio Feb 12, 2026
91cef73
chore: setup lhcbdirac dependency to fork
AcquaDiGiorgio Apr 27, 2026
98ccc37
feat: Migrate BookkeepingReport command to cwl-dirac
AcquaDiGiorgio Apr 27, 2026
1da58a2
chore: set lhcbdirac dependency to https instead of ssh
AcquaDiGiorgio Apr 27, 2026
4586f84
chore: remove all DIRAC import mypy type checking
AcquaDiGiorgio Apr 28, 2026
0ad8e0e
feat: Migrate FailoverRequest command to cwl-dirac
AcquaDiGiorgio May 4, 2026
bd285c3
chore(tests): improve command fixtures
AcquaDiGiorgio May 4, 2026
26de911
feat: Migrate UploadOutputData command to cwl-dirac
AcquaDiGiorgio May 5, 2026
b87c180
feat: Migrate AnalyseXmlSummary command to cwl-dirac
AcquaDiGiorgio May 6, 2026
32e54b4
feat: Migrate WorkflowAccounting command to cwl-dirac
AcquaDiGiorgio May 6, 2026
f02159a
feat: Migrate UploadLogFile command to cwl-dirac
AcquaDiGiorgio May 6, 2026
f4d2821
chore: update pixi.lock
AcquaDiGiorgio May 7, 2026
028ca4f
chore: fix BookkeepingReport typo
AcquaDiGiorgio May 11, 2026
d9e24e9
chore: fix possible None values while saving workflow_commons
AcquaDiGiorgio May 11, 2026
6907021
chore: set proper commands exception catching
AcquaDiGiorgio May 11, 2026
1987844
chore: fix job path not being taken into account
AcquaDiGiorgio May 11, 2026
947bc8b
chore: change workflow commons from dict to a pydantic model
AcquaDiGiorgio May 15, 2026
396645e
chore: fix typos
AcquaDiGiorgio May 15, 2026
a81648b
chore: add logging to commands
AcquaDiGiorgio May 18, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10,687 changes: 5,782 additions & 4,905 deletions pixi.lock

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ dependencies = [
"diracx-client>=0.0.8",
"diracx-cli>=0.0.8",
"lbprodrun",
"LHCbDIRAC @ git+https://git@gitlab.cern.ch/jlisalab/LHCbDIRAC.git@modules-to-cwl-migration", # Temporary fork dependency
"pydantic",
"pyyaml",
"typer",
Expand Down Expand Up @@ -78,7 +79,7 @@ allow_redefinition = true
enable_error_code = ["import", "attr-defined"]

[[tool.mypy.overrides]]
module = ["requests", "yaml"]
module = ["requests", "yaml", "DIRAC.*", "LHCbDIRAC.*", "DIRACCommon.*"]
ignore_missing_imports = true

[tool.pytest.ini_options]
Expand Down
17 changes: 16 additions & 1 deletion src/dirac_cwl/commands/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,20 @@
"""Command classes for workflow pre/post-processing operations."""

from .analyze_xml_summary import AnalyseXmlSummary
from .bookkeeping_report import BookkeepingReport
from .core import PostProcessCommand, PreProcessCommand
from .failover_request import FailoverRequest
from .upload_log_file import UploadLogFile
from .upload_output_data import UploadOutputData
from .workflow_accounting import WorkflowAccounting

# Public API of the commands package. A single assignment is kept: an earlier
# two-name ``__all__`` was immediately overwritten by this list and had no effect.
__all__ = [
    "AnalyseXmlSummary",
    "PreProcessCommand",
    "PostProcessCommand",
    "UploadLogFile",
    "BookkeepingReport",
    "FailoverRequest",
    "UploadOutputData",
    "WorkflowAccounting",
]
75 changes: 75 additions & 0 deletions src/dirac_cwl/commands/analyze_xml_summary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
"""LHCb command for checking the XMLSummary output to ensure that the execution was done correctly."""

import logging
import os

from LHCbDIRAC.Workflow.Modules.AnalyseXMLSummary import _areInputsOK, _isXMLSummaryOK
from LHCbDIRAC.Workflow.Modules.BookkeepingReport import _generate_xml_object

from dirac_cwl.core.exceptions import WorkflowProcessingException

from .core import PostProcessCommand
from .workflow_commons import StepStatus, WorkflowCommons

logger = logging.getLogger(__name__)


class AnalyseXmlSummary(PostProcessCommand):
    """Performs a series of checks on the XMLSummary output to make sure the execution was done correctly."""

    def execute(self, job_path: os.PathLike, **kwargs):
        """Execute the command.

        Loads the workflow commons stored for ``job_path``, runs the XMLSummary
        and input checks on it, and updates the application status accordingly.
        Whenever the workflow commons could be loaded they are saved back in the
        ``finally`` clause, together with the ``failed`` flag.

        :param job_path: Path to the job working directory.
        :param kwargs: Additional keyword arguments (not used by this command).
        :raises WorkflowProcessingException: If the XMLSummary checks report an
            error, or if any other exception is raised while running them.
        """
        failed = False
        workflow_commons = None
        try:
            workflow_commons = WorkflowCommons.load(job_path)

            # Only build the XMLSummary object when none is stored yet.
            if not workflow_commons.xf_o:
                workflow_commons.xf_o = _generate_xml_object(
                    workflow_commons.cleaned_application_name,
                    workflow_commons.production_id,
                    workflow_commons.prod_job_id,
                    workflow_commons.step_number,
                    workflow_commons.step_id,
                )

            job_ok = _isXMLSummaryOK(workflow_commons.xf_o)

            # NOTE(review): in CWL the new Rust implementation of lb-prod-run is
            # expected to be used, and it already integrates some XMLSummary checks
            # (https://gitlab.cern.ch/roneil/lbprodrun-rs/-/blob/main/src/summary.rs).
            # This command may therefore only need to report the file status of the
            # problematic files -- to be confirmed.
            if job_ok:
                # The input check is only run when the summary itself is fine.
                job_ok = _areInputsOK(
                    workflow_commons.xf_o,
                    workflow_commons.inputs,
                    workflow_commons.number_of_events,
                    workflow_commons.production_id,
                    workflow_commons.file_report,
                )
            if not job_ok:
                workflow_commons.job_report.setApplicationStatus("XMLSummary reports error")
                raise WorkflowProcessingException("XMLSummary reports error")

            # A step that already failed must not be reported as "Step OK".
            if workflow_commons.step_status == StepStatus.Failed:
                logger.info("Workflow already failed")
                return

            workflow_commons.job_report.setApplicationStatus(f"{workflow_commons.application_name} Step OK")

        except WorkflowProcessingException:
            # Already the expected exception type: flag the failure and propagate.
            failed = True
            raise

        except Exception as e:
            # The message names the class exactly so logs can be searched by class name.
            logger.exception("Exception in AnalyseXmlSummary", exc_info=e)

            failed = True
            if workflow_commons:
                workflow_commons.job_report.setApplicationStatus(repr(e))

            raise WorkflowProcessingException(e) from e

        finally:
            # Nothing to persist when loading the workflow commons itself failed.
            if workflow_commons:
                workflow_commons.save(job_path, failed=failed)
168 changes: 168 additions & 0 deletions src/dirac_cwl/commands/bookkeeping_report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
"""LHCb command for bookkeeping report file generation based on the XMLSummary and the XML catalog."""

import logging
import os
from typing import Any, Dict

from DIRAC.Core.Utilities.ReturnValues import SErrorException, returnValueOrRaise
from DIRAC.Workflow.Utilities.Utils import getStepCPUTimes
from LHCbDIRAC.Core.Utilities.ProductionData import constructProductionLFNs
from LHCbDIRAC.Workflow.Modules.BookkeepingReport import (
_generate_xml_object,
_generateInputFiles,
_generateOutputFiles,
_prepare_job_info,
_process_time,
)
from LHCbDIRAC.Workflow.Modules.ModulesUtilities import getNumberOfProcessorsToUse

from dirac_cwl.core.exceptions import WorkflowProcessingException

from .core import PostProcessCommand
from .workflow_commons import StepStatus, WorkflowCommons

logger = logging.getLogger(__name__)


class BookkeepingReport(PostProcessCommand):
    """Builds the bookkeeping report file from the XMLSummary and the pool XML catalog."""

    def execute(self, job_path: os.PathLike, **kwargs):
        """Execute the command.

        Loads the workflow commons for ``job_path`` and, unless the step has
        already failed, assembles the job information (timing, input files,
        output files) and writes it to ``bookkeeping_<step_id>.xml``. Whenever
        the workflow commons could be loaded they are saved back in the
        ``finally`` clause, together with the ``failed`` flag.

        :param job_path: Path to the job working directory.
        :param kwargs: Additional keyword arguments (not used by this command).
        :raises WorkflowProcessingException: If the production LFNs cannot be
            built, or if any other exception is raised while generating the report.
        """
        failed = False
        workflow_commons = None
        try:
            workflow_commons = WorkflowCommons.load(job_path)

            # No report is produced for a step that already failed.
            if workflow_commons.step_status == StepStatus.Failed:
                return

            # Only the timing entries that are actually set are forwarded.
            timing: Dict[str, Any] = {
                key: value
                for key, value in (
                    ("StartTime", workflow_commons.start_time),
                    ("StartStats", workflow_commons.start_stats),
                )
                if value
            }
            exectime, cputime = getStepCPUTimes(timing)

            processors = getNumberOfProcessorsToUse(
                workflow_commons.job_id,
                workflow_commons.max_number_of_processors,
            )

            lfn_parameters = {
                "PRODUCTION_ID": workflow_commons.production_id,
                "JOB_ID": workflow_commons.prod_job_id,
                "configVersion": workflow_commons.config_version,
                "outputList": workflow_commons.outputs,
                "configName": workflow_commons.config_name,
                "outputDataFileMask": workflow_commons.output_data_file_mask,
            }

            lfns_provided = workflow_commons.bookkeeping_lfns and workflow_commons.production_output_data
            if not lfns_provided:
                logger.info("BookkeepingLFNs parameters not found, creating on the fly")
                try:
                    lfn_result = constructProductionLFNs(lfn_parameters, workflow_commons.bk_client)
                    constructed = returnValueOrRaise(lfn_result)
                except SErrorException as e:
                    logger.error("Could not create production LFNs", exc_info=e)
                    raise WorkflowProcessingException(f"Could not create production LFNs: {e}") from e

                bookkeeping_lfns = constructed["BookkeepingLFNs"]
            else:
                bookkeeping_lfns = workflow_commons.bookkeeping_lfns

                # A non-list value is split on ";" into individual entries.
                if not isinstance(bookkeeping_lfns, list):
                    bookkeeping_lfns = [entry.strip() for entry in bookkeeping_lfns.split(";")]

            l_date, l_time, l_date_start, l_time_start = _process_time(workflow_commons.start_time)

            # Only build the XMLSummary object when none is stored yet.
            if not workflow_commons.xf_o:
                workflow_commons.xf_o = _generate_xml_object(
                    workflow_commons.cleaned_application_name,
                    workflow_commons.production_id,
                    workflow_commons.prod_job_id,
                    workflow_commons.step_number,
                    workflow_commons.step_id,
                )

            job_details = {
                "exectime": exectime,
                "cputime": cputime,
                "numberOfProcessors": processors,
                "production_id": workflow_commons.production_id,
                "jobID": workflow_commons.job_id,
                "siteName": workflow_commons.site_name,
                "jobType": workflow_commons.job_type,
                "applicationName": workflow_commons.application_name,
                "applicationVersion": workflow_commons.application_version,
                "numberOfEvents": workflow_commons.number_of_events,
            }

            job_info = _prepare_job_info(
                job_details,
                l_date_start,
                l_time_start,
                l_date,
                l_time,
                workflow_commons.xf_o,
                workflow_commons.inputs,
                workflow_commons.step_id,
                workflow_commons.bk_step_id,
                workflow_commons.bk_client,
                workflow_commons.config_name,
                workflow_commons.config_version,
            )

            # Input files first, then output files, are attached to job_info.
            _generateInputFiles(job_info, bookkeeping_lfns, workflow_commons.inputs)
            _generateOutputFiles(
                job_info,
                bookkeeping_lfns,
                workflow_commons.event_type,
                workflow_commons.application_name,
                workflow_commons.xf_o,
                workflow_commons.outputs,
                workflow_commons.inputs,
            )

            # The simulation condition is only set for the "Gauss" application.
            if workflow_commons.application_name == "Gauss":
                job_info.simulation_condition = workflow_commons.sim_description

            # Serialise job_info and write it out; the file name is relative,
            # so it is resolved against the current working directory.
            xml_document = job_info.to_xml()
            report_name = f"bookkeeping_{workflow_commons.step_id}.xml"
            with open(report_name, "wb") as report_file:
                report_file.write(xml_document)

        except WorkflowProcessingException:
            # Already the expected exception type: flag the failure and propagate.
            failed = True
            raise

        except Exception as e:
            logger.exception("Exception in BookkeepingReport", exc_info=e)

            failed = True
            if workflow_commons:
                workflow_commons.job_report.setApplicationStatus(repr(e))

            raise WorkflowProcessingException(e) from e

        finally:
            # Nothing to persist when loading the workflow commons itself failed.
            if workflow_commons:
                workflow_commons.save(job_path, failed=failed)
94 changes: 94 additions & 0 deletions src/dirac_cwl/commands/failover_request.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
"""LHCb command for committing the status of the files in the file report.

The status will be "Processed" if everything ended properly or "Unused" if it did not.
"""

import logging
import os

from DIRAC.Core.Utilities.ReturnValues import SErrorException, returnValueOrRaise
from LHCbDIRAC.Workflow.Modules.FailoverRequest import _prepareRequest

from dirac_cwl.core.exceptions import WorkflowProcessingException

from .core import PostProcessCommand
from .workflow_commons import StepStatus, WorkflowCommons

logger = logging.getLogger(__name__)


class FailoverRequest(PostProcessCommand):
    """Commits the status of the files held in the file report.

    Input files without a status yet are marked "Processed" when the step
    status is ``Done`` and "Unused" otherwise.
    """

    def execute(self, job_path: os.PathLike, **kwargs):
        """Execute the command.

        Sets a status for every input file missing from the file report,
        commits the report, and, if files are still pending afterwards, tries
        to turn the report into a request operation. The failover file is then
        generated. Whenever the workflow commons could be loaded they are saved
        back in the ``finally`` clause, together with the ``failed`` flag.

        :param job_path: Path to the job working directory.
        :param kwargs: Additional keyword arguments (not used by this command).
        :raises WorkflowProcessingException: If any unexpected exception is
            raised while running the command.
        """
        failed = False
        workflow_commons = None
        try:
            workflow_commons = WorkflowCommons.load(job_path)

            _prepareRequest(workflow_commons.request, workflow_commons.job_id)

            already_reported = workflow_commons.file_report.getFiles()

            for lfn in workflow_commons.inputs:
                # Files that already carry a status are left untouched.
                if lfn in already_reported:
                    continue

                if workflow_commons.step_status == StepStatus.Done:
                    status = "Processed"
                    logger.debug("No status populated for %s, setting to 'Processed'", lfn)
                else:
                    status = "Unused"
                    logger.info("Set status of %s to 'Unused' due to workflow failure", lfn)

                workflow_commons.file_report.setFileStatus(int(workflow_commons.production_id), lfn, status)

            try:
                committed = returnValueOrRaise(workflow_commons.file_report.commit())
                if not committed:
                    logger.warning("No file status update reported. There are no input files?")
                else:
                    logger.info("Status of files have been properly updated in the TransformationDB")
            except SErrorException as e:
                # A failed commit is only logged; the check below handles what is left.
                logger.error("Something went wrong trying fileReport.commit() %s", e)

            # Files still present in the report after the commit were not reported.
            if workflow_commons.file_report.getFiles():
                logger.error("On first attempt, failed to report file status to TransformationDB")
                try:
                    forward_operation = returnValueOrRaise(workflow_commons.file_report.generateForwardDISET())
                    if forward_operation:
                        if workflow_commons.step_status == StepStatus.Done:
                            logger.info("Adding a SetFileStatus operation to the request")
                            workflow_commons.request.addOperation(forward_operation)
                        else:
                            logger.info("The job should fail: do not set requests, as the DRA will take care")
                    else:
                        logger.info("On second attempt, files correctly reported to TransformationDB")
                except SErrorException as e:
                    logger.warning("Could not generate Operation for file report: %s", e)

            if workflow_commons.step_status == StepStatus.Done:
                workflow_commons.job_report.setApplicationStatus("Job Finished Successfully", True)

            workflow_commons.generateFailoverFile()

        except WorkflowProcessingException:
            # Already the expected exception type: flag the failure and propagate.
            failed = True
            raise

        except Exception as e:
            logger.exception("Exception in FailoverRequest", exc_info=e)

            failed = True
            if workflow_commons:
                workflow_commons.job_report.setApplicationStatus(repr(e))

            raise WorkflowProcessingException(e) from e

        finally:
            # Nothing to persist when loading the workflow commons itself failed.
            if workflow_commons:
                workflow_commons.save(job_path, failed=failed)
Loading
Loading