Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 41 additions & 2 deletions app_pack_generator/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import logging

import papermill

import jsonschema
from tabulate import tabulate

Expand Down Expand Up @@ -96,6 +97,9 @@ class ApplicationInterface(object):
# Free arguments that are papermill parameters not associated with stage
arguments: list[ApplicationParameter] = []

# Keyword value metadata pairs
metadata: dict = {}

class ApplicationNotebook(ApplicationInterface):
"""Defines a parsed Jupyter Notebook read as a JSON file."""

Expand All @@ -110,9 +114,10 @@ def __init__(self, notebook_filename):
self.notebook_parameters = []

self.filename = notebook_filename
self.parse_notebook(notebook_filename)
self.parse_notebook_parameters(notebook_filename)
self.parse_notebook_metadata(notebook_filename)

def parse_notebook(self, notebook_filename):
def parse_notebook_parameters(self, notebook_filename):
"""Parses validate notebook_filename as a valid, existing Jupyter Notebook to
ensure no exception is thrown.
"""
Expand Down Expand Up @@ -167,6 +172,29 @@ def parse_notebook(self, notebook_filename):
else:
self.arguments.append(app_param)

def parse_notebook_metadata(self, notebook_filename, name=None, language=None):
    """Extract keyword/value pairs from the notebook cell tagged 'metadata'.

    Uses papermill's inspection machinery to find the first cell tagged
    "metadata", parse its assignments with the kernel's translator, and
    store the evaluated values into ``self.metadata``. Silently does
    nothing when no such cell exists or when the notebook's language
    translator does not support introspection.

    Parameters
    ----------
    notebook_filename : str
        Path of the Jupyter Notebook to inspect.
    name : str, optional
        Kernel name override forwarded to papermill; defaults to the
        notebook's own kernel.
    language : str, optional
        Language override forwarded to papermill; defaults to the
        notebook's own language.
    """

    nb = papermill.inspection._open_notebook(notebook_filename, parameters=None)

    metadata_cell_idx = papermill.utils.find_first_tagged_cell_index(nb, "metadata")
    if metadata_cell_idx < 0:
        # No cell tagged "metadata": nothing to parse.
        return

    metadata_cell = nb.cells[metadata_cell_idx]

    kernel_name = papermill.utils.nb_kernel_name(nb, name)
    language = papermill.utils.nb_language(nb, language)

    translator = papermill.translators.papermill_translators.find_translator(kernel_name, language)
    try:
        inspected = translator.inspect(metadata_cell)
    except NotImplementedError:
        logger.warning(f"Translator for '{language}' language does not support parameter introspection.")
        # Bail out: without introspection there are no values to read.
        # (Previously execution fell through to an unbound local here,
        # raising NameError instead of degrading gracefully.)
        return

    # Rebind to a fresh instance-level dict so we never mutate the mutable
    # class-level ``metadata`` default shared by all instances.
    parsed = dict(self.metadata)
    for nb_param in inspected:
        # SECURITY NOTE: eval() executes arbitrary expressions taken from
        # the notebook. Acceptable only because the notebook is the trusted
        # application source itself; do not reuse on untrusted input.
        parsed[nb_param.name] = eval(nb_param.default)
    self.metadata = parsed

def parameter_summary(self):

headers = [ 'name', 'inferred_type', 'cwl_type', 'default', 'help' ]
Expand All @@ -180,3 +208,14 @@ def parameter_summary(self):
table_data.append(table_row)

return tabulate(table_data, headers=headers)

def metadata_summary(self):
    """Render the parsed notebook metadata as a plain-text table.

    Returns a tabulate-formatted string with one (name, value) row per
    entry in ``self.metadata``.
    """

    # dict items are already (key, value) tuples, i.e. one table row each.
    rows = list(self.metadata.items())

    return tabulate(rows, headers=['name', 'value'])
148 changes: 125 additions & 23 deletions app_pack_generator/cwl.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
import re
import yaml
import logging
from datetime import datetime
from operator import setitem

from .application import ApplicationNotebook

logger = logging.getLogger(__name__)

Expand All @@ -19,7 +23,7 @@ class CWLError(Exception):

class BaseCWL(object):

def __init__(self, application, template_dir=os.path.join(LOCAL_PATH, 'templates')):
def __init__(self, application : ApplicationNotebook, template_dir=os.path.join(LOCAL_PATH, 'templates')):

self.app = application
self.template_dir = template_dir
Expand All @@ -39,13 +43,30 @@ def generate_all(self, outdir, **kwargs):

class ProcessCWL(BaseCWL):

def __init__(self, application, **kwargs):
def __init__(self, application, repo_info, **kwargs):

super().__init__(application, **kwargs)

# Used to build metadata
self.repo_info = repo_info

# Template CWL and descriptor files
self.process_cwl = self._read_template( os.path.join(self.template_dir, 'process.cwl'))

@property
def _workflow(self):
    """Return the Workflow node of the CWL ``$graph``, or None if absent."""

    # Linear scan for the first graph entry declaring class == Workflow.
    for node in self.process_cwl['$graph']:
        if "class" in node and node["class"] == "Workflow":
            return node
    return None

@property
def _command_line_tool(self):
    """Return the CommandLineTool node of the CWL ``$graph``, or None if absent."""

    # Linear scan for the first graph entry declaring class == CommandLineTool.
    for node in self.process_cwl['$graph']:
        if "class" in node and node["class"] == "CommandLineTool":
            return node
    return None

def generate_all(self, outdir, dockerurl="undefined", **kwargs):
"""Calls all of the application CWL generators as well as the application descriptor generator.

Expand All @@ -61,57 +82,135 @@ def _insert_argument_params(self):
"Connect non stage in/out arguments to papermill parameters"

# Forward the ordinary argument parameters to the process step directly
input_dict = self.process_cwl['inputs']
workflow_input_dict = self._workflow['inputs']
workflow_process_in_dict = self._workflow['steps']['process']['in']
cmd_input_dict = self._command_line_tool['inputs']

for param in self.app.arguments:
name = param.name
param_name = param.name

input_dict[name] = {
param_def = {
'type': param.cwl_type,
'default': param.default,
'label': "argument",
'doc': param.help,
}

cmd_input_dict[param_name] = param_def.copy()
workflow_input_dict[param_name] = param_def.copy()

workflow_process_in_dict[param_name] = param_name

def _insert_input_params(self):
"Connects the special 'input' CWL parameter to the papermill parameter for recieving the stage-in directory location"
"Connects the special 'input' CWL parameter to the papermill parameter for receiving the stage-in directory location"

input_dict = self.process_cwl['inputs']
workflow_input_dict = self._workflow['inputs']
workflow_process_in_dict = self._workflow['steps']['process']['in']
cmd_input_dict = self._command_line_tool['inputs']

# The input Directory is carried as an input from stage_in to process to
# make sure that the contents are exposed to the process container
#
# If we had the stage_in collection filename be an input argument
# to process.cwl then it would be volume mounted in a seperate path
# to process.cwl then it would be volume mounted in a separate path
# and we could not assume that the collection file is in the same
# directory as the staged in results
if self.app.stage_in_param is not None:
input_dict['input'] = 'Directory'
param_name = self.app.stage_in_param.name

param_def = {
'type': "Directory",
'label': "stage-in",
'doc': self.app.stage_in_param.help,
}

self.process_cwl['arguments'] = self.process_cwl.get('arguments', [])
self.process_cwl['arguments'] += [
cmd_input_dict[param_name] = param_def.copy()
workflow_input_dict[param_name] = param_def.copy()

self._command_line_tool['arguments'] = self._command_line_tool.get('arguments', [])
self._command_line_tool['arguments'] += [
'-p', self.app.stage_in_param.name,
f'$(inputs.input.path)'
f'$(inputs.{param_name}.path)'
]

def _insert_output_params(self):
"Connects the special 'input' CWL parameter to the papermill parameter for recieving the stage-out directory location"

# Connect the stage-out parameter to the name of the file specified in the template as output
# That value should contain a full path by using $(runtime.outdir)
input_dict = self.process_cwl['inputs']
workflow_process_in_dict[param_name] = param_name

def _insert_output_params(self):
"Connects the special 'input' CWL parameter to the papermill parameter for receiving the stage-out directory location"

if self.app.stage_out_param is not None:
stage_out_process_dir = self.process_cwl['outputs']['output']['outputBinding']['glob']
stage_out_process_dir = self._command_line_tool['outputs']['outputs_result']['outputBinding']['glob']

if not re.search('runtime.outdir', stage_out_process_dir):
raise CWLError(f"The process CWL template outputs/output path needs to contain $(runtime.outdir) in the path")

self.process_cwl['arguments'] = self.process_cwl.get('arguments', [])
self.process_cwl['arguments'] += [
self._command_line_tool['arguments'] = self._command_line_tool.get('arguments', [])
self._command_line_tool['arguments'] += [
'-p', self.app.stage_out_param.name,
stage_out_process_dir
]

else:
del self.process_cwl['outputs']['output']
del self._command_line_tool['outputs']['output']

def _insert_metadata(self):
    """Populate workflow and schema.org (``s:``) metadata in the process CWL.

    Each known metadata keyword maps to a (setter closure, default value)
    pair; defaults are derived from the source repository info. Values
    supplied by the notebook's metadata cell (``self.app.metadata``)
    override the defaults. Any notebook metadata keyword not in the known
    set is written verbatim as a top-level ``s:<keyword>`` field.
    """

    # Defines overridable metadata values for the CWL with a setter function and default value
    # metadata_name: (setter_function, default_value)
    default_metadata = {
        # id/doc/label land on the Workflow node of the $graph.
        'id': (
            lambda v: setitem(self._workflow, 'id', v),
            self.repo_info.name
        ),
        'doc': (
            lambda v: setitem(self._workflow, 'doc', v),
            f"OGC Application for {self.repo_info.name} built from Jupyter notebook: {os.path.basename(self.app.filename)} from source repository: {self.repo_info.source_location}"
        ),
        'label': (
            lambda v: setitem(self._workflow, 'label', v),
            f"OGC Application for {self.repo_info.name}"
        ),
        # NOTE(review): assumes the template already defines "s:author" as a
        # non-empty list of author mappings — confirm against process.cwl.
        'author': (
            lambda v: setitem(self.process_cwl["s:author"][0], "s:name", v),
            self.repo_info.owner
        ),
        'citation': (
            lambda v: setitem(self.process_cwl, "s:citation", v),
            self.repo_info.source_location
        ),
        'codeRepository': (
            lambda v: setitem(self.process_cwl, "s:codeRepository", v),
            self.repo_info.source_location
        ),
        'commitHash': (
            lambda v: setitem(self.process_cwl, "s:commitHash", v),
            self.repo_info.commit_identifier
        ),
        # Default creation date is generation time; a datetime.date object
        # is stored here — presumably serialized by the YAML writer, verify.
        'dateCreated': (
            lambda v: setitem(self.process_cwl, "s:dateCreated", v),
            datetime.now().date()
        ),
        'version': (
            lambda v: setitem(self.process_cwl, "s:version", v),
            "0.1.0"
        ),
        'softwareVersion': (
            lambda v: setitem(self.process_cwl, "s:softwareVersion", v),
            "0.1.0"
        ),
    }

    # Process all metadata values with a default value
    for keyword, (setter, default_value) in default_metadata.items():
        if keyword in self.app.metadata:
            setter(self.app.metadata[keyword])
        else:
            setter(default_value)

    # Process additional metadata that is not in the list of default values
    for keyword in self.app.metadata:
        if keyword not in default_metadata:
            self.process_cwl[f"s:{keyword}"] = self.app.metadata[keyword]

def generate_process_cwl(self, outdir, dockerurl):
"""Generates the application CWL.
Expand All @@ -122,7 +221,7 @@ def generate_process_cwl(self, outdir, dockerurl):
os.makedirs(outdir)

# Set correct URL for process Docker container
self.process_cwl['requirements']['DockerRequirement']['dockerPull'] = dockerurl
self._command_line_tool['requirements']['DockerRequirement']['dockerPull'] = dockerurl

# Forward the ordinary argument parameters to the process step directly
self._insert_argument_params()
Expand All @@ -131,6 +230,9 @@ def generate_process_cwl(self, outdir, dockerurl):
self._insert_input_params()
self._insert_output_params()

# Add metadata on the source repository
self._insert_metadata()

fname = os.path.join(outdir, 'process.cwl')
write_cwl_file(fname, self.process_cwl)
return fname
Expand Down
27 changes: 25 additions & 2 deletions app_pack_generator/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,16 @@ def image_reference(self):
else:
return f"{self.image_repository}:{self.image_tag}"

def image_source(self):
    """Return the source repository URL to associate with the Docker image.

    GitHub locations are normalized to ``https://github.com/<owner>/<name>``
    so the container package can be linked back to its repository (e.g. by
    GHCR); any other source location is returned unchanged.

    Returns
    -------
    str or None
        The source URL, or None when the git manager has no recorded
        source location (callers already check for None).
    """

    source = self.git_mgr.source_location

    # Guard: without a recorded source location there is nothing to label.
    # (Previously `"github.com" in None` would raise TypeError.)
    if source is None:
        return None

    # If the source is from Github then use the parsed owner
    # and repo name to properly set up package linking
    if "github.com" in source:
        source = f"https://github.com/{self.git_mgr.owner}/{self.git_mgr.name}"

    return source

def repo2docker(self):
"""Calls repo2docker on the local git directory to generate the Docker image.

Expand All @@ -96,8 +106,21 @@ def repo2docker(self):
logger.info(f"Building Docker image named {self.image_reference}")

# Build initial repo2docker command line arguments
cmd = ['jupyter-repo2docker', '--user-id', '1000', '--user-name', 'jovyan',
'--no-run', '--debug', '--image-name', self.image_reference]
# Do not supply the --user-id argument as it will cause
# permission issues when the CWL is run using
# cwltool with the --no-match-user argument
cmd = ['jupyter-repo2docker',
'--user-name', 'jovyan',
'--no-run', '--debug',
'--image-name', self.image_reference]

# Add repo source when appropriate
# Used by GHCR to connect a package to a repo
# https://docs.github.com/en/packages/learn-github-packages/connecting-a-repository-to-a-package#connecting-a-repository-to-a-container-image-using-the-command-line
source = self.image_source()
if source is not None:
cmd += [ '--label',
f'org.opencontainers.image.source={source}' ]

if self.repo_config is not None:
# If the repo2docker config file does not exist inside the repo already, assume it is a URL
Expand Down
Loading