Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,7 @@ volumes/
*.key
*.pem
.jwks

# Python
__pycache__/
*.pyc
6 changes: 2 additions & 4 deletions front_end/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ RUN apt-get update && apt-get install -y libmcrypt-dev \
&& docker-php-ext-install gd

RUN apt-get update && apt-get install -y gnupg curl
RUN curl -fsSL https://www.mongodb.org/static/pgp/server-8.0.asc |gpg -o /usr/share/keyrings/mongodb-server-8.0.gpg --dearmor
RUN curl -fsSL https://www.mongodb.org/static/pgp/server-8.0.asc | gpg -o /usr/share/keyrings/mongodb-server-8.0.gpg --dearmor
RUN echo "deb [ signed-by=/usr/share/keyrings/mongodb-server-8.0.gpg ] http://repo.mongodb.org/apt/debian bookworm/mongodb-org/8.0 main" | tee /etc/apt/sources.list.d/mongodb-org-8.0.list
RUN apt-get update
RUN apt-get install -y mongodb-org
Expand All @@ -39,9 +39,7 @@ WORKDIR /var/www/html
ADD openVRE /var/www/html/openVRE
RUN cd openVRE
WORKDIR /var/www/html/openVRE
RUN curl -sS https://getcomposer.org/installer | php -- --install-dir=/usr/local/bin --filename=composer
RUN composer self-update
RUN composer update --ignore-platform-req=ext-mongodb --ignore-platform-req=ext-mongodb
# Dependencies are pre-seeded in ./openVRE/vendor for offline/reproducible builds.
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why remove this? The dependencies are defined in composer.json and are versioned, so the builds should be reproducible.

RUN mkdir logs
RUN touch logs/application.log
RUN chmod -R 777 logs/application.log
Expand Down
603 changes: 603 additions & 0 deletions front_end/openVRE/public/phplib/classes/ProcessK8s.php
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In all the functions with parameters: add the types to the construct function and remove them from the comments

Large diffs are not rendered by default.

24 changes: 22 additions & 2 deletions front_end/openVRE/public/phplib/classes/Tooljob.php
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ public function __construct($tool, $execution = "", $project = "", $descrip = ""
switch ($this->launcher) {
case "SGE":
case "docker_SGE":
case "kubernetes_native":
$this->root_dir_virtual = $GLOBALS['clouds'][$this->cloudName]['dataDir_virtual'] . "/" . $_SESSION['User']['id'];
$this->root_dir_mug = $GLOBALS['clouds'][$this->cloudName]['dataDir_virtual'];
$this->pub_dir_virtual = $GLOBALS['clouds'][$this->cloudName]['pubDir_virtual'];
Expand Down Expand Up @@ -978,6 +979,7 @@ public function prepareExecution($tool, $metadata, $dataLocations = [], $metadat

switch ($this->launcher) {
case "SGE":
case "kubernetes_native":
$cmd = $this->setBashCmd_SGE($tool);
$this->createSubmitFile_SGE($cmd);

Expand Down Expand Up @@ -1208,6 +1210,10 @@ protected function setBashCommandDockerSge($tool)
" --out_metadata " . $this->stageout_file_virtual .
" --log_file " . $this->log_file_virtual;

if (isset($this->launcher) && $this->launcher === "kubernetes_native") {
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The kubernetes_native launcher never calls setBashCommandDockerSge, so this condition can never be true

return $cmd_vre;
}


$cmd = "docker run --privileged -v /var/run/docker.sock:/var/run/docker.sock -d" .
" " . $cmd_envs .
Expand Down Expand Up @@ -1438,7 +1444,7 @@ public function submit($tool)
case "SGE":
case "ega_demo":
case "docker_SGE":
return $this->enqueue($tool);
case "kubernetes_native":
case "Slurm_Singularity":
return $this->enqueue($tool);
default:
Expand All @@ -1461,8 +1467,22 @@ protected function enqueue($tool)
$cpus = $launcherInfo['cpus'] ?? $tool['infrastructure']['cpus'];
$queue = $launcherInfo['queue'] ?? $tool['infrastructure']['clouds'][$this->cloudName]['queue'];
$this->logger->info("Resolved Parameters: Queue=$queue, CPUs=$cpus, Memory=$memory");
$jobOptions = array();
if ($jobManager === "kubernetes_native") {
$jobOptions["image"] = $tool['infrastructure']['container_image'] ?? "";
if ($jobOptions["image"] === "") {
$_SESSION['errorData']['Error'][] = "Missing infrastructure.container_image for kubernetes_native launcher.";
return 0;
}
$jobOptions["env"] = array();
if (isset($tool['infrastructure']['container_env']) && is_array($tool['infrastructure']['container_env'])) {
foreach ($tool['infrastructure']['container_env'] as $env_key => $env_value) {
$jobOptions["env"][$env_key] = (string)$env_value;
}
}
}

$pid = execJob($this->working_dir, $this->submission_file, $queue, $cpus, $memory, $this->stdout_file, $this->stderr_file, $jobManager);
$pid = execJob($this->working_dir, $this->submission_file, $queue, $cpus, $memory, $this->stdout_file, $this->stderr_file, $jobManager, $this->toolId, $jobOptions);
$this->logger->info("Tool job submitted to SGE queue '$queue' (PID=$pid)");

$this->pid = $pid;
Expand Down
48 changes: 44 additions & 4 deletions front_end/openVRE/public/phplib/processJob.inc.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

use OpenVRE\LoggerFactory;
use OpenVRE\NotFoundException;
use OpenVRE\ProcessK8s;
use OpenVRE\ProcessSGE;
use OpenVRE\ProcessSlurm;

Expand All @@ -18,7 +19,7 @@ function getJobProcessLogger()
}


function execJob($workDir, $shFile, $queue, $cpus = 1, $mem = 0, $logFile = "job_output.log", $errFile = "job_error.log", $jobManager = "docker_SGE")
function execJob($workDir, $shFile, $queue, $cpus = 1, $mem = 0, $logFile = "job_output.log", $errFile = "job_error.log", $jobManager = "docker_SGE", $toolId = "", $jobOptions = array())
{
getJobProcessLogger()->info("Start job submission via SGE");

Expand Down Expand Up @@ -55,6 +56,23 @@ function execJob($workDir, $shFile, $queue, $cpus = 1, $mem = 0, $logFile = "job
getJobProcessLogger()->info("Submitting job via Slurm_Singularity to $remote_system. Parameters: shFile=$shFile, workDir=$workDir, logFile=$logFile, errFile=$errFile");
$process = new ProcessSlurm($shFile, $workDir, $logFile, $errFile, $remote_system);
break;
case "kubernetes_native":
$schedUrl = getenv("OPENVRE_K8S_SCHEDULER_URL") ?: "";
$schedHost = $schedUrl !== ""
? (string)(parse_url($schedUrl, PHP_URL_HOST) ?: "(parse_failed)")
: "(not_set)";
$k8sNs = getenv("OPENVRE_K8S_NAMESPACE") ?: "(env_unset)";
$jobOptKeys = is_array($jobOptions) && count($jobOptions)
? implode(",", array_keys($jobOptions))
: "(none)";
error_log(
"DEBUG: Submitting job via kubernetes_native. Parameters: shFile=$shFile, workDir=$workDir, queue=$queue, "
. "jobname=$jobname, cpus=$cpus, mem=$mem, logFile=$logFile, errFile=$errFile, "
. "namespace=$k8sNs, scheduler_host=$schedHost, jobOptions_keys=$jobOptKeys"
);
require_once __DIR__ . "/classes/ProcessK8s.php";
$process = new ProcessK8s($shFile, $workDir, $queue, $jobname, $cpus, $mem, $logFile, $errFile, $jobOptions);
break;
default:
$process = new ProcessSGE($shFile, $workDir, $queue, $jobname, $cpus, $mem, $logFile, $errFile);
break;
Expand Down Expand Up @@ -82,15 +100,26 @@ function getRunningJobInfo($pid, $launcherType = null)

if (is_null($launcherType) && is_numeric($pid)) {
$launcherType = "SGE";
} elseif (strpos((string)$pid, "-") !== false) {
$launcherType = "kubernetes_native";
}

if (!in_array($launcherType, array("SGE", "docker_SGE", "Slurm_Singularity"))) {
if ($launcherType == "SGE" || $launcherType == "docker_SGE") {
$process = new ProcessSGE();
$job = $process->getRunningJobInfo($pid);
} elseif ($launcherType == "kubernetes_native") {
require_once __DIR__ . "/classes/ProcessK8s.php";
$process = new ProcessK8s();
$job = $process->getRunningJobInfo($pid);
} elseif ($launcherType == "Slurm_Singularity") {
$process = new ProcessSlurm();
$job = $process->getRunningJobInfo($pid);
} else {
getJobProcessLogger()->error("Cannot monitor job '$pid' of type '$launcherType'. Launcher not implemented.");
throw new UnexpectedValueException("Cannot monitor job '$pid' of type '$launcherType'. Launcher not implemented.");
}

$process = new ProcessSGE();
return $process->getRunningJobInfo($pid);
return $job;
}


Expand Down Expand Up @@ -190,13 +219,24 @@ function delJob($pid, $launcherType = null, $login = null)
// guess launcher
if (!$launcherType && is_numeric($pid)) {
$launcherType = "docker_SGE";
} elseif (strpos((string)$pid, "-") !== false) {
$launcherType = "kubernetes_native";
}

// cancel job
$r_sge = false;
if ($launcherType == "SGE" || $launcherType == "docker_SGE") {
$processSGE = new ProcessSGE();
list($r_sge, $msg_sge) = $processSGE->stop($pid);
} elseif ($launcherType == "kubernetes_native") {
getJobProcessLogger()->debug("delJob kubernetes_native pid=$pid calling ProcessK8s::stop");
require_once __DIR__ . "/classes/ProcessK8s.php";
$processK8s = new ProcessK8s();
list($r_sge, $msg_sge) = $processK8s->stop($pid);
getJobProcessLogger()->debug(
"delJob kubernetes_native pid=$pid stop_ok=" . ($r_sge ? "1" : "0")
. " msg=" . $msg_sge
);
} else {
getJobProcessLogger()->error("Cannot delete job of type '$launcherType' [id = $pid]. Launcher not implemented.");
throw new UnexpectedValueException("Cannot delete job of type '$launcherType' [id = $pid]. Launcher not implemented.");
Expand Down
6 changes: 5 additions & 1 deletion front_end/openVRE/public/phplib/projects.inc.php
Original file line number Diff line number Diff line change
Expand Up @@ -1908,7 +1908,7 @@ function resolvePath_toLocalAbsolutePath($path, $job)
// path is an absolute path
if (preg_match('/^\//', $path)) {
if (preg_match('/^' . preg_quote($job['root_dir_virtual'], '/') . '/', $path)) {
if ($job['launcher'] == "SGE" || $job['launcher'] == "ega_demo" || $job['launcher'] == "docker_SGE") {
if ($job['launcher'] == "SGE" || $job['launcher'] == "ega_demo" || $job['launcher'] == "docker_SGE" || $job['launcher'] == "kubernetes_native") {
$rfn = str_replace($job['root_dir_mug'], $GLOBALS['dataDir'], $path);
}
// direct from path
Expand Down Expand Up @@ -1936,6 +1936,10 @@ function resolvePath_toLocalAbsolutePath($path, $job)
}
}
//clean slashes
if ($rfn === "") {
// Keep original absolute path instead of returning empty when launcher mapping is missing.
$rfn = $path;
}
$rfn = preg_replace('#/+#', '/', $rfn);

//return absolute path
Expand Down
12 changes: 12 additions & 0 deletions scheduler/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
.git
.gitignore
__pycache__/
*.pyc
*.pyo
*.pyd
.pytest_cache/
.mypy_cache/
.ruff_cache/
dist/
build/
*.egg-info/
11 changes: 11 additions & 0 deletions scheduler/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Image for the scheduler HTTP service (app.py).
# Base image: Python 3.11 on Alpine.
FROM python:3.11-alpine

# Relative paths in later instructions resolve against /app.
WORKDIR /app

# Only app.py is copied in; no other files or packages are added by this Dockerfile.
COPY app.py /app/app.py

# Port the service is expected to listen on.
EXPOSE 8080

# Run as a numeric, non-root UID:GID.
# NOTE(review): 65534 is conventionally "nobody" — confirm for this base image.
USER 65534:65534

CMD ["python", "/app/app.py"]
133 changes: 133 additions & 0 deletions scheduler/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import hmac
import json
import os
import re
import ssl
import urllib.error
import urllib.parse
import urllib.request
from http.server import BaseHTTPRequestHandler, HTTPServer


# Files holding the service-account bearer token and the CA bundle used to
# verify the API server certificate.
TOKEN_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/token"
CA_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
# API server address, read from the environment with fallbacks.
API_HOST = os.environ.get("KUBERNETES_SERVICE_HOST", "kubernetes.default.svc")
API_PORT = os.environ.get("KUBERNETES_SERVICE_PORT", "443")
# Namespace used when a request does not name one.
DEFAULT_NAMESPACE = os.environ.get("DEFAULT_NAMESPACE", "default")
# Shared secret clients must present as "Authorization: Bearer <token>".
# When it is empty, Handler._authorized() rejects every request.
SCHEDULER_AUTH_TOKEN = os.environ.get("SCHEDULER_AUTH_TOKEN", "")

# The token is read once, at import time; a missing file raises here.
with open(TOKEN_PATH, "r", encoding="utf-8") as f:
SA_TOKEN = f.read().strip()

# TLS context that trusts CA_PATH; passed to every urlopen() call in k8s_request().
SSL_CTX = ssl.create_default_context(cafile=CA_PATH)


def k8s_request(method, path, body=None, content_type="application/json"):
    """Send one request to the Kubernetes API server with the service-account token.

    Args:
        method: HTTP verb, e.g. "GET", "POST" or "DELETE".
        path: Absolute API path (starting with "/"), appended to the API server URL.
        body: Optional request payload as text; it is UTF-8 encoded before sending.
        content_type: Content-Type header value, sent only when ``body`` is given.

    Returns:
        A ``(status_code, response_text)`` tuple. HTTP error responses are
        returned, not raised. Failures that yield no HTTP response at all
        (refused connection, DNS, TLS, timeout) are reported as status 503 with
        the error text, so the callers' ``code >= 300`` checks treat them as
        failures instead of the exception escaping the request handler.
    """
    url = f"https://{API_HOST}:{API_PORT}{path}"
    data = body.encode("utf-8") if body is not None else None
    req = urllib.request.Request(url, method=method, data=data)
    req.add_header("Authorization", f"Bearer {SA_TOKEN}")
    if body is not None:
        req.add_header("Content-Type", content_type)
    try:
        with urllib.request.urlopen(req, context=SSL_CTX, timeout=30) as resp:
            raw = resp.read().decode("utf-8", errors="replace")
            return resp.getcode(), raw
    except urllib.error.HTTPError as e:
        # The API server answered with an error status: pass it through.
        raw = e.read().decode("utf-8", errors="replace") if e.fp else str(e)
        return e.code, raw
    except OSError as e:
        # URLError, socket timeouts and TLS errors are all OSError subclasses.
        # HTTPError is handled above, so this branch means "no response received".
        return 503, f"kubernetes API unreachable: {e}"


class Handler(BaseHTTPRequestHandler):
    """HTTP front end that forwards Job operations to the Kubernetes API.

    Routes:
        GET    /healthz                     liveness probe, no authentication
        GET    /jobs/<name>[?namespace=ns]  fetch a Job; a missing Job is
                                            reported as ``exists: False``
        POST   /jobs                        create a Job from a JSON body
                                            ``{"namespace": ..., "manifest": ...}``
        DELETE /jobs/<name>[?namespace=ns]  delete a Job (background propagation);
                                            a missing Job counts as success

    Every route except /healthz requires
    ``Authorization: Bearer <SCHEDULER_AUTH_TOKEN>``.
    """

    # Job and namespace names are checked before they are placed in an API
    # path: lowercase alphanumerics, "-" and ".", starting and ending with an
    # alphanumeric. This keeps "/", "..", "?" or "#" in a client-supplied value
    # from steering the service-account request to a different API resource.
    # The API server still applies its own, stricter per-resource rules.
    _K8S_NAME_RE = re.compile(r"[a-z0-9](?:[-a-z0-9.]*[a-z0-9])?")
    _K8S_NAME_MAX = 253

    @classmethod
    def _valid_name(cls, value):
        """Return True when ``value`` is safe to use as one segment of an API path."""
        return (
            isinstance(value, str)
            and 0 < len(value) <= cls._K8S_NAME_MAX
            and cls._K8S_NAME_RE.fullmatch(value) is not None
        )

    def _authorized(self):
        """Check the bearer token; an unset SCHEDULER_AUTH_TOKEN rejects everything."""
        if SCHEDULER_AUTH_TOKEN == "":
            return False
        supplied = self.headers.get("Authorization", "")
        expected = f"Bearer {SCHEDULER_AUTH_TOKEN}"
        # Constant-time comparison so response timing does not leak how much of
        # the token matched. Compared as bytes because compare_digest() rejects
        # non-ASCII str arguments.
        return hmac.compare_digest(supplied.encode("utf-8"), expected.encode("utf-8"))

    def _json(self, code, payload):
        """Send ``payload`` as a JSON response with status ``code``."""
        out = json.dumps(payload).encode("utf-8")
        self.send_response(code)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(out)))
        self.end_headers()
        self.wfile.write(out)

    def _read_json(self):
        """Read and decode the JSON request body; an absent or empty body gives ``{}``.

        Raises:
            ValueError: on a non-numeric Content-Length, invalid UTF-8 or invalid JSON.
        """
        length = int(self.headers.get("Content-Length", "0"))
        # A negative length would make rfile.read() block until the peer closes.
        if length <= 0:
            return {}
        raw = self.rfile.read(length).decode("utf-8")
        return json.loads(raw)

    def _job_target(self):
        """Parse ``/jobs/<name>[?namespace=<ns>]`` from the request path.

        Returns:
            ``(name, namespace)``, or None when either value fails validation.
        """
        name, _, query = self.path[len("/jobs/"):].partition("?")
        params = urllib.parse.parse_qs(query)
        ns = params["namespace"][0] if "namespace" in params else DEFAULT_NAMESPACE
        if not (self._valid_name(name) and self._valid_name(ns)):
            return None
        return name, ns

    def do_GET(self):
        """Serve /healthz and the Job lookup route."""
        if self.path == "/healthz":
            return self._json(200, {"ok": True})
        if not self._authorized():
            return self._json(401, {"ok": False, "error": "unauthorized"})
        if self.path.startswith("/jobs/"):
            target = self._job_target()
            if target is None:
                return self._json(400, {"ok": False, "error": "invalid job name or namespace"})
            name, ns = target
            try:
                code, raw = k8s_request("GET", f"/apis/batch/v1/namespaces/{ns}/jobs/{name}")
            except Exception as e:
                # Same policy as do_POST: always answer, never drop the connection.
                return self._json(500, {"ok": False, "error": str(e)})
            if code == 404:
                return self._json(200, {"ok": True, "exists": False, "job": ""})
            if code >= 300:
                return self._json(500, {"ok": False, "error": raw})
            return self._json(200, {"ok": True, "exists": True, "job": raw})
        return self._json(404, {"ok": False, "error": "not found"})

    def do_POST(self):
        """Create a Job from the manifest text supplied in the JSON body."""
        if not self._authorized():
            return self._json(401, {"ok": False, "error": "unauthorized"})
        if self.path != "/jobs":
            return self._json(404, {"ok": False, "error": "not found"})
        try:
            try:
                body = self._read_json()
            except ValueError as e:
                return self._json(400, {"ok": False, "error": f"invalid JSON body: {e}"})
            if not isinstance(body, dict):
                return self._json(400, {"ok": False, "error": "request body must be a JSON object"})
            ns = body.get("namespace") or DEFAULT_NAMESPACE
            manifest = body.get("manifest")
            if not manifest:
                return self._json(400, {"ok": False, "error": "manifest is required"})
            if not isinstance(manifest, str):
                # k8s_request() encodes the body as text; reject anything else up front.
                return self._json(400, {"ok": False, "error": "manifest must be a string"})
            if not self._valid_name(ns):
                return self._json(400, {"ok": False, "error": "invalid namespace"})
            code, raw = k8s_request(
                "POST",
                f"/apis/batch/v1/namespaces/{ns}/jobs",
                body=manifest,
                content_type="application/yaml",
            )
            if code >= 300:
                return self._json(500, {"ok": False, "error": raw})
            return self._json(200, {"ok": True, "stdout": raw, "stderr": ""})
        except Exception as e:
            return self._json(500, {"ok": False, "error": str(e)})

    def do_DELETE(self):
        """Delete a Job; a Job that is already gone is reported as success."""
        if not self._authorized():
            return self._json(401, {"ok": False, "error": "unauthorized"})
        if not self.path.startswith("/jobs/"):
            return self._json(404, {"ok": False, "error": "not found"})
        target = self._job_target()
        if target is None:
            return self._json(400, {"ok": False, "error": "invalid job name or namespace"})
        name, ns = target
        delete_opts = '{"apiVersion":"batch/v1","kind":"DeleteOptions","propagationPolicy":"Background"}'
        try:
            code, raw = k8s_request(
                "DELETE",
                f"/apis/batch/v1/namespaces/{ns}/jobs/{name}",
                body=delete_opts,
                content_type="application/json",
            )
        except Exception as e:
            return self._json(500, {"ok": False, "error": str(e)})
        if code == 404:
            return self._json(
                200,
                {"ok": True, "stdout": f"job.batch/{name} already deleted", "stderr": ""},
            )
        if code >= 300:
            return self._json(500, {"ok": False, "error": raw})
        return self._json(200, {"ok": True, "stdout": raw, "stderr": ""})

    def log_message(self, fmt, *args):
        """Suppress the base class's per-request logging."""
        return


if __name__ == "__main__":
    # Bind to every interface on port 8080 and serve requests until the process is stopped.
    bind_address = ("0.0.0.0", 8080)
    HTTPServer(bind_address, Handler).serve_forever()
16 changes: 13 additions & 3 deletions sge/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,19 @@ RUN useradd -m application \
&& echo "application:application" | chpasswd

# Make sure host and container share the same GID for group 'docker', bc it has reading permissions to the socket file
ARG DOCKER_GROUP
RUN groupmod -g $DOCKER_GROUP docker
RUN usermod -aG docker application

#ARG DOCKER_GROUP
#RUN groupmod -g $DOCKER_GROUP docker
#RUN usermod -aG docker application

ARG DOCKER_GROUP=1002
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is already in the .env file

RUN set -eux; \
if getent group docker >/dev/null 2>&1; then \
groupmod -g "${DOCKER_GROUP}" docker; \
else \
groupadd -g "${DOCKER_GROUP}" docker; \
fi; \
usermod -aG docker application
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The docker group already exists at this point so I would keep the old code


# Add setup script and set permissions
ADD setup_gridengine.sh /usr/local/bin/setup_gridengine.sh
Expand Down