# ECS with Kubernetes

> ⚠️ **Warning**
> All Kubernetes work is at the prototype stage.

## Kubernetes Cluster

While prototyping we used several Kubernetes distributions, namely [`kind`](https://kind.sigs.k8s.io/), [`minikube`](https://minikube.sigs.k8s.io/docs/) and [`k3s`](https://k3s.io/),
in both local and remote cluster deployments. We used OpenStack for remote deployment.
Follow the guides of the individual distributions to create the desired cluster setup.
For now we chose `k3s` for most of the activities performed because it is a lightweight,
easily installed distribution which is also [`CNCF`](https://www.cncf.io/training/certification/) certified.

All `k3s` settings were left at their defaults except one: the locked-in-memory size. Use `ulimit -l` to learn
the limit for the current user, and set `LimitMEMLOCK` inside the k3s systemd service config
to the correct value. Right now the `flp` user has unlimited size (`LimitMEMLOCK=infinity`).
This config is necessary because even if you are running PODs with the privileged security context
under the `flp` user, Kubernetes still sets limits according to its internal settings and doesn't
respect the Linux settings.
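
A systemd drop-in is one common way to set this without editing the generated unit file. The drop-in path below is an assumption based on the default k3s service name; adjust it to your installation:

```ini
# /etc/systemd/system/k3s.service.d/memlock.conf (hypothetical drop-in path)
[Service]
# Allow the k3s service (and the PODs it spawns) to lock unlimited memory.
LimitMEMLOCK=infinity
```

After creating the drop-in, run `systemctl daemon-reload` and restart the k3s service for the limit to take effect.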

Another prerequisite we currently expect on the target nodes
is the ability to run PODs with privileged permissions and under the user `flp`.
This means that the machine has to have the `flp` user set up the same way as
if you performed the installation with [`o2-flp-setup`](https://alice-flp.docs.cern.ch/Operations/Experts/system-configuration/utils/o2-flp-setup/).

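In POD terms, this combination can be expressed through the container's `securityContext`. The fragment below is an illustrative sketch, not a manifest from this repository; the image name and UID are placeholders (use the UID that `o2-flp-setup` assigns to `flp`):

```yaml
# Hypothetical POD spec fragment: privileged container running as the flp user.
apiVersion: v1
kind: Pod
metadata:
  name: example-task
spec:
  containers:
    - name: task
      image: example/image:latest   # placeholder image
      securityContext:
        privileged: true
        runAsUser: 1001             # flp user's UID (assumed value)
```
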
## Running tasks (`KubectlTask`)

ECS is set up to run tasks through Mesos on all required bare-metal hosts with active
task management (see [`ControllableTask`](/executor/executable/controllabletask.go))
and OCC gRPC communication. When running a Docker task through ECS we could easily
wrap the command to be run into a Docker container with the proper settings
(see [running Docker](/docs/running_docker.md)). This is however not possible for Kubernetes
workloads, as the PODs are "hidden" inside the cluster. So we plan
to deploy our own Task Controller which will connect to and drive the
OCC state machine of the required tasks. Thus we need to create a custom,
proof-of-concept way to communicate with the Kubernetes cluster from the Mesos executor.

The reason why we don't call the Kubernetes cluster directly from the ECS core
is that ECS does a lot of heavy lifting while deploying workloads,
monitoring workloads and generating a lot of configuration which
is not trivial to replicate manually. However, if we create a class
that is able to deploy a single task into Kubernetes and monitor its
state, we can replicate the `ControllableTask` workflow and leave ECS
mostly intact for now, save a lot of work and focus on prototyping
the Kubernetes operator pattern.

Thus [`KubectlTask`](/executor/executable/kubectltask.go) was created. This class
is written as a wrapper around the `kubectl` utility to manage the Kubernetes cluster.
It is based on the following `kubectl` commands:

* `apply` => `kubectl apply -f manifest.yaml` - deploys the resource described inside the given manifest
* `delete` => `kubectl delete -f manifest.yaml` - deletes the resource from the cluster
* `patch` => `kubectl patch -f exampletask.yaml --type='json' -p='[{"op": "replace", "path": "/spec/state", "value": "running"}]'` - changes the state of the resource inside the cluster
* `get` => `kubectl get -f manifest.yaml -o jsonpath='{.spec.state}'` - queries an exact field of the resource (`state` in the example) inside the cluster.
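
The actual implementation lives in the linked Go file; as a rough illustration of the wrapper approach, the sketch below builds the argument lists for two of the commands above and hands them to `os/exec`. The helper names are hypothetical, not the real `KubectlTask` API:

```go
package main

import (
	"fmt"
	"os/exec"
	"strings"
)

// patchArgs builds the argument list that forces a resource into a new state,
// mirroring the `kubectl patch` invocation shown above.
func patchArgs(manifest, state string) []string {
	patch := fmt.Sprintf(`[{"op": "replace", "path": "/spec/state", "value": "%s"}]`, state)
	return []string{"patch", "-f", manifest, "--type=json", "-p", patch}
}

// getStateArgs builds the argument list that queries .spec.state of a resource.
func getStateArgs(manifest string) []string {
	return []string{"get", "-f", manifest, "-o", "jsonpath={.spec.state}"}
}

// runKubectl executes kubectl with the given args and returns trimmed stdout.
// It requires kubectl on PATH and a reachable cluster.
func runKubectl(args ...string) (string, error) {
	out, err := exec.Command("kubectl", args...).Output()
	return strings.TrimSpace(string(out)), err
}

func main() {
	fmt.Println(strings.Join(patchArgs("exampletask.yaml", "running"), " "))
	fmt.Println(strings.Join(getStateArgs("exampletask.yaml"), " "))
}
```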

These four commands allow us to deploy and monitor the status of the deployed
resource without the necessity of interacting with it directly. However, `KubectlTask`
expects the resource to be the CRD [Task](/control-operator/api/v1alpha1/task_types.go).

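The authoritative schema is defined in the linked `task_types.go`. As an illustrative sketch only — the API group is a placeholder and every field except `.spec.state` (which the kubectl examples above rely on) is an assumption — a minimal Task manifest could look like:

```yaml
# Hypothetical minimal Task custom resource; check task_types.go for the
# real API group and spec fields.
apiVersion: <group>/v1alpha1   # placeholder API group
kind: Task
metadata:
  name: example-task
spec:
  state: running
```
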
In order to activate `KubectlTask` you need to change the yaml template
inside the `ControlWorkflows` directory. Namely:

* add the path to the kubectl manifest as the first argument of the `.command.arguments` field
* change `.control.mode` to either `kubectl_direct` or `kubectl_fairmq`

You can find working templates inside `control-operator/ecs-manifests/control-workflows/*_kube.yaml`.
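
Applied to a workflow template, the two changes above could look roughly like the fragment below. The surrounding template structure is abbreviated and the manifest path is a placeholder; only the two edited fields are prescribed by the steps above:

```yaml
# Hypothetical ControlWorkflows task template fragment.
command:
  arguments:
    - /path/to/kubernetes-manifest.yaml   # placeholder path, first argument
control:
  mode: kubectl_direct   # or kubectl_fairmq
```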

Working kubectl manifests can be found in `control-operator/ecs-manifests/kubernetes-manifests`.
See the `*test.yaml` files for concrete manifests deployable with `kubectl apply`; the rest
are templates with variables in a `${var}` format to be filled in. `KubectlTask`
fills these variables from environment variables.