Skip to content

Commit 2ed5794

Browse files
committed
feat(supervisor): custom tolerations for scheduled runs
1 parent 9cb3dcb commit 2ed5794

File tree

2 files changed

+75
-14
lines changed

2 files changed

+75
-14
lines changed

apps/supervisor/src/env.ts

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,16 @@ const Env = z
123123
KUBERNETES_SCHEDULER_NAME: z.string().optional(), // Custom scheduler name for pods
124124
// Large machine affinity settings - large-* presets prefer a dedicated pool
125125
KUBERNETES_LARGE_MACHINE_AFFINITY_ENABLED: BoolEnv.default(false),
126-
KUBERNETES_LARGE_MACHINE_AFFINITY_POOL_LABEL_KEY: z.string().trim().min(1).default("node.cluster.x-k8s.io/machinepool"),
127-
KUBERNETES_LARGE_MACHINE_AFFINITY_POOL_LABEL_VALUE: z.string().trim().min(1).default("large-machines"),
126+
KUBERNETES_LARGE_MACHINE_AFFINITY_POOL_LABEL_KEY: z
127+
.string()
128+
.trim()
129+
.min(1)
130+
.default("node.cluster.x-k8s.io/machinepool"),
131+
KUBERNETES_LARGE_MACHINE_AFFINITY_POOL_LABEL_VALUE: z
132+
.string()
133+
.trim()
134+
.min(1)
135+
.default("large-machines"),
128136
KUBERNETES_LARGE_MACHINE_AFFINITY_WEIGHT: z.coerce.number().int().min(1).max(100).default(100),
129137

130138
// Project affinity settings - pods from the same project prefer the same node
@@ -137,11 +145,29 @@ const Env = z
137145
.default("kubernetes.io/hostname"),
138146

139147
// Schedule affinity settings - runs from schedule trees prefer a dedicated pool
140-
KUBERNETES_SCHEDULE_AFFINITY_ENABLED: BoolEnv.default(false),
141-
KUBERNETES_SCHEDULE_AFFINITY_POOL_LABEL_KEY: z.string().trim().min(1).default("node.cluster.x-k8s.io/machinepool"),
142-
KUBERNETES_SCHEDULE_AFFINITY_POOL_LABEL_VALUE: z.string().trim().min(1).default("scheduled-runs"),
143-
KUBERNETES_SCHEDULE_AFFINITY_WEIGHT: z.coerce.number().int().min(1).max(100).default(80),
144-
KUBERNETES_SCHEDULE_ANTI_AFFINITY_WEIGHT: z.coerce.number().int().min(1).max(100).default(20),
148+
KUBERNETES_SCHEDULED_RUN_AFFINITY_ENABLED: BoolEnv.default(false),
149+
KUBERNETES_SCHEDULED_RUN_AFFINITY_POOL_LABEL_KEY: z
150+
.string()
151+
.trim()
152+
.min(1)
153+
.default("node.cluster.x-k8s.io/machinepool"),
154+
KUBERNETES_SCHEDULED_RUN_AFFINITY_POOL_LABEL_VALUE: z
155+
.string()
156+
.trim()
157+
.min(1)
158+
.default("scheduled-runs"),
159+
KUBERNETES_SCHEDULED_RUN_AFFINITY_WEIGHT: z.coerce.number().int().min(1).max(100).default(80),
160+
KUBERNETES_SCHEDULED_RUN_ANTI_AFFINITY_WEIGHT: z.coerce
161+
.number()
162+
.int()
163+
.min(1)
164+
.max(100)
165+
.default(20),
166+
167+
// Schedule toleration settings - scheduled runs tolerate taints on the dedicated pool
168+
// Comma-separated list of tolerations, each in the format key=value:effect (e.g. dedicated=scheduled-runs:NoSchedule)
169+
// For the Exists operator, omit "=value" and use key:effect (e.g. dedicated:NoSchedule)
170+
KUBERNETES_SCHEDULED_RUN_TOLERATIONS: z.string().optional(),
145171

146172
// Placement tags settings
147173
PLACEMENT_TAGS_ENABLED: BoolEnv.default(false),

apps/supervisor/src/workloadManager/kubernetes.ts

Lines changed: 42 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ export class KubernetesWorkloadManager implements WorkloadManager {
121121
spec: {
122122
...this.addPlacementTags(this.#defaultPodSpec, opts.placementTags),
123123
affinity: this.#getAffinity(opts),
124+
tolerations: this.#getScheduleTolerations(this.#isScheduledRun(opts)),
124125
terminationGracePeriodSeconds: 60 * 60,
125126
containers: [
126127
{
@@ -485,7 +486,7 @@ export class KubernetesWorkloadManager implements WorkloadManager {
485486
}
486487

487488
#getScheduleNodeAffinityRules(isScheduledRun: boolean): k8s.V1NodeAffinity | undefined {
488-
if (!env.KUBERNETES_SCHEDULE_AFFINITY_ENABLED || !env.KUBERNETES_SCHEDULE_AFFINITY_POOL_LABEL_VALUE) {
489+
if (!env.KUBERNETES_SCHEDULED_RUN_AFFINITY_ENABLED || !env.KUBERNETES_SCHEDULED_RUN_AFFINITY_POOL_LABEL_VALUE) {
489490
return undefined;
490491
}
491492

@@ -494,13 +495,13 @@ export class KubernetesWorkloadManager implements WorkloadManager {
494495
return {
495496
preferredDuringSchedulingIgnoredDuringExecution: [
496497
{
497-
weight: env.KUBERNETES_SCHEDULE_AFFINITY_WEIGHT,
498+
weight: env.KUBERNETES_SCHEDULED_RUN_AFFINITY_WEIGHT,
498499
preference: {
499500
matchExpressions: [
500501
{
501-
key: env.KUBERNETES_SCHEDULE_AFFINITY_POOL_LABEL_KEY,
502+
key: env.KUBERNETES_SCHEDULED_RUN_AFFINITY_POOL_LABEL_KEY,
502503
operator: "In",
503-
values: [env.KUBERNETES_SCHEDULE_AFFINITY_POOL_LABEL_VALUE],
504+
values: [env.KUBERNETES_SCHEDULED_RUN_AFFINITY_POOL_LABEL_VALUE],
504505
},
505506
],
506507
},
@@ -513,13 +514,13 @@ export class KubernetesWorkloadManager implements WorkloadManager {
513514
return {
514515
preferredDuringSchedulingIgnoredDuringExecution: [
515516
{
516-
weight: env.KUBERNETES_SCHEDULE_ANTI_AFFINITY_WEIGHT,
517+
weight: env.KUBERNETES_SCHEDULED_RUN_ANTI_AFFINITY_WEIGHT,
517518
preference: {
518519
matchExpressions: [
519520
{
520-
key: env.KUBERNETES_SCHEDULE_AFFINITY_POOL_LABEL_KEY,
521+
key: env.KUBERNETES_SCHEDULED_RUN_AFFINITY_POOL_LABEL_KEY,
521522
operator: "NotIn",
522-
values: [env.KUBERNETES_SCHEDULE_AFFINITY_POOL_LABEL_VALUE],
523+
values: [env.KUBERNETES_SCHEDULED_RUN_AFFINITY_POOL_LABEL_VALUE],
523524
},
524525
],
525526
},
@@ -528,6 +529,40 @@ export class KubernetesWorkloadManager implements WorkloadManager {
528529
};
529530
}
530531

532+
/**
 * Builds the tolerations applied to pods that belong to scheduled runs.
 *
 * Parses `env.KUBERNETES_SCHEDULED_RUN_TOLERATIONS`, a comma-separated list in which each entry
 * is either `key=value:effect` (Equal operator) or `key:effect` (Exists operator). Entries and
 * their individual parts are trimmed; empty entries (e.g. from a trailing comma) are ignored.
 *
 * @param isScheduledRun - Whether the pod being created belongs to a scheduled run.
 * @returns The parsed tolerations, or `undefined` when the run is not scheduled or no
 *   tolerations are configured.
 * @throws Error when an entry has no `:effect` part, has an empty key, or names an effect other
 *   than `NoSchedule`, `PreferNoSchedule` or `NoExecute`.
 */
#getScheduleTolerations(isScheduledRun: boolean): k8s.V1Toleration[] | undefined {
  if (!isScheduledRun || !env.KUBERNETES_SCHEDULED_RUN_TOLERATIONS) {
    return undefined;
  }

  // The only taint effects Kubernetes defines; anything else would be rejected by the API
  // server at pod creation, so fail here with a message that points at the bad entry.
  const validEffects = new Set(["NoSchedule", "PreferNoSchedule", "NoExecute"]);

  return env.KUBERNETES_SCHEDULED_RUN_TOLERATIONS.split(",")
    .map((entry) => entry.trim())
    .filter((entry) => entry.length > 0)
    .map((entry): k8s.V1Toleration => {
      // Format: key=value:effect or key:effect (Exists operator).
      // The effect is whatever follows the LAST colon, so the key/value part is kept whole.
      const colonIdx = entry.lastIndexOf(":");
      if (colonIdx === -1) {
        throw new Error(`Invalid toleration format (missing effect): "${entry}"`);
      }

      const rawEffect = entry.slice(colonIdx + 1).trim();
      if (rawEffect.length === 0) {
        throw new Error(`Invalid toleration format (missing effect): "${entry}"`);
      }
      if (!validEffects.has(rawEffect)) {
        throw new Error(
          `Invalid toleration effect "${rawEffect}" in "${entry}" (expected NoSchedule, PreferNoSchedule or NoExecute)`
        );
      }
      // Safe to narrow: rawEffect was checked against the allowed set above.
      const effect = rawEffect as k8s.V1Toleration["effect"];

      const keyValue = entry.slice(0, colonIdx);
      const eqIdx = keyValue.indexOf("=");

      // Without "=" the whole left-hand side is the key.
      const key = (eqIdx === -1 ? keyValue : keyValue.slice(0, eqIdx)).trim();
      if (key.length === 0) {
        throw new Error(`Invalid toleration format (missing key): "${entry}"`);
      }

      if (eqIdx === -1) {
        // key:effect -> Exists operator
        return { key, operator: "Exists", effect };
      }

      // key=value:effect -> Equal operator (an empty value is passed through unchanged)
      return {
        key,
        operator: "Equal",
        value: keyValue.slice(eqIdx + 1).trim(),
        effect,
      };
    });
}
565+
531566
#getProjectPodAffinity(projectId: string): k8s.V1PodAffinity | undefined {
532567
if (!env.KUBERNETES_PROJECT_AFFINITY_ENABLED) {
533568
return undefined;

0 commit comments

Comments
 (0)