From 5086d5ffaaa8fb60905a7ecd695d994797917723 Mon Sep 17 00:00:00 2001 From: Dylan Simon Date: Tue, 26 May 2026 12:04:54 -0400 Subject: [PATCH 1/6] jenkins: migration to new k8s-based jenkins system This is a significant structural change to run all the tests in a single container in parallel. --- jenkins/Dockerfile | 3 +- jenkins/Jenkinsfile | 119 ++++++++++++++++---------------------------- 2 files changed, 46 insertions(+), 76 deletions(-) diff --git a/jenkins/Dockerfile b/jenkins/Dockerfile index 0eff62b2b..9dc2d62b5 100644 --- a/jenkins/Dockerfile +++ b/jenkins/Dockerfile @@ -14,12 +14,13 @@ RUN apt-get update && \ git-lfs \ python3 \ ruby \ + parallel \ && \ apt-get autoremove --purge -y && \ apt-get autoclean -y && \ rm -rf /var/cache/apt/* /var/lib/apt/lists/* RUN curl -L http://user.astro.wisc.edu/~townsend/resource/download/mesasdk/mesasdk-x86_64-linux-26.3.2.tar.gz | \ tar xzf - -C /opt/ -ENV MESASDK_ROOT=/opt/mesasdk +ENV MESASDK_ROOT=/opt/mesasdk-26.3.2 RUN bash $MESASDK_ROOT/bin/mesasdk_init.sh RUN gem install mesa_test -v 1.1.11 diff --git a/jenkins/Jenkinsfile b/jenkins/Jenkinsfile index 52a744a37..693faa94a 100644 --- a/jenkins/Jenkinsfile +++ b/jenkins/Jenkinsfile @@ -3,84 +3,53 @@ properties([ buildDiscarder(logRotator(numToKeepStr: '8', daysToKeepStr: '20')) ]) -def labels = 'linux && docker && mesa' -/* use HOME=MESA_DIR=customWorkspace=dir for build */ -def dir -def image -def vars = [ - /* 4*4=16 isn't quite right as we're limited to 4, but in practice it seems work out */ - "OMP_NUM_THREADS=4", - "NPROCS=4" - ] -def ntests = 0 +def parallel_tests = 4; +def cores_per_test = 4; -node(labels) { - stage('build') { - timeout(time: 1, unit: 'HOURS') { - dir = env.WORKSPACE - withEnv(vars + ["HOME=${dir}", "MESA_DIR=${dir}"]) { - checkout scm - withCredentials([ - usernamePassword(credentialsId: 'mesa_test', passwordVariable: 'MESA_TEST_PSW', usernameVariable: 'MESA_TEST_USR'), - string(credentialsId: 'mesa_test_logs_token', variable: 
'MESA_LOGS_TOKEN')]) { - writeYaml(file: "${dir}/.mesa_test/config.yml", overwrite: true, data: [ - 'computer_name': 'Jenkins', - 'email': env.MESA_TEST_USR, - 'password': env.MESA_TEST_PSW, - 'logs_token': env.MESA_LOGS_TOKEN, - 'github_protocol': ':ssh', - 'mesa_mirror': "${dir}/mirror", /* not used */ - 'mesa_work': dir, - 'platform': 'Linux', - 'platform_version': 'Ubuntu' - ]) - } - image = docker.build("flatironinstitute/mesa:${env.BRANCH_NAME}", "jenkins") - image.inside() { - sh '''#!/bin/bash -ex - source $MESASDK_ROOT/bin/mesasdk_init.sh - mesa_test install --no-checkout - mesa_test submit --empty - ''' - def nt = sh returnStdout: true, script: '''#!/bin/bash - source $MESASDK_ROOT/bin/mesasdk_init.sh - ntests=0 - for f in $MESA_DIR/*/test_suite ; do - nt=$(cd $f && ./count_tests) - (( ntests+=nt )) - done - echo $ntests - ''' - ntests = nt.toInteger() - } +buildPod(context: 'jenkins', cpus: parallel_tests*cores_per_test, memory: '128Gi') { + withEnv([ + "MESA_DIR=$WORKSPACE", + "HOME=$WORKSPACE", + "NTESTS=$parallel_tests", + "NPROCS=$cores_per_test", + "OMP_NUM_THREADS=4" // this will oversubscribe, but it generally seems to be okay + ]) { + stage('setup') { + withCredentials([ + usernamePassword(credentialsId: 'mesa_test', passwordVariable: 'MESA_TEST_PSW', usernameVariable: 'MESA_TEST_USR'), + string(credentialsId: 'mesa_test_logs_token', variable: 'MESA_LOGS_TOKEN')]) { + writeYaml(file: "$WORKSPACE/.mesa_test/config.yml", overwrite: true, data: [ + 'computer_name': 'Jenkins', + 'email': env.MESA_TEST_USR, + 'password': env.MESA_TEST_PSW, + 'logs_token': env.MESA_LOGS_TOKEN, + 'github_protocol': ':ssh', + 'mesa_mirror': "$WORKSPACE/mirror", /* not used */ + 'mesa_work': env.WORKSPACE, + 'platform': 'Linux', + 'platform_version': 'Ubuntu' + ]) } + sh '''#!/bin/bash -ex + source $MESASDK_ROOT/bin/mesasdk_init.sh + mesa_test install --no-checkout + mesa_test submit --empty + ''' } - } -} - -def testStages = [:] -for (int i = 1; i <= ntests; i++) { - def 
test = i - def name = "test ${test}" - testStages[name] = { -> - node(labels) { - stage(name) { - timeout(time: 24, unit: 'HOURS') { - withEnv(vars + ["HOME=${dir}", "MESA_DIR=${dir}"]) { - catchError(buildResult: 'UNSTABLE', stageResult: 'UNSTABLE') { - image.inside(env.WORKSPACE == dir ? "" : "-v ${dir}:${dir}:rw") { - sh """#!/bin/bash -e - source \$MESASDK_ROOT/bin/mesasdk_init.sh - - mesa_test test $test --force-logs - """ - } - } - } - } - } + stage('tests') { + sh '''#!/bin/bash + source $MESASDK_ROOT/bin/mesasdk_init.sh + ntests=0 + for f in $MESA_DIR/*/test_suite ; do + nt=$(cd $f && ./count_tests) + (( ntests+=nt )) + done + unset PARALLEL + seq 1 $ntests | parallel -j$NTESTS --joblog test.log --timeout 86400 mesa_test test {} --force-logs + r=$? + cat test.log + exit $r + ''' } } } - -parallel testStages From a58079566bd0b8201e063d936a45570c3742ab9f Mon Sep 17 00:00:00 2001 From: Dylan Simon Date: Tue, 26 May 2026 18:06:03 -0400 Subject: [PATCH 2/6] jenkins: use 8 cores per test --- jenkins/Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jenkins/Jenkinsfile b/jenkins/Jenkinsfile index 693faa94a..3d4334038 100644 --- a/jenkins/Jenkinsfile +++ b/jenkins/Jenkinsfile @@ -4,7 +4,7 @@ properties([ ]) def parallel_tests = 4; -def cores_per_test = 4; +def cores_per_test = 8; buildPod(context: 'jenkins', cpus: parallel_tests*cores_per_test, memory: '128Gi') { withEnv([ From 60602ab6c222830c1f5fb2bcb2d00e59c0ac6ef4 Mon Sep 17 00:00:00 2001 From: Dylan Simon Date: Tue, 26 May 2026 19:27:21 -0400 Subject: [PATCH 3/6] jenkins: set OMP_NUM_THREADS=cores_per_test Seems like the tests don't use NPROCS? Also increase to 6 tests in parallel.
--- jenkins/Jenkinsfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/jenkins/Jenkinsfile b/jenkins/Jenkinsfile index 3d4334038..3e7994889 100644 --- a/jenkins/Jenkinsfile +++ b/jenkins/Jenkinsfile @@ -3,7 +3,7 @@ properties([ buildDiscarder(logRotator(numToKeepStr: '8', daysToKeepStr: '20')) ]) -def parallel_tests = 4; +def parallel_tests = 6; def cores_per_test = 8; buildPod(context: 'jenkins', cpus: parallel_tests*cores_per_test, memory: '128Gi') { @@ -11,8 +11,8 @@ buildPod(context: 'jenkins', cpus: parallel_tests*cores_per_test, memory: '128Gi "MESA_DIR=$WORKSPACE", "HOME=$WORKSPACE", "NTESTS=$parallel_tests", - "NPROCS=$cores_per_test", - "OMP_NUM_THREADS=4" // this will oversubscribe, but it generally seems to be okay + "NPROCS=$cores_per_test", // this is only used by build? + "OMP_NUM_THREADS=$cores_per_test" // this is used by tests? ]) { stage('setup') { withCredentials([ From be4bbee08a8480b3396b142bb9b6ca9cd065c381 Mon Sep 17 00:00:00 2001 From: Dylan Simon Date: Wed, 27 May 2026 09:19:50 -0400 Subject: [PATCH 4/6] jenkins: update mesa_test to 1.2.0 As per @wmwolf --- jenkins/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jenkins/Dockerfile b/jenkins/Dockerfile index 9dc2d62b5..6cd071610 100644 --- a/jenkins/Dockerfile +++ b/jenkins/Dockerfile @@ -23,4 +23,4 @@ RUN curl -L http://user.astro.wisc.edu/~townsend/resource/download/mesasdk/mesas tar xzf - -C /opt/ ENV MESASDK_ROOT=/opt/mesasdk-26.3.2 RUN bash $MESASDK_ROOT/bin/mesasdk_init.sh -RUN gem install mesa_test -v 1.1.11 +RUN gem install mesa_test -v 1.2.0 From 143706cf122255c021600b0f5b495ea3e40b9f35 Mon Sep 17 00:00:00 2001 From: Dylan Simon Date: Wed, 27 May 2026 10:11:21 -0400 Subject: [PATCH 5/6] jenkins: don't set NPROCS, try 5 parallel --- jenkins/Jenkinsfile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/jenkins/Jenkinsfile b/jenkins/Jenkinsfile index 3e7994889..400fab243 100644 --- a/jenkins/Jenkinsfile +++ 
b/jenkins/Jenkinsfile @@ -3,7 +3,7 @@ properties([ buildDiscarder(logRotator(numToKeepStr: '8', daysToKeepStr: '20')) ]) -def parallel_tests = 6; +def parallel_tests = 5; def cores_per_test = 8; buildPod(context: 'jenkins', cpus: parallel_tests*cores_per_test, memory: '128Gi') { @@ -11,8 +11,7 @@ buildPod(context: 'jenkins', cpus: parallel_tests*cores_per_test, memory: '128Gi "MESA_DIR=$WORKSPACE", "HOME=$WORKSPACE", "NTESTS=$parallel_tests", - "NPROCS=$cores_per_test", // this is only used by build? - "OMP_NUM_THREADS=$cores_per_test" // this is used by tests? + "OMP_NUM_THREADS=$cores_per_test" ]) { stage('setup') { withCredentials([ From 69b264330fb8ed31331dab35661e6796d260470f Mon Sep 17 00:00:00 2001 From: Debraheem Date: Tue, 2 Jun 2026 00:22:36 -0400 Subject: [PATCH 6/6] reorder test_suite testing --- star/test_suite/do1_test_source | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/star/test_suite/do1_test_source b/star/test_suite/do1_test_source index 8f1acb057..2b96b0daa 100755 --- a/star/test_suite/do1_test_source +++ b/star/test_suite/do1_test_source @@ -3,39 +3,42 @@ # setenv MESA_FPE_CHECKS_ON 1 # Slow cases -do_one ppisn "Successful test: evolved 100 days past first relax" "final.mod" x150 -do_one 1M_pre_ms_to_wd "stop because log_surface_luminosity <= log_L_lower_limit" "final.mod" auto -do_one ccsn_IIp "shock has reached target location 1" "shock_part5.mod" auto -do_one 1M_thermohaline "all values are within tolerances" "final.mod" auto do_one c13_pocket "all values are within tolerance" "final.mod" auto - -do_one split_burn_big_net "stop because have dropped below central lower limit for si28" "final.mod" skip +do_one 1M_pre_ms_to_wd "stop because log_surface_luminosity <= log_L_lower_limit" "final.mod" auto +do_one hb_2M "all values are within tolerance" "final.mod" auto +do_one ppisn "Successful test: evolved 100 days past first relax" "final.mod" x150 +do_one make_co_wd "stop because 
log_surface_luminosity <= log_L_lower_limit" "final.mod" auto +do_one 20M_z2m2_high_rotation "stop because have dropped below central lower limit for he4" "final.mod" auto do_one 12M_pre_ms_to_core_collapse "all values are within tolerance" "final.mod" auto -do_one 20M_pre_ms_to_core_collapse "all values are within tolerance" "final.mod" auto do_one make_pre_ccsn_13bvn "termination code: fe_core_infall_limit" "final.mod" auto +do_one ccsn_IIp "shock has reached target location 1" "shock_part5.mod" auto do_one zams_to_cc_80 "all values are within tolerance" "final.mod" auto +do_one 20M_pre_ms_to_core_collapse "all values are within tolerance" "final.mod" auto +do_one cburn_inward "Terminate as flame reached half way" "final.mod" auto +do_one make_o_ne_wd "stop because log_surface_luminosity <= log_L_lower_limit" "settled_envelope.mod" auto do_one pisn "termination code: Star is unbound" "final.mod" skip do_one 15M_dynamo "all values are within tolerances" "final.mod" auto +do_one 5M_cepheid_blue_loop "crossed blue edge to start 3rd crossing" "final.mod" auto +do_one custom_rates "stop because log_center_temperature >= log_center_temp_upper_limit" "final.mod" auto +do_one 1M_thermohaline "all values are within tolerances" "final.mod" auto + +do_one split_burn_big_net "stop because have dropped below central lower limit for si28" "final.mod" skip do_one 1.3M_ms_high_Z "stop because log_surface_luminosity >= log_L_upper_limit" "final.mod" auto do_one 1.4M_ms_op_mono "successfully used OP_mono opacities" skip skip do_one 1.5M_with_diffusion "stop because have dropped below central lower limit for h1" "final.mod" auto -do_one 5M_cepheid_blue_loop "crossed blue edge to start 3rd crossing" "final.mod" auto do_one 7M_prems_to_AGB "stop because log_surface_luminosity >= log_L_upper_limit" "final.mod" auto do_one 16M_conv_premix "termination code: xa_central_lower_limit" "tams.mod" auto do_one 16M_predictive_mix "termination code: xa_central_lower_limit" "tams.mod" auto 
-do_one 20M_z2m2_high_rotation "stop because have dropped below central lower limit for he4" "final.mod" auto do_one accreted_material_j "star_mass_max_limit" "final.mod" auto do_one adjust_net "finished with expected number of species" "final.mod" auto do_one carbon_kh "stop because log_center_density >= log_center_density_upper_limit" "final.mod" auto -do_one cburn_inward "Terminate as flame reached half way" "final.mod" auto do_one check_pulse_atm "all values are within tolerance" "final.mod" skip do_one check_redo "stop because have dropped below central lower limit for h1" "final.mod" auto do_one conductive_flame "all values are within tolerance" "final.mod" auto do_one conserve_angular_momentum "stop because he_core_mass >= he_core_mass_limit" "final.mod" auto do_one conv_core_cpm "Test passed: mass_conv_core within specified range" "final.mod" auto do_one custom_colors "termination code: xa_central_lower_limit" "final.mod" skip -do_one custom_rates "stop because log_center_temperature >= log_center_temp_upper_limit" "final.mod" auto do_one diffusion_smoothness "brunt_N2_composition_term is smooth enough" "final.mod" skip do_one extended_convective_penetration "All values are within tolerances" "final.mod" auto @@ -46,7 +49,6 @@ do_one gyre_in_mesa_rsg "matched target" skip skip do_one gyre_in_mesa_spb "matched target" skip skip do_one gyre_in_mesa_wd "matched target" skip skip -do_one hb_2M "all values are within tolerance" "final.mod" auto do_one high_mass "stop because have dropped below central lower limit for h1" "final.mod" auto do_one high_z "stop because have dropped below central lower limit for he4" "final.mod" auto do_one hot_cool_wind "all values are within tolerance" "final.mod" auto @@ -58,8 +60,6 @@ do_one make_brown_dwarf "stop because star_age >= max_age" "final.mod" auto do_one make_env "stop because star_age >= max_age" "env.mod" skip do_one make_he_wd "stop because star_age >= max_age" "final.mod" auto do_one make_metals "stop because have 
dropped below central lower limit for he4" "final.mod" auto -do_one make_co_wd "stop because log_surface_luminosity <= log_L_lower_limit" "final.mod" auto -do_one make_o_ne_wd "stop because log_surface_luminosity <= log_L_lower_limit" "settled_envelope.mod" auto do_one make_planets "stop because star_age >= max_age" "planet_evolve_1.0_MJ_10.0_ME_2.0_RJ.mod" auto do_one make_sdb "stop because have risen above central upper limit for c12" "sdb.mod" skip do_one make_zams "stop because Lnuc_div_L >= Lnuc_div_L_zams_limit" "zams.mod" skip