Skip to content

Distributed Nix Build #4

Distributed Nix Build

Distributed Nix Build #4

Workflow file for this run

name: Distributed Nix Build

# Manual trigger only: the workflow is started from the Actions UI (or the
# API) via workflow_dispatch and takes a single optional input.
on:
  workflow_dispatch:
    inputs:
      # Seconds the coordinator job sleeps before tearing the builders down,
      # leaving a window for manual SSH debugging. 0 (the default) skips it.
      debug_sleep_duration:
        description: 'Sleep duration (in seconds) at the end for manual SSH debugging (0 to disable)'
        required: false
        type: number
        default: 0
# Workflow-level environment, visible to every job and step.
env:
  # JSON object mapping a runner label to the number of builder jobs to start
  # on that label. The `config` job expands it into the build matrix.
  BUILDER_COUNTS: '{"ubuntu-24.04": 1, "ubuntu-24.04-arm": 1, "macos-26-intel": 1, "macos-26": 1}'
  # Extra nix.conf lines handed to the local setup action (`extra_nix_config`)
  # by both the builder and the coordinator jobs. Kept as a literal block
  # scalar so each setting stays on its own line.
  EXTRA_NIX_CONFIG: |
    download-attempts = 10
    trusted-public-keys = cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY= nix-community.cachix.org-1:mB9FSh9qf2dCimDSUo8Zy7bkq5CX+/rkCWyvRCYg3Fs= cache.nixos-cuda.org:74DUi4Ye579gUqzH4ziL9IyiJBlDpMRn9MBN8oNan9M=
    extra-substituters = https://nix-community.cachix.org https://cache.nixos-cuda.org
jobs:
  # Computes the values shared by the other jobs:
  #   run_suffix     - random 6-hex-digit tag used in host and artifact names
  #   builder_matrix - JSON array of {os, id} objects, one per builder job
  #   builders_list  - space-separated Tailscale hostnames of all builders
  config:
    runs-on: ubuntu-slim
    outputs:
      builder_matrix: ${{ steps.set.outputs.builder_matrix }}
      builders_list: ${{ steps.set.outputs.builders_list }}
      run_suffix: ${{ steps.set.outputs.run_suffix }}
    steps:
      - id: set
        run: |
          SUFFIX=$(openssl rand -hex 3)
          echo "run_suffix=$SUFFIX" >> "$GITHUB_OUTPUT"
          # BUILDER_COUNTS comes from the workflow-level env block, so it is
          # read from the environment rather than spliced into the script text.
          # Each {"<os>": <count>} entry expands to <count> objects with ids
          # "<os>-1" .. "<os>-<count>".
          MATRIX_JSON=$(echo "$BUILDER_COUNTS" | jq -c '[
            to_entries[] | .key as $os | .value as $count |
            range(1; $count + 1) | { os: $os, id: "\($os)-\(.)" }
          ]')
          echo "builder_matrix=$MATRIX_JSON" >> "$GITHUB_OUTPUT"
          # Hostnames follow the pattern nix-builder-<suffix>-<id>, matching
          # the tailscale_hostname given to each builder job below.
          BUILDERS_LIST=$(echo "$MATRIX_JSON" | jq -r --arg suffix "$SUFFIX" 'map("nix-builder-\($suffix)-\(.id)") | join(" ")')
          echo "builders_list=$BUILDERS_LIST" >> "$GITHUB_OUTPUT"

  # One job per entry of builder_matrix; each joins the tailnet as a builder
  # through the repository's local action.
  builder:
    needs: config
    name: Builder ${{ matrix.builder.id }} (${{ needs.config.outputs.run_suffix }})
    runs-on: ${{ matrix.builder.os }}
    strategy:
      # A failure on one platform must not cancel the other builders.
      fail-fast: false
      matrix:
        builder: ${{ fromJSON(needs.config.outputs.builder_matrix) }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v6
      - name: Setup Nix & Tailscale
        uses: ./
        with:
          tailscale_authkey: ${{ secrets.TS_OAUTH_SECRET }}
          tailscale_hostname: nix-builder-${{ needs.config.outputs.run_suffix }}-${{ matrix.builder.id }}
          tailscale_tags: tag:nix-ci-builder
          role: builder
          extra_nix_config: ${{ env.EXTRA_NIX_CONFIG }}

  # Drives the run: benchmarks the link to the first builder, dispatches the
  # multi-architecture build while profiling the coordinator, uploads the
  # measurements and finally tears the builders down.
  coordinator:
    needs: config
    name: Coordinator (${{ needs.config.outputs.run_suffix }})
    runs-on: ubuntu-24.04
    steps:
      - name: Checkout code
        uses: actions/checkout@v6
      - name: Setup Nix & Tailscale
        uses: ./
        with:
          tailscale_authkey: ${{ secrets.TS_OAUTH_SECRET }}
          tailscale_hostname: nix-coordinator-${{ needs.config.outputs.run_suffix }}
          tailscale_tags: tag:nix-ci-coordinator
          role: coordinator
          builders: ${{ needs.config.outputs.builders_list }}
          extra_nix_config: ${{ env.EXTRA_NIX_CONFIG }}
      - name: Install Profiling Tools
        run: |
          sudo apt-get update
          sudo apt-get install -y iperf3 dstat linux-tools-common linux-tools-generic sysstat time linux-tools-$(uname -r) linux-cloud-tools-$(uname -r)
      - name: Network & SSH Benchmark
        env:
          BUILDERS: ${{ needs.config.outputs.builders_list }}
        run: |
          # Benchmark against the first hostname in the list only.
          # NOTE(review): the remote commands use apt-get, so this presumes the
          # first builder is a Debian/Ubuntu host reachable as root - confirm
          # this still holds if BUILDER_COUNTS is reordered.
          TARGET=$(echo "$BUILDERS" | awk '{print $1}')
          echo "Benchmarking against Builder: $TARGET"
          echo "Setting up iperf3 server on $TARGET..."
          ssh "root@$TARGET" "apt-get update && apt-get install -y iperf3 && iperf3 -s -D"
          echo -e "\n=== Test 1: Pure Tailscale (iperf3) ==="
          iperf3 -c "$TARGET" -t 10 -J > /tmp/iperf3_results.json
          jq '.end.sum_received | {bitrate: .bits_per_second, bytes: .bytes}' /tmp/iperf3_results.json
          # 200 MiB of random data, streamed through SSH in tests 2 and 3.
          dd if=/dev/urandom of=/tmp/testdata.bin bs=1M count=200 status=none
          echo -e "\n=== Test 2: Default SSH Pipeline ==="
          /usr/bin/time -v ssh "root@$TARGET" "cat > /dev/null" < /tmp/testdata.bin 2> /tmp/ssh_default_time.txt
          cat /tmp/ssh_default_time.txt
          echo -e "\n=== Test 3: Tuned SSH Pipeline (aes128-gcm, no comp) ==="
          /usr/bin/time -v ssh -o Compression=no -o Ciphers=aes128-gcm@openssh.com "root@$TARGET" "cat > /dev/null" < /tmp/testdata.bin 2> /tmp/ssh_tuned_time.txt
          cat /tmp/ssh_tuned_time.txt
          rm -f /tmp/testdata.bin
          ssh "root@$TARGET" "pkill iperf3"
      - name: Executing distributed builds
        env:
          RUN_SUFFIX: ${{ needs.config.outputs.run_suffix }}
        run: |
          # The heredoc delimiter is quoted, so the Nix expression is written
          # verbatim and the shell expands none of its $... sequences.
          cat << 'EOF' > test-build.nix
          let
            nixpkgs = (builtins.getFlake "nixpkgs").legacyPackages;
            # Read at evaluation time; this is why the build runs with --impure.
            runSuffix = builtins.getEnv "RUN_SUFFIX";
            systems = [ "x86_64-linux" "aarch64-linux" "x86_64-darwin" "aarch64-darwin" ];
            # Number of concurrent tasks to send to EACH architecture
            jobsPerSystem = 5;
            # Returns jobsPerSystem derivations for `system`. Every derivation
            # records facts about the machine that built it and then burns
            # some CPU. runSuffix is part of the name and the environment, and
            # substitution is disabled.
            buildFor = system:
              let
                pkgs = nixpkgs.${system};
              in
              builtins.genList (i:
                pkgs.runCommand "info-${system}-${toString i}-${runSuffix}" {
                  allowSubstitutes = false;
                  inherit runSuffix;
                } ''
                  mkdir -p $out
                  INFO_FILE=$out/info.txt
                  LOG_FILE=$out/log.txt
                  echo "=== Nix Builder Environment Info ===" > $INFO_FILE
                  echo "Target System : ${system}" >> $INFO_FILE
                  echo "Task ID : ${toString i}" >> $INFO_FILE
                  echo "Uname : $(uname -a)" >> $INFO_FILE
                  echo "Date : $(date)" >> $INFO_FILE
                  echo "Architecture : $(uname -m)" >> $INFO_FILE
                  echo "Build Cores : $NIX_BUILD_CORES" >> $INFO_FILE
                  if [ "$(uname -s)" = "Darwin" ]; then
                    MEM_BYTES=$(/usr/sbin/sysctl -n hw.memsize 2>/dev/null || echo 0)
                    echo "CPU Model : $(/usr/sbin/sysctl -n machdep.cpu.brand_string 2>/dev/null || echo 'Unknown')" >> $INFO_FILE
                    echo "Total Memory : $(( MEM_BYTES / 1073741824 )) GB" >> $INFO_FILE
                  elif [ -f /proc/cpuinfo ]; then
                    # Try "model name" first, then "Hardware", then the
                    # "CPU implementer" / "CPU part" pair.
                    CPU_MODEL=$(awk -F: '/^model name/ {print $2; exit}' /proc/cpuinfo | sed 's/^[ \t]*//')
                    if [ -z "$CPU_MODEL" ]; then
                      CPU_MODEL=$(awk -F: '/^Hardware/ {print $2; exit}' /proc/cpuinfo | sed 's/^[ \t]*//')
                    fi
                    if [ -z "$CPU_MODEL" ]; then
                      CPU_IMPL=$(awk -F: '/^CPU implementer/ {print $2; exit}' /proc/cpuinfo | sed 's/^[ \t]*//')
                      CPU_PART=$(awk -F: '/^CPU part/ {print $2; exit}' /proc/cpuinfo | sed 's/^[ \t]*//')
                      if [ -n "$CPU_PART" ]; then
                        CPU_MODEL="ARM (Impl: $CPU_IMPL, Part: $CPU_PART)"
                      fi
                    fi
                    [ -z "$CPU_MODEL" ] && CPU_MODEL="Unknown"
                    echo "CPU Model : $CPU_MODEL" >> $INFO_FILE
                    echo "Total Memory : $(awk '/MemTotal/ {printf "%.2f GB", $2/1024/1024}' /proc/meminfo 2>/dev/null || echo 'Unknown')" >> $INFO_FILE
                  fi
                  echo "Store Space : $(df -h /nix/store 2>/dev/null | tail -1 | awk '{print $4 " avail / " $2 " total"}' || echo 'Unknown')" >> $INFO_FILE
                  echo "-----------------------------------" >> $INFO_FILE
                  echo "Running heavy task ${toString i} on ${system}..." > $LOG_FILE
                  # Synthetic load: 50 MiB of random data hashed 20 times.
                  dd if=/dev/urandom of=$out/payload.bin bs=1M count=50 status=none
                  for j in {1..20}; do
                    sha256sum $out/payload.bin > /dev/null
                  done
                  echo "Task ${toString i} completed." >> $LOG_FILE
                ''
              ) jobsPerSystem;
            # Flatten the nested lists into a single array of derivations
            allTasks = nixpkgs.x86_64-linux.lib.flatten (map buildFor systems);
          in
          # One link farm, built for the evaluating system, exposing every
          # task output as result/task-<n>.
          nixpkgs.${builtins.currentSystem}.linkFarm "distributed-test-${runSuffix}" (
            nixpkgs.x86_64-linux.lib.imap1 (i: task: {
              name = "task-${toString i}";
              path = task;
            }) allTasks
          )
          EOF
          echo "Starting dstat (system metrics) in background..."
          # The trailing "2" is dstat's sampling delay in seconds, not a file
          # descriptor: only stdout goes to /dev/null, the CSV goes to --output.
          dstat -tcmnd --output /tmp/dstat_metrics.csv 2 > /dev/null &
          DSTAT_PID=$!
          PERF_BIN="/usr/lib/linux-tools/$(uname -r)/perf"
          echo "Starting perf (CPU profiler) in background..."
          sudo "$PERF_BIN" record -F 99 -a -g -o /tmp/perf.data &
          PERF_PID=$!
          echo "Starting remote multi-arch build..."
          # --max-jobs 0 disables local building. Capture the exit status
          # instead of letting `bash -e` abort here, so the profilers are still
          # stopped and the report still written when the build fails.
          BUILD_STATUS=0
          nix build -L --max-jobs 0 --impure -f test-build.nix || BUILD_STATUS=$?
          echo "Stopping profilers..."
          sudo kill -INT "$PERF_PID" || true
          # Let perf finish writing perf.data before the report reads it.
          wait "$PERF_PID" || true
          kill "$DSTAT_PID" || true
          echo "Generating perf report..."
          sudo "$PERF_BIN" report --stdio -i /tmp/perf.data > /tmp/perf_report.txt
          if [ "$BUILD_STATUS" -ne 0 ]; then
            echo "nix build failed with exit code $BUILD_STATUS" >&2
            exit "$BUILD_STATUS"
          fi
          echo -e "\n\n🚀 Build completed! Checking results from distributed builders:"
          for f in result/task-*/info.txt; do
            echo -e "\n============================================="
            echo "📄 Content of $f"
            echo "============================================="
            cat "$f"
          done
      - name: Upload Profiling Artifacts
        uses: actions/upload-artifact@v7
        # Collect whatever measurements exist even when an earlier step failed.
        if: always()
        with:
          name: profiling-${{ needs.config.outputs.run_suffix }}
          path: |
            /tmp/iperf3_results.json
            /tmp/ssh_default_time.txt
            /tmp/ssh_tuned_time.txt
            /tmp/dstat_metrics.csv
            /tmp/perf_report.txt
      - name: Optional manual SSH debugging sleep
        if: ${{ inputs.debug_sleep_duration > 0 }}
        env:
          # Passed through the environment so the input value is never spliced
          # into the shell source.
          DEBUG_SLEEP_DURATION: ${{ inputs.debug_sleep_duration }}
        run: |
          echo "Sleeping for $DEBUG_SLEEP_DURATION seconds for SSH debugging..."
          sleep "$DEBUG_SLEEP_DURATION"
      - name: Teardown Builders
        # Run even after a failed or cancelled step so the builder jobs are
        # always told to stop.
        # NOTE(review): stop-nix-builders is presumably installed by the local
        # setup action - confirm it is on PATH when that step itself failed.
        if: always()
        run: stop-nix-builders