Skip to content

WASM 1. Models

WASM 1. Models #216

Workflow file for this run

# Workflow identity and triggers: manual dispatch, or invocation from another
# workflow. Both entry points accept the same three inputs.
name: WASM 1. Models
on:
  # Manual runs started from the Actions UI.
  workflow_dispatch:
    inputs:
      dry_run:
        type: boolean
        default: true
        description: 'Dry run (build only, no release)'
      force:
        type: boolean
        default: false
        description: 'Force rebuild (ignore cache)'
      build_mode:
        type: choice
        default: prod
        description: 'Build mode'
        options:
          - prod
          - dev
  # Reusable-workflow entry point. build_mode is a plain string here; the
  # "Set build mode" step normalizes it to prod/dev.
  workflow_call:
    inputs:
      dry_run:
        default: true
        type: boolean
      force:
        default: false
        type: boolean
      build_mode:
        default: prod
        type: string
# One run per workflow/ref pair; an in-progress run is cancelled only when the
# triggering event is a pull request.
# NOTE(review): when invoked through workflow_call, github.workflow may resolve
# to the caller's name - confirm the group cannot collide with the caller's.
concurrency:
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
  group: ${{ github.workflow }}-${{ github.ref }}
# Default token scope for all jobs: read repository contents.
permissions:
  contents: read
jobs:
  # Builds the models (or restores them from the checkpoint cache) and uploads
  # the final output as the "models" artifact.
  build:
    name: Build Models
    runs-on: ubuntu-24.04
    timeout-minutes: 90
    permissions:
      # Read repository contents
      contents: read
      # OIDC authentication for Depot builds
      id-token: write
    steps:
# Check out the sources; the token is not persisted for later git commands.
- name: Checkout repository
  uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
  with:
    # Models workflow doesn't need any submodules.
    submodules: false
    persist-credentials: false
# Publishes the pinned Node.js version from .node-version as the
# `node-version` step output, consumed by the "Setup Node.js" step.
- name: Load tool versions
  id: tool-versions
  run: |
    # Fail loudly instead of handing an empty version to setup-node: without
    # pipefail, a failing `cat` in a pipeline would have gone unnoticed.
    if [ ! -s .node-version ]; then
      echo "::error file=.node-version::.node-version is missing or empty"
      exit 1
    fi
    # Strip all whitespace (newlines, CR from CRLF checkouts, stray spaces).
    NODE_VERSION=$(tr -d '[:space:]' < .node-version)
    if [ -z "$NODE_VERSION" ]; then
      echo "::error file=.node-version::.node-version contains no version string"
      exit 1
    fi
    echo "node-version=$NODE_VERSION" >> "$GITHUB_OUTPUT"
    echo "Loaded Node.js: $NODE_VERSION"
# Install the Node.js version resolved by the "Load tool versions" step.
- name: Setup Node.js
  uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0
  with:
    node-version: ${{ steps.tool-versions.outputs.node-version }}
# Note: version is specified in package.json packageManager field, not here
- name: Setup pnpm
  uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5
# Install workspace dependencies exactly as recorded in the lockfile.
- name: Install dependencies
  run: |
    pnpm install --frozen-lockfile
# Reclaims runner disk space before the build by deleting preinstalled SDKs,
# the apt cache and local Docker images. Prints `df -h` before and after.
- name: Free up disk space
  run: |
    echo "Disk space before cleanup:"
    df -h
    # Remove unnecessary packages to free up ~10GB
    sudo rm -rf /usr/share/dotnet
    sudo rm -rf /usr/local/lib/android
    sudo rm -rf /opt/ghc
    sudo rm -rf /opt/hostedtoolcache/CodeQL
    sudo rm -rf /usr/local/share/boost
    # Purge the runner tool cache but keep its node/ directory. This step runs
    # after "Setup Node.js", which installs into the tool cache; deleting the
    # whole directory would remove that Node.js and make the later `node` call
    # fall back to whatever Node the runner image ships.
    if [ -n "${AGENT_TOOLSDIRECTORY:-}" ] && [ -d "$AGENT_TOOLSDIRECTORY" ]; then
      sudo find "$AGENT_TOOLSDIRECTORY" -mindepth 1 -maxdepth 1 ! -name node -exec rm -rf {} +
    fi
    # Clean apt cache
    sudo apt-get clean
    # Remove docker images (best effort; skipped when there are none so that
    # `docker rmi` is never called without arguments)
    IMAGE_IDS=$(docker images -q || true)
    if [ -n "$IMAGE_IDS" ]; then
      # Word splitting is intentional: one argument per image ID.
      docker rmi $IMAGE_IDS || true
    fi
    echo "Disk space after cleanup:"
    df -h
# Normalizes the build_mode input to exactly "dev" or "prod" and exposes it
# as the `mode` step output. Anything other than "dev" becomes "prod".
- name: Set build mode
  id: build-mode
  env:
    INPUT_BUILD_MODE: ${{ inputs.build_mode }}
  run: |
    # Sanitize input - only allow 'prod' or 'dev'
    case "$INPUT_BUILD_MODE" in
      dev) BUILD_MODE="dev" ;;
      *) BUILD_MODE="prod" ;;
    esac
    echo "mode=$BUILD_MODE" >> "$GITHUB_OUTPUT"
    echo "Build mode: $BUILD_MODE"
# Reads the "models" entry from .github/cache-versions.json and exposes it as
# the `version` step output; fails the job when the entry is absent.
- name: Load cache version from centralized config
  id: cache-version
  shell: bash
  run: |
    CACHE_VERSION=$(jq -r '.versions["models"]' .github/cache-versions.json)
    case "$CACHE_VERSION" in
      '' | null)
        # jq -r prints the literal string "null" for a missing key.
        echo "× Error: Cache version not found for models in .github/cache-versions.json"
        exit 1
        ;;
    esac
    echo "version=$CACHE_VERSION" >> "$GITHUB_OUTPUT"
    echo "Cache version: $CACHE_VERSION"
# Computes the pieces of the checkpoint cache key.
# Outputs:
#   model_versions  - readable label built from the model revisions in build.mjs
#   cache_version   - the centralized cache version, passed through
#   downloaded_hash - hash over cache version, common/ and downloaded/ scripts,
#                     build.mjs and package.json
#   quantized_hash  - cumulative hash that also folds in converted/ and
#                     quantized/ scripts (used in the cache key)
- name: Generate model cache key
  id: cache-key
  env:
    CACHE_VERSION: ${{ steps.cache-version.outputs.version }}
  run: |
    # Cross-platform hash function.
    if command -v shasum &> /dev/null; then
      hash_cmd="shasum -a 256"
    elif command -v sha256sum &> /dev/null; then
      hash_cmd="sha256sum"
    else
      echo "Error: No SHA-256 command found"
      exit 1
    fi
    BUILD_SCRIPT=packages/models/scripts/build.mjs
    # Extract model revisions from build.mjs for explicit cache key versioning
    # This makes model version changes visible in the cache key (like WASM packages)
    MINILM_REVISION=$(grep -A 3 "'minilm-l6'" "$BUILD_SCRIPT" | grep "revision:" | sed "s/.*revision: '\([^']*\)'.*/\1/" | head -c 7)
    CODET5_REVISION=$(grep -A 3 "codet5:" "$BUILD_SCRIPT" | grep "revision:" | sed "s/.*revision: '\([^']*\)'.*/\1/")
    # The pipelines above succeed even when grep matches nothing, so surface an
    # empty extraction explicitly. This is only a warning: the key stays correct
    # because build.mjs itself is hashed below; only the label is affected.
    if [ -z "$MINILM_REVISION" ] || [ -z "$CODET5_REVISION" ]; then
      echo "::warning file=${BUILD_SCRIPT}::Could not extract every model revision from ${BUILD_SCRIPT}; the model_versions label will be incomplete"
    fi
    # Handle 'main' branch for codet5 (not a commit hash)
    if [ "$CODET5_REVISION" = "main" ]; then
      CODET5_SHORT="main"
    else
      CODET5_SHORT=$(echo "$CODET5_REVISION" | head -c 7)
    fi
    # Create composite version string (short revisions for readability)
    MODEL_VERSIONS="minilm-${MINILM_REVISION}_codet5-${CODET5_SHORT}"
    echo "model_versions=${MODEL_VERSIONS}" >> "$GITHUB_OUTPUT"
    echo "Model versions: ${MODEL_VERSIONS}"
    # Per-phase cumulative hashing (like node-smol)
    # hash_dir DIR: prints one SHA-256 over the per-file hashes of every *.mjs
    # file under DIR, in sorted order; prints an empty line when DIR is absent.
    # File names are passed NUL-delimited so names with whitespace stay intact.
    hash_dir() {
      local dir=$1
      if [ -d "$dir" ]; then
        find "$dir" -type f -name "*.mjs" -print0 2>/dev/null | sort -z | xargs -0 $hash_cmd 2>/dev/null | $hash_cmd | cut -d' ' -f1
      else
        echo ""
      fi
    }
    COMMON=$(hash_dir packages/models/scripts/common)
    PACKAGE_JSON=$($hash_cmd packages/models/package.json | cut -d' ' -f1)
    BUILD_MJS=$($hash_cmd "$BUILD_SCRIPT" | cut -d' ' -f1)
    # downloaded phase: cache-version + common + downloaded + build.mjs + package.json
    DOWNLOADED_DIR=$(hash_dir packages/models/scripts/downloaded)
    DOWNLOADED_HASH=$(echo "${CACHE_VERSION}${COMMON}${DOWNLOADED_DIR}${BUILD_MJS}${PACKAGE_JSON}" | $hash_cmd | cut -d' ' -f1)
    # converted phase
    CONVERTED_DIR=$(hash_dir packages/models/scripts/converted)
    CONVERTED_HASH=$(echo "${DOWNLOADED_HASH}${CONVERTED_DIR}" | $hash_cmd | cut -d' ' -f1)
    # quantized phase (final - most complete hash)
    # Note: finalized is a checkpoint marker, not a separate build phase with scripts
    QUANTIZED_DIR=$(hash_dir packages/models/scripts/quantized)
    QUANTIZED_HASH=$(echo "${CONVERTED_HASH}${QUANTIZED_DIR}" | $hash_cmd | cut -d' ' -f1)
    {
      echo "cache_version=${CACHE_VERSION}"
      echo "downloaded_hash=${DOWNLOADED_HASH}"
      echo "quantized_hash=${QUANTIZED_HASH}"
    } >> "$GITHUB_OUTPUT"
# Install the Depot CLI; authentication uses OIDC (hence id-token: write on the job).
- name: Setup Depot CLI
  uses: depot/setup-action@15c09a5f77a0840ad4bce955686522a257853461 # v1.7.1
  with:
    oidc: true
# Restores the checkpoints directory for the selected build mode. Skipped
# entirely when the `force` input is set.
- name: Restore model checkpoint cache
  id: model-checkpoint-cache
  if: ${{ !inputs.force }}
  uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3
  with:
    # NOTE: No restore-keys - we require exact match to prevent stale cache restoration.
    # Partial matches with restore-keys cause builds to use outdated artifacts when
    # source changes bump the quantized_hash but the prefix still matches.
    key: model-checkpoints-${{ steps.cache-key.outputs.cache_version }}-${{ steps.cache-key.outputs.model_versions }}-${{ runner.os }}-${{ steps.build-mode.outputs.mode }}-${{ steps.cache-key.outputs.quantized_hash }}
    path: packages/models/build/${{ steps.build-mode.outputs.mode }}/checkpoints
# Asks get-checkpoint-chain.mjs for the checkpoint chain of the current build
# mode and publishes it in two forms:
#   checkpoint_chain - the raw (comma-separated) script output
#   checkpoints      - the same list with commas replaced by spaces
- name: Set checkpoint chain
  id: checkpoint-chain
  env:
    BUILD_MODE: ${{ steps.build-mode.outputs.mode }}
  run: |
    # Use get-checkpoint-chain.mjs to ensure consistency with local builds
    CHAIN=$(node packages/models/scripts/get-checkpoint-chain.mjs "--${BUILD_MODE}")
    # Convert comma-separated to space-separated for validation
    CHECKPOINTS=${CHAIN//,/ }
    {
      echo "checkpoint_chain=$CHAIN"
      echo "checkpoints=$CHECKPOINTS"
    } >> "$GITHUB_OUTPUT"
    echo "Checkpoint chain: $CHAIN"
# Runs the local validate-checkpoints action, but only after an exact cache hit.
# Its `valid` output is consumed by the restore and build steps.
- name: Validate checkpoint cache integrity
  id: validate-cache
  if: steps.model-checkpoint-cache.outputs.cache-hit == 'true'
  uses: ./.github/actions/validate-checkpoints
  with:
    package-name: models
    checkpoints: ${{ steps.checkpoint-chain.outputs.checkpoints }}
    checkpoint-dirs: packages/models/build/${{ steps.build-mode.outputs.mode }}/checkpoints
# Hands the cache state to the local restore-checkpoint action. Its
# `build-required` output is consulted by the following build steps.
- name: Restore build output from checkpoint chain
  id: restore-checkpoint
  uses: ./.github/actions/restore-checkpoint
  with:
    package-name: 'models'
    build-mode: ${{ steps.build-mode.outputs.mode }}
    checkpoint-chain: ${{ steps.checkpoint-chain.outputs.checkpoint_chain }}
    cache-valid: ${{ steps.validate-cache.outputs.valid }}
    cache-hit: ${{ steps.model-checkpoint-cache.outputs.cache-hit }}
# Builds the models via Depot when there was no cache hit, the cache failed
# validation, or the restore step reported that a build is required. The
# `export` target of Dockerfile.linux is written to packages/models/build.
- name: Build models with Depot (offloads compute)
  if: |
    (steps.model-checkpoint-cache.outputs.cache-hit != 'true' || steps.validate-cache.outputs.valid == 'false') ||
    steps.restore-checkpoint.outputs.build-required == 'true'
  uses: depot/build-push-action@5f3b3c2e5a00f0093de47f657aeaefcedff27d18 # v1.17.0
  with:
    project: 8fpj9495vw
    context: .
    file: packages/models/docker/Dockerfile.linux
    target: export
    platforms: linux/amd64
    outputs: type=local,dest=packages/models/build
    # A forced run also disables the Docker layer cache.
    no-cache: ${{ inputs.force == true }}
    build-args: |
      BUILD_MODE=${{ steps.build-mode.outputs.mode }}
      CACHE_VERSION=${{ steps.cache-version.outputs.version }}
      CACHE_BUSTER=${{ github.sha }}
# After a Depot build, confirms that the per-mode build directory was
# exported; lists the exported files and fails the job otherwise.
- name: Verify Depot build output
  if: |
    (steps.model-checkpoint-cache.outputs.cache-hit != 'true' || steps.validate-cache.outputs.valid == 'false') ||
    steps.restore-checkpoint.outputs.build-required == 'true'
  env:
    BUILD_MODE: ${{ steps.build-mode.outputs.mode }}
  run: |
    echo "Verifying Depot build output structure..."
    ls -lah packages/models/build/
    MODE_DIR="packages/models/build/$BUILD_MODE"
    # Check if build artifacts exist.
    if [ -d "$MODE_DIR" ]; then
      echo "✅ Build artifacts found"
      ls -lh "$MODE_DIR/"
    else
      echo "× Build directory not found!"
      echo "Contents of output directory:"
      find packages/models/build/ -type f
      exit 1
    fi
# Runs the local validate-depot-checkpoints action under the same condition
# as the Depot build itself.
- name: Validate Depot checkpoints
  if: |
    (steps.model-checkpoint-cache.outputs.cache-hit != 'true' || steps.validate-cache.outputs.valid == 'false') ||
    steps.restore-checkpoint.outputs.build-required == 'true'
  uses: ./.github/actions/validate-depot-checkpoints
  with:
    package-name: models
    package-path: packages/models
    build-mode: ${{ steps.build-mode.outputs.mode }}
# Sanity-checks the final models: both model.onnx files must exist and each
# must be at least the minimum size for the build mode.
- name: Validate build output
  env:
    BUILD_MODE: ${{ steps.build-mode.outputs.mode }}
  run: |
    echo "Validating models build output for ${BUILD_MODE}..."
    FINAL_DIR="packages/models/build/${BUILD_MODE}/out/Final"
    # Parallel arrays: display label and output sub-directory per model.
    LABELS=("MiniLM" "CodeT5")
    SUBDIRS=("minilm-l6" "codet5")
    # Pass 1: presence of both files (checked before any size is read).
    for i in 0 1; do
      if [ ! -f "${FINAL_DIR}/${SUBDIRS[$i]}/model.onnx" ]; then
        echo "× Build failed: ${LABELS[$i]} model missing"
        exit 1
      fi
    done
    # Pass 2: read both sizes up front.
    SIZES=()
    for i in 0 1; do
      SIZE=$(stat -c%s "${FINAL_DIR}/${SUBDIRS[$i]}/model.onnx")
      SIZES+=("$SIZE")
    done
    # Different size thresholds for different build modes
    # dev (int8): ~20MB (expect >1MB after quantization)
    # prod (int4): ~600KB (expect >100KB after aggressive quantization)
    case "${BUILD_MODE}" in
      prod) MIN_SIZE=100000 ;; # 100KB minimum for prod (int4)
      *) MIN_SIZE=1000000 ;; # 1MB minimum for dev (int8)
    esac
    # Pass 3: enforce the threshold, MiniLM first, then CodeT5.
    for i in 0 1; do
      if [ "${SIZES[$i]}" -lt "$MIN_SIZE" ]; then
        echo "× Build failed: ${LABELS[$i]} model too small (${SIZES[$i]} bytes, expected >$MIN_SIZE)"
        exit 1
      fi
    done
    echo "✅ Build validation passed"
# Publishes the Final/ output directory as the "models" artifact, which the
# release job downloads. An empty directory is treated as an error.
- name: Upload model artifacts
  uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
  with:
    name: models
    path: packages/models/build/${{ steps.build-mode.outputs.mode }}/out/Final/
    if-no-files-found: error
    retention-days: 30
# Always runs, even after a failure: deletes the temp/, downloads/ and out/
# directories of the current build mode.
- name: Cleanup before cache save
  if: always()
  env:
    BUILD_MODE: ${{ steps.build-mode.outputs.mode }}
  run: |
    echo "Cleaning up temporary files before cache save..."
    # Remove temporary build files that shouldn't be cached
    BUILD_ROOT="packages/models/build/${BUILD_MODE}"
    for SUBDIR in temp downloads out; do
      DIR="${BUILD_ROOT}/${SUBDIR}"
      # Nothing to do for directories that were never created.
      [ -d "$DIR" ] || continue
      echo "Removing: $DIR"
      rm -rf "$DIR"
    done
    echo "✅ Cleanup complete"
# Publishes the built models as a GitHub release.
# Runs only for manual dispatches that are not dry runs AND are not dev builds:
# every step below reads from the hard-coded prod output path and the release
# notes describe INT4 production models, so a dev build must never be released.
# The check mirrors the "Set build mode" step (anything but 'dev' means prod).
release:
  name: Release Models
  needs: build
  if: github.event_name == 'workflow_dispatch' && !inputs.dry_run && inputs.build_mode != 'dev'
  runs-on: ubuntu-24.04
  # Dedicated environment for secret access with approval gates
  environment: release
  permissions:
    # Required to create GitHub releases
    contents: write
  steps:
# Sparse checkout of only the files the release job reads:
#   - the version generator script, sourced by "Generate version"
#   - packages/models/package.json, read by "Get model versions"; without it
#     jq cannot open the file and the release notes get empty model names
- name: Checkout repository
  uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
  with:
    persist-credentials: false
    sparse-checkout: |
      .github/scripts/generate-version.sh
      packages/models/package.json
    # Non-cone mode so individual files can be listed as patterns.
    sparse-checkout-cone-mode: false
# Fetch the "models" artifact uploaded by the build job into the prod output
# path that the remaining release steps read from.
- name: Download model artifacts
  uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
  with:
    path: packages/models/build/prod/out/Final/
    name: models
# Sources generate-version.sh and publishes the VERSION variable it is
# expected to set as the `version` step output.
- name: Generate version
  id: version
  run: |
    . .github/scripts/generate-version.sh
    echo "version=$VERSION" >> "$GITHUB_OUTPUT"
    echo "Version: $VERSION"
# Writes SHA-256 checksums of every *.onnx file under Final/ to checksums.txt
# and prints the result.
- name: Generate checksums
  run: |
    cd packages/models/build/prod/out/Final
    # Prefer shasum when present, otherwise fall back to sha256sum.
    if command -v shasum &> /dev/null; then
      HASH_CMD="shasum -a 256"
    else
      HASH_CMD="sha256sum"
    fi
    # Word splitting of HASH_CMD is intentional (command plus its flags).
    find . -name "*.onnx" -exec $HASH_CMD {} \; > checksums.txt
    cat checksums.txt
# Imports the signing key into the runner's GPG keyring. The step is skipped
# when the GPG_PRIVATE_KEY secret is empty; the in-script check repeats that
# guard.
- name: Import GPG key
  if: ${{ env.GPG_PRIVATE_KEY != '' }}
  env:
    GPG_PRIVATE_KEY: ${{ secrets.GPG_PRIVATE_KEY }}
    GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
  run: |
    if [ -z "$GPG_PRIVATE_KEY" ]; then
      echo "⚠️ GPG_PRIVATE_KEY secret not set, skipping signature"
      exit 0
    fi
    echo "$GPG_PRIVATE_KEY" | gpg --batch --import
    echo "GPG key imported successfully"
# Creates an ASCII-armored detached signature (checksums.txt.asc) next to
# checksums.txt. Skipped when the GPG_PRIVATE_KEY secret is empty.
- name: Sign checksums
  if: ${{ env.GPG_PRIVATE_KEY != '' }}
  env:
    GPG_PRIVATE_KEY: ${{ secrets.GPG_PRIVATE_KEY }}
    GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
  run: |
    if [ -n "$GPG_PRIVATE_KEY" ]; then
      cd packages/models/build/prod/out/Final
      if [ -n "$GPG_PASSPHRASE" ]; then
        # GnuPG >= 2.1 only honours --passphrase-fd when the pinentry mode is
        # set to loopback; without it gpg tries to launch a pinentry program,
        # which fails on a headless runner.
        printf '%s\n' "$GPG_PASSPHRASE" | gpg --batch --yes --pinentry-mode loopback --passphrase-fd 0 --detach-sign --armor checksums.txt
      else
        gpg --batch --yes --detach-sign --armor checksums.txt
      fi
      echo "[OK] Created checksums.txt.asc"
      ls -lh checksums.txt.asc
    fi
# Derives the model names for the release notes: the last path segment of each
# model's `primary` source in packages/models/package.json.
# Outputs: minilm, codet5.
- name: Get model versions
  id: model-versions
  run: |
    PKG=packages/models/package.json
    # Extract model names from package.json moduleSources.
    # `// empty` turns a missing key into an empty string instead of "null".
    MINILM_MODEL=$(jq -r '.moduleSources["minilm-l6"].primary // empty' "$PKG" | sed 's/.*\///')
    CODET5_MODEL=$(jq -r '.moduleSources.codet5.primary // empty' "$PKG" | sed 's/.*\///')
    # The pipelines above report sed's status, so a jq failure (missing file or
    # key) would go unnoticed. Surface it as a warning; the release itself does
    # not depend on these names, only its notes do.
    for ENTRY in "MiniLM:${MINILM_MODEL}" "CodeT5:${CODET5_MODEL}"; do
      if [ -z "${ENTRY#*:}" ]; then
        echo "::warning file=${PKG}::${ENTRY%%:*} model name could not be read from ${PKG}; release notes will be incomplete"
      fi
    done
    {
      echo "minilm=${MINILM_MODEL}"
      echo "codet5=${CODET5_MODEL}"
    } >> "$GITHUB_OUTPUT"
    echo "MiniLM model: ${MINILM_MODEL}"
    echo "CodeT5 model: ${CODET5_MODEL}"
# Packs Final/ into models-<version>.tar.gz and publishes it together with
# checksums.txt (and checksums.txt.asc when present) under the tag
# models-<version>. An existing release gets its assets replaced; otherwise a
# new release is created with generated notes.
- name: Create GitHub Release
  env:
    GH_TOKEN: ${{ github.token }}
    STEPS_VERSION_OUTPUTS_VERSION: ${{ steps.version.outputs.version }}
    CODET5_MODEL: ${{ steps.model-versions.outputs.codet5 }}
    MINILM_MODEL: ${{ steps.model-versions.outputs.minilm }}
  run: |
    VERSION="${STEPS_VERSION_OUTPUTS_VERSION}"
    # An empty version would publish the meaningless tag "models-".
    if [ -z "$VERSION" ]; then
      echo "::error::Release version is empty; refusing to create a release"
      exit 1
    fi
    RELEASE_NAME="models"
    TAG="${RELEASE_NAME}-${VERSION}"
    # Create archive with versioned name (done once, shared by both branches)
    cd packages/models/build/prod/out/Final
    ARCHIVE="../models-${VERSION}.tar.gz"
    tar -czf "$ARCHIVE" .
    # Asset list as an array so no argument depends on word splitting.
    ASSETS=("$ARCHIVE" checksums.txt)
    # Add signature if it exists
    if [ -f checksums.txt.asc ]; then
      ASSETS+=(checksums.txt.asc)
    fi
    # Check if release already exists
    if gh release view "$TAG" &>/dev/null; then
      echo "Release $TAG already exists, uploading assets..."
      gh release upload "$TAG" "${ASSETS[@]}" --clobber
    else
      echo "Creating new release $TAG..."
      # Extract date-hash from tag for title
      TITLE_SUFFIX="${TAG#models-}"
      gh release create "$TAG" \
        --title "models ${TITLE_SUFFIX}" \
        --notes "INT4 quantized ONNX models: ${CODET5_MODEL} and ${MINILM_MODEL}.
    ## Platforms
    - Platform-independent ONNX models (universal)
    ## Files
    - \`models-${VERSION}.tar.gz\` - All production models (INT4 quantized)
    - \`checksums.txt\` - SHA256 checksums
    ## Included Models
    - \`minilm-l6/model.onnx\` - Sentence embeddings model
    - \`codet5/model.onnx\` - Code understanding model
    ## Usage
    Extract the archive and load models with ONNX Runtime:
    \`\`\`bash
    tar -xzf models-${VERSION}.tar.gz
    \`\`\`" \
        "${ASSETS[@]}"
    fi