# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
name: LlamaDemo Android

on:
  pull_request:
    branches: [main]
    paths:
      - 'llm/android/**'
      - '.github/workflows/llm-android.yml'
  # Manual runs let a developer pick a model preset or point at custom artifacts.
  workflow_dispatch:
    inputs:
      model_preset:
        description: 'Model preset to use'
        required: true
        type: choice
        options:
          - stories
          - llama
          - qwen3
          - custom
        default: 'stories'
      custom_pte_url:
        description: 'Custom URL for model .pte file (only used when model_preset is custom)'
        required: false
        type: string
      custom_tokenizer_url:
        description: 'Custom URL for tokenizer file (only used when model_preset is custom)'
        required: false
        type: string
      local_aar:
        description: 'URL to download a local AAR file. When set, the workflow will download the AAR and use it instead of the Maven dependency.'
        required: false
        type: string

permissions:
  contents: read

jobs:
  instrumentation-test:
    runs-on: 8-core-ubuntu
    env:
      # Emulator image configuration; also baked into the AVD cache key below
      # so changing either value invalidates the cached snapshot.
      API_LEVEL: 34
      ARCH: x86_64
    name: Instrumentation Test LlamaDemo
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Write job summary
        run: |
          echo "## Test Configuration" >> $GITHUB_STEP_SUMMARY
          echo "| Parameter | Value |" >> $GITHUB_STEP_SUMMARY
          echo "|-----------|-------|" >> $GITHUB_STEP_SUMMARY
          echo "| Model Preset | \`${{ inputs.model_preset || 'stories' }}\` |" >> $GITHUB_STEP_SUMMARY
          if [ "${{ inputs.model_preset }}" = "custom" ]; then
            echo "| Custom PTE URL | \`${{ inputs.custom_pte_url }}\` |" >> $GITHUB_STEP_SUMMARY
            echo "| Custom Tokenizer URL | \`${{ inputs.custom_tokenizer_url }}\` |" >> $GITHUB_STEP_SUMMARY
          fi

      # The emulator needs /dev/kvm to be world-accessible for hardware acceleration.
      - name: Enable KVM group perms
        run: |
          echo 'KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"' | sudo tee /etc/udev/rules.d/99-kvm4all.rules
          sudo udevadm control --reload-rules
          sudo udevadm trigger --name-match=kvm

      - name: Set up JDK 17
        uses: actions/setup-java@v4
        with:
          java-version: '17'
          distribution: 'temurin'

      - name: Setup Gradle
        uses: gradle/actions/setup-gradle@v4

      # Optionally replace the Maven executorch dependency with a locally built AAR.
      - name: Download local AAR
        if: ${{ inputs.local_aar }}
        run: |
          mkdir -p llm/android/LlamaDemo/app/libs
          curl -fL -o llm/android/LlamaDemo/app/libs/executorch.aar "${{ inputs.local_aar }}"

      - name: AVD cache
        uses: actions/cache@v4
        id: avd-cache
        with:
          path: |
            ~/.android/avd/*
            ~/.android/adb*
          key: avd-${{ env.API_LEVEL }}-${{ env.ARCH }}-ram16G-disk16G-v5

      # Boot the emulator once to produce a snapshot; later runs restore it from cache.
      - name: Create AVD and generate snapshot for caching
        if: steps.avd-cache.outputs.cache-hit != 'true'
        uses: reactivecircus/android-emulator-runner@v2
        with:
          api-level: ${{ env.API_LEVEL }}
          arch: ${{ env.ARCH }}
          ram-size: 16384M
          disk-size: 16384M
          force-avd-creation: true
          emulator-options: -no-window -gpu swiftshader_indirect -noaudio -no-boot-anim -camera-back none -no-snapshot-save -memory 16384
          disable-animations: false
          working-directory: llm/android/LlamaDemo
          script: echo "Generated AVD snapshot for caching."

      # Resolve preset -> artifact URLs, download the .pte and tokenizer, and
      # publish the resulting filenames to later steps via GITHUB_ENV.
      - name: Download model files
        env:
          MODEL_PRESET: ${{ inputs.model_preset || 'stories' }}
          CUSTOM_PTE_URL: ${{ inputs.custom_pte_url }}
          CUSTOM_TOKENIZER_URL: ${{ inputs.custom_tokenizer_url }}
        run: |
          mkdir -p /tmp/llama_models
          # Determine URLs based on preset
          case "$MODEL_PRESET" in
            llama)
              PTE_URL="https://huggingface.co/executorch-community/Llama-3.2-1B-ET/resolve/main/llama3_2-1B.pte"
              TOKENIZER_URL="https://huggingface.co/executorch-community/Llama-3.2-1B-ET/resolve/main/tokenizer.model"
              ;;
            qwen3)
              PTE_URL="https://huggingface.co/pytorch/Qwen3-4B-INT8-INT4/resolve/main/model.pte"
              TOKENIZER_URL="https://huggingface.co/pytorch/Qwen3-4B-INT8-INT4/resolve/main/tokenizer.json"
              ;;
            custom)
              PTE_URL="$CUSTOM_PTE_URL"
              TOKENIZER_URL="$CUSTOM_TOKENIZER_URL"
              ;;
            *)
              # Default ("stories") preset, also used for pull_request runs.
              PTE_URL="https://ossci-android.s3.amazonaws.com/executorch/stories/snapshot-20260114/stories110M.pte"
              TOKENIZER_URL="https://ossci-android.s3.amazonaws.com/executorch/stories/snapshot-20260114/tokenizer.model"
              ;;
          esac
          PTE_FILE=$(basename "$PTE_URL")
          TOKENIZER_FILE=$(basename "$TOKENIZER_URL")
          echo "Downloading model: $PTE_URL"
          curl -fL --progress-bar -o "/tmp/llama_models/$PTE_FILE" "$PTE_URL"
          echo "Downloading tokenizer: $TOKENIZER_URL"
          curl -fL --progress-bar -o "/tmp/llama_models/$TOKENIZER_FILE" "$TOKENIZER_URL"
          echo "Downloaded files:"
          ls -lh /tmp/llama_models/
          # Export filenames for later steps
          echo "MODEL_FILE=$PTE_FILE" >> $GITHUB_ENV
          echo "TOKENIZER_FILE=$TOKENIZER_FILE" >> $GITHUB_ENV

      - name: Run instrumentation tests
        uses: reactivecircus/android-emulator-runner@v2
        env:
          MODEL_PRESET: ${{ inputs.model_preset || 'stories' }}
          # Empty string (no input / pull_request run) evaluates to false.
          USE_LOCAL_AAR: ${{ inputs.local_aar != '' }}
        with:
          api-level: ${{ env.API_LEVEL }}
          arch: ${{ env.ARCH }}
          ram-size: 16384M
          disk-size: 16384M
          force-avd-creation: true
          emulator-options: -no-window -gpu swiftshader_indirect -noaudio -no-boot-anim -camera-back none -no-snapshot-save -memory 16384
          disable-animations: true
          working-directory: llm/android/LlamaDemo
          # MODEL_FILE / TOKENIZER_FILE come from GITHUB_ENV set in the download step.
          script: bash ./scripts/run-ci-tests.sh "$MODEL_PRESET" "$MODEL_FILE" "$TOKENIZER_FILE" "$USE_LOCAL_AAR"

      # /tmp/response.txt is produced by the test script; surface it even on failure.
      - name: Add model response to summary
        if: always()
        run: |
          if [ -f /tmp/response.txt ]; then
            echo "" >> $GITHUB_STEP_SUMMARY
            echo "## Model Response" >> $GITHUB_STEP_SUMMARY
            echo '```' >> $GITHUB_STEP_SUMMARY
            cat /tmp/response.txt >> $GITHUB_STEP_SUMMARY
            echo '```' >> $GITHUB_STEP_SUMMARY
          fi

      - name: Upload logcat
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: logcat
          path: /tmp/logcat.txt
          retention-days: 7