# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
name: LlamaDemo Android

on:
  pull_request:
    branches: [main]
    paths:
      - 'llm/android/**'
      - '.github/workflows/llm-android.yml'
  # Manual runs let a developer pick a model preset or point at custom artifacts.
  workflow_dispatch:
    inputs:
      model_preset:
        description: 'Model preset to use'
        required: true
        type: choice
        options:
          - stories
          - llama
          - qwen3
          - custom
        default: 'stories'
      custom_pte_url:
        description: 'Custom URL for model .pte file (only used when model_preset is custom)'
        required: false
        type: string
      custom_tokenizer_url:
        description: 'Custom URL for tokenizer file (only used when model_preset is custom)'
        required: false
        type: string
      local_aar:
        description: 'URL to download a local AAR file. When set, the workflow will download the AAR and use it instead of the Maven dependency.'
        required: false
        type: string

permissions:
  contents: read

jobs:
  instrumentation-test:
    runs-on: 8-core-ubuntu
    env:
      # Emulator image configuration; also baked into the AVD cache key below
      # so changing either value invalidates the cached snapshot.
      API_LEVEL: 34
      ARCH: x86_64
    name: Instrumentation Test LlamaDemo
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Write job summary
        run: |
          echo "## Test Configuration" >> $GITHUB_STEP_SUMMARY
          echo "| Parameter | Value |" >> $GITHUB_STEP_SUMMARY
          echo "|-----------|-------|" >> $GITHUB_STEP_SUMMARY
          echo "| Model Preset | \`${{ inputs.model_preset || 'stories' }}\` |" >> $GITHUB_STEP_SUMMARY
          if [ "${{ inputs.model_preset }}" = "custom" ]; then
            echo "| Custom PTE URL | \`${{ inputs.custom_pte_url }}\` |" >> $GITHUB_STEP_SUMMARY
            echo "| Custom Tokenizer URL | \`${{ inputs.custom_tokenizer_url }}\` |" >> $GITHUB_STEP_SUMMARY
          fi

      # The emulator needs /dev/kvm to be world-accessible for hardware acceleration.
      - name: Enable KVM group perms
        run: |
          echo 'KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"' | sudo tee /etc/udev/rules.d/99-kvm4all.rules
          sudo udevadm control --reload-rules
          sudo udevadm trigger --name-match=kvm

      - name: Set up JDK 17
        uses: actions/setup-java@v4
        with:
          java-version: '17'
          distribution: 'temurin'

      - name: Setup Gradle
        uses: gradle/actions/setup-gradle@v4

      # Optionally replace the Maven executorch dependency with a locally built AAR.
      - name: Download local AAR
        if: ${{ inputs.local_aar }}
        run: |
          mkdir -p llm/android/LlamaDemo/app/libs
          curl -fL -o llm/android/LlamaDemo/app/libs/executorch.aar "${{ inputs.local_aar }}"

      - name: AVD cache
        uses: actions/cache@v4
        id: avd-cache
        with:
          path: |
            ~/.android/avd/*
            ~/.android/adb*
          key: avd-${{ env.API_LEVEL }}-${{ env.ARCH }}-ram16G-disk16G-v5

      # Boot the emulator once to produce a snapshot; later runs restore it from cache.
      - name: Create AVD and generate snapshot for caching
        if: steps.avd-cache.outputs.cache-hit != 'true'
        uses: reactivecircus/android-emulator-runner@v2
        with:
          api-level: ${{ env.API_LEVEL }}
          arch: ${{ env.ARCH }}
          ram-size: 16384M
          disk-size: 16384M
          force-avd-creation: true
          emulator-options: -no-window -gpu swiftshader_indirect -noaudio -no-boot-anim -camera-back none -no-snapshot-save -memory 16384
          disable-animations: false
          working-directory: llm/android/LlamaDemo
          script: echo "Generated AVD snapshot for caching."

      # Resolve preset -> artifact URLs, download the .pte and tokenizer, and
      # publish the resulting filenames to later steps via GITHUB_ENV.
      - name: Download model files
        env:
          MODEL_PRESET: ${{ inputs.model_preset || 'stories' }}
          CUSTOM_PTE_URL: ${{ inputs.custom_pte_url }}
          CUSTOM_TOKENIZER_URL: ${{ inputs.custom_tokenizer_url }}
        run: |
          mkdir -p /tmp/llama_models
          # Determine URLs based on preset
          case "$MODEL_PRESET" in
            llama)
              PTE_URL="https://huggingface.co/executorch-community/Llama-3.2-1B-ET/resolve/main/llama3_2-1B.pte"
              TOKENIZER_URL="https://huggingface.co/executorch-community/Llama-3.2-1B-ET/resolve/main/tokenizer.model"
              ;;
            qwen3)
              PTE_URL="https://huggingface.co/pytorch/Qwen3-4B-INT8-INT4/resolve/main/model.pte"
              TOKENIZER_URL="https://huggingface.co/pytorch/Qwen3-4B-INT8-INT4/resolve/main/tokenizer.json"
              ;;
            custom)
              PTE_URL="$CUSTOM_PTE_URL"
              TOKENIZER_URL="$CUSTOM_TOKENIZER_URL"
              ;;
            *)
              # Default ("stories") preset, also used for pull_request runs.
              PTE_URL="https://ossci-android.s3.amazonaws.com/executorch/stories/snapshot-20260114/stories110M.pte"
              TOKENIZER_URL="https://ossci-android.s3.amazonaws.com/executorch/stories/snapshot-20260114/tokenizer.model"
              ;;
          esac
          PTE_FILE=$(basename "$PTE_URL")
          TOKENIZER_FILE=$(basename "$TOKENIZER_URL")
          echo "Downloading model: $PTE_URL"
          curl -fL --progress-bar -o "/tmp/llama_models/$PTE_FILE" "$PTE_URL"
          echo "Downloading tokenizer: $TOKENIZER_URL"
          curl -fL --progress-bar -o "/tmp/llama_models/$TOKENIZER_FILE" "$TOKENIZER_URL"
          echo "Downloaded files:"
          ls -lh /tmp/llama_models/
          # Export filenames for later steps
          echo "MODEL_FILE=$PTE_FILE" >> $GITHUB_ENV
          echo "TOKENIZER_FILE=$TOKENIZER_FILE" >> $GITHUB_ENV

      - name: Run instrumentation tests
        uses: reactivecircus/android-emulator-runner@v2
        env:
          MODEL_PRESET: ${{ inputs.model_preset || 'stories' }}
          # Empty string (no input / pull_request run) evaluates to false.
          USE_LOCAL_AAR: ${{ inputs.local_aar != '' }}
        with:
          api-level: ${{ env.API_LEVEL }}
          arch: ${{ env.ARCH }}
          ram-size: 16384M
          disk-size: 16384M
          force-avd-creation: true
          emulator-options: -no-window -gpu swiftshader_indirect -noaudio -no-boot-anim -camera-back none -no-snapshot-save -memory 16384
          disable-animations: true
          working-directory: llm/android/LlamaDemo
          # MODEL_FILE / TOKENIZER_FILE come from GITHUB_ENV set in the download step.
          script: bash ./scripts/run-ci-tests.sh "$MODEL_PRESET" "$MODEL_FILE" "$TOKENIZER_FILE" "$USE_LOCAL_AAR"

      # /tmp/response.txt is produced by the test script; surface it even on failure.
      - name: Add model response to summary
        if: always()
        run: |
          if [ -f /tmp/response.txt ]; then
            echo "" >> $GITHUB_STEP_SUMMARY
            echo "## Model Response" >> $GITHUB_STEP_SUMMARY
            echo '```' >> $GITHUB_STEP_SUMMARY
            cat /tmp/response.txt >> $GITHUB_STEP_SUMMARY
            echo '```' >> $GITHUB_STEP_SUMMARY
          fi

      - name: Upload logcat
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: logcat
          path: /tmp/logcat.txt
          retention-days: 7