executorch-examples/.github/workflows/llm-android.yml at main · meta-pytorch/executorch-examples · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

name: LlamaDemo Android

# Triggers:
#   - pull requests targeting main that touch the Android LLM demo or this
#     workflow file;
#   - manual dispatch, where the caller picks a model preset (or supplies
#     custom URLs) and may point at a replacement AAR.
on:
  pull_request:
    branches:
      - main
    paths:
      - "llm/android/**"
      - ".github/workflows/llm-android.yml"
  workflow_dispatch:
    inputs:
      model_preset:
        type: choice
        description: "Model preset to use"
        required: true
        default: stories
        options:
          - stories
          - llama
          - qwen3
          - custom
      custom_pte_url:
        type: string
        description: "Custom URL for model .pte file (only used when model_preset is custom)"
        required: false
      custom_tokenizer_url:
        type: string
        description: "Custom URL for tokenizer file (only used when model_preset is custom)"
        required: false
      local_aar:
        type: string
        description: >-
          URL to download a local AAR file. When set, the workflow will
          download the AAR and use it instead of the Maven dependency.
        required: false

# Grant the workflow token read-only access to repository contents.
permissions:
  contents: read

jobs:
  # Single job: prepares an Android emulator on the runner, downloads a model
  # and tokenizer, and runs the LlamaDemo instrumentation tests.
  instrumentation-test:
    name: Instrumentation Test LlamaDemo
    runs-on: 8-core-ubuntu

    # Emulator image settings; referenced by the AVD cache key and by both
    # emulator-runner steps.
    # NOTE(review): no timeout-minutes is set, so the platform default job
    # timeout applies — consider an explicit limit for emulator jobs.
    env:
      ARCH: x86_64
      API_LEVEL: 34

    steps:
      # Check out the repository so the demo app and its CI script are present.
      - uses: actions/checkout@v4
        name: Checkout repository

      # Record the effective test configuration in the job summary.
      # Inputs are handed to the script through environment variables instead
      # of being expanded with ${{ }} inside the script text, so a value that
      # contains quotes, backticks or $(...) is treated as data and cannot be
      # executed by the shell.
      - name: Write job summary
        env:
          MODEL_PRESET: ${{ inputs.model_preset || 'stories' }}
          CUSTOM_PTE_URL: ${{ inputs.custom_pte_url }}
          CUSTOM_TOKENIZER_URL: ${{ inputs.custom_tokenizer_url }}
        run: |
          {
            echo "## Test Configuration"
            echo "| Parameter | Value |"
            echo "|-----------|-------|"
            echo "| Model Preset | \`${MODEL_PRESET}\` |"
            # The custom URLs are only meaningful for the custom preset.
            if [ "$MODEL_PRESET" = "custom" ]; then
              echo "| Custom PTE URL | \`${CUSTOM_PTE_URL}\` |"
              echo "| Custom Tokenizer URL | \`${CUSTOM_TOKENIZER_URL}\` |"
            fi
          } >> "$GITHUB_STEP_SUMMARY"

      # Install a udev rule that makes the kvm device world read/writable
      # (MODE 0666), then reload the rules and re-trigger the kvm device so
      # the rule takes effect in this job. Presumably needed so the emulator
      # can use KVM acceleration on the runner.
      - name: Enable KVM group perms
        run: |
          KVM_RULE='KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"'
          echo "$KVM_RULE" | sudo tee /etc/udev/rules.d/99-kvm4all.rules
          sudo udevadm control --reload-rules
          sudo udevadm trigger --name-match=kvm

      # Install the Temurin distribution of JDK 17.
      - name: Set up JDK 17
        uses: actions/setup-java@v4
        with:
          distribution: "temurin"
          java-version: "17"

      # Prepare Gradle using the gradle/actions setup action.
      - uses: gradle/actions/setup-gradle@v4
        name: Setup Gradle

      # Optional: when the local_aar input is set, download that AAR into the
      # app's libs directory as executorch.aar.
      # The URL is passed through an environment variable rather than expanded
      # with ${{ }} inside the script, so it is always treated as a single
      # argument and can never be interpreted as shell syntax.
      - name: Download local AAR
        if: ${{ inputs.local_aar }}
        env:
          LOCAL_AAR_URL: ${{ inputs.local_aar }}
        run: |
          mkdir -p llm/android/LlamaDemo/app/libs
          # --retry covers transient network failures; -f still fails the step
          # on HTTP errors.
          curl -fL --retry 3 -o llm/android/LlamaDemo/app/libs/executorch.aar "$LOCAL_AAR_URL"

      # Restore (and, on a miss, later save) the AVD and adb directories.
      # The key combines API level, architecture, the RAM/disk sizes and a
      # manual version suffix; the cache-hit output gates the AVD creation
      # step that follows.
      - id: avd-cache
        name: AVD cache
        uses: actions/cache@v4
        with:
          key: avd-${{ env.API_LEVEL }}-${{ env.ARCH }}-ram16G-disk16G-v5
          path: |
            ~/.android/avd/*
            ~/.android/adb*

      # Runs only on an AVD cache miss: start the emulator once with a no-op
      # script, presumably so that an AVD exists for the cache action to save.
      # NOTE(review): the step name mentions a snapshot, but emulator-options
      # include -no-snapshot-save — confirm whether a snapshot is actually
      # meant to be written here.
      - name: Create AVD and generate snapshot for caching
        uses: reactivecircus/android-emulator-runner@v2
        if: steps.avd-cache.outputs.cache-hit != 'true'
        with:
          arch: ${{ env.ARCH }}
          api-level: ${{ env.API_LEVEL }}
          disk-size: 16384M
          ram-size: 16384M
          force-avd-creation: true
          disable-animations: false
          emulator-options: >-
            -no-window -gpu swiftshader_indirect -noaudio -no-boot-anim
            -camera-back none -no-snapshot-save -memory 16384
          working-directory: llm/android/LlamaDemo
          script: echo "Generated AVD snapshot for caching."

      # Resolve the model (.pte) and tokenizer URLs for the selected preset,
      # download both into /tmp/llama_models, and export their file names as
      # MODEL_FILE / TOKENIZER_FILE through GITHUB_ENV for later steps.
      # Any preset other than llama, qwen3 or custom falls back to the
      # stories model.
      - name: Download model files
        env:
          MODEL_PRESET: ${{ inputs.model_preset || 'stories' }}
          CUSTOM_PTE_URL: ${{ inputs.custom_pte_url }}
          CUSTOM_TOKENIZER_URL: ${{ inputs.custom_tokenizer_url }}
        run: |
          MODEL_DIR=/tmp/llama_models
          mkdir -p "$MODEL_DIR"

          # Determine URLs based on preset
          case "$MODEL_PRESET" in
            llama)
              PTE_URL="https://huggingface.co/executorch-community/Llama-3.2-1B-ET/resolve/main/llama3_2-1B.pte"
              TOKENIZER_URL="https://huggingface.co/executorch-community/Llama-3.2-1B-ET/resolve/main/tokenizer.model"
              ;;
            qwen3)
              PTE_URL="https://huggingface.co/pytorch/Qwen3-4B-INT8-INT4/resolve/main/model.pte"
              TOKENIZER_URL="https://huggingface.co/pytorch/Qwen3-4B-INT8-INT4/resolve/main/tokenizer.json"
              ;;
            custom)
              # Fail early with an actionable message instead of letting curl
              # fail later on an empty URL and an empty output file name.
              if [ -z "$CUSTOM_PTE_URL" ] || [ -z "$CUSTOM_TOKENIZER_URL" ]; then
                echo "::error::model_preset=custom requires both custom_pte_url and custom_tokenizer_url"
                exit 1
              fi
              PTE_URL="$CUSTOM_PTE_URL"
              TOKENIZER_URL="$CUSTOM_TOKENIZER_URL"
              ;;
            *)
              PTE_URL="https://ossci-android.s3.amazonaws.com/executorch/stories/snapshot-20260114/stories110M.pte"
              TOKENIZER_URL="https://ossci-android.s3.amazonaws.com/executorch/stories/snapshot-20260114/tokenizer.model"
              ;;
          esac

          # Derive the local file names from the URL path only: strip any
          # query string or fragment (e.g. "?download=true") so it does not
          # end up in the file name handed to the test script.
          PTE_FILE=$(basename "${PTE_URL%%[?#]*}")
          TOKENIZER_FILE=$(basename "${TOKENIZER_URL%%[?#]*}")

          # --retry covers transient network failures; -f still fails the step
          # on HTTP errors.
          echo "Downloading model: $PTE_URL"
          curl -fL --retry 3 --progress-bar -o "$MODEL_DIR/$PTE_FILE" "$PTE_URL"

          echo "Downloading tokenizer: $TOKENIZER_URL"
          curl -fL --retry 3 --progress-bar -o "$MODEL_DIR/$TOKENIZER_FILE" "$TOKENIZER_URL"

          echo "Downloaded files:"
          ls -lh "$MODEL_DIR/"

          # Export filenames for later steps
          {
            echo "MODEL_FILE=$PTE_FILE"
            echo "TOKENIZER_FILE=$TOKENIZER_FILE"
          } >> "$GITHUB_ENV"

      # Boot the emulator and run the demo's CI test script inside
      # llm/android/LlamaDemo. The script receives the preset, the model and
      # tokenizer file names (MODEL_FILE / TOKENIZER_FILE come from the job
      # environment written by the download step) and whether a local AAR was
      # supplied.
      # NOTE(review): force-avd-creation is true here as well, so the AVD
      # restored by the cache step is presumably recreated on every run —
      # confirm whether the cache is still providing any benefit.
      - name: Run instrumentation tests
        uses: reactivecircus/android-emulator-runner@v2
        env:
          USE_LOCAL_AAR: ${{ inputs.local_aar != '' }}
          MODEL_PRESET: ${{ inputs.model_preset || 'stories' }}
        with:
          arch: ${{ env.ARCH }}
          api-level: ${{ env.API_LEVEL }}
          disk-size: 16384M
          ram-size: 16384M
          force-avd-creation: true
          disable-animations: true
          emulator-options: >-
            -no-window -gpu swiftshader_indirect -noaudio -no-boot-anim
            -camera-back none -no-snapshot-save -memory 16384
          working-directory: llm/android/LlamaDemo
          script: bash ./scripts/run-ci-tests.sh "$MODEL_PRESET" "$MODEL_FILE" "$TOKENIZER_FILE" "$USE_LOCAL_AAR"

      # Append the captured model response, if any, to the job summary as a
      # fenced code block. Runs even when earlier steps failed.
      - name: Add model response to summary
        if: always()
        run: |
          if [ -f /tmp/response.txt ]; then
            {
              echo ""
              echo "## Model Response"
              echo '```'
              cat /tmp/response.txt
              # If the file does not end with a newline, add one so the closing
              # fence starts on its own line; otherwise it would be glued to
              # the last response line and the code block would never close.
              if [ -n "$(tail -c 1 /tmp/response.txt)" ]; then
                echo ""
              fi
              echo '```'
            } >> "$GITHUB_STEP_SUMMARY"
          fi

      # Publish /tmp/logcat.txt as the "logcat" artifact, kept for 7 days.
      # Runs even when earlier steps failed so logs are available for
      # debugging.
      - name: Upload logcat
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: "logcat"
          retention-days: 7
          path: "/tmp/logcat.txt"