-
Notifications
You must be signed in to change notification settings - Fork 0
262 lines (218 loc) · 8.87 KB
/
deploy.yml
File metadata and controls
262 lines (218 loc) · 8.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
# Deploy workflow.
#
# Triggered by pushes to master that touch the application, its Dockerfile or
# the deployment tooling. Runs three jobs in sequence:
#   1. test           - builds the Dockerfile's `test` target and runs it
#   2. build-and-push - publishes the `production` and `worker` targets to GHCR
#   3. deploy         - runs /app/workflow_deploy.sh on the EC2 instance through
#                       SSM, waits for it to finish, then checks the container
name: Deploy

on:
  push:
    branches:
      - master
    paths:
      - 'app/**'
      - 'Dockerfile'
      - 'docker-compose.dev.yml'
      - 'scripts/workflow_deploy.sh'
      - '.github/workflows/deploy.yml'

# Prevent concurrent workflows - if infrastructure deploy is running, wait.
# Every workflow that uses this group name is serialized; a queued run waits
# for the run in progress instead of cancelling it.
concurrency:
  group: ${{ github.repository }}-all-workflows
  cancel-in-progress: false

env:
  REGISTRY: ghcr.io
  # owner/repo as GitHub reports it; lowercased before use as an image name.
  IMAGE_NAME: ${{ github.repository }}

jobs:
  # Gate: the image's `test` target must build and run successfully.
  test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build test image
        run: docker build --target test -t ttb-verifier:test .

      - name: Run tests
        run: docker run --rm ttb-verifier:test

  # Build the production and worker images and push them to GHCR, each tagged
  # `latest` and `<ref>-<sha>`.
  build-and-push:
    needs: test
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
    outputs:
      image-digest: ${{ steps.build.outputs.digest }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # The tags below are built from the lowercased repository name. The value
      # is read from the environment rather than spliced into the script text.
      - name: Set lowercase image name
        id: image
        run: echo "name=$(echo "$IMAGE_NAME" | tr '[:upper:]' '[:lower:]')" >> "$GITHUB_OUTPUT"

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Each target gets its own cache scope. With the default (shared) scope
      # the second build's cache export replaces the first build's cache.
      - name: Build and push Docker image
        id: build
        uses: docker/build-push-action@v5
        with:
          context: .
          target: production
          push: true
          tags: |
            ${{ env.REGISTRY }}/${{ steps.image.outputs.name }}:latest
            ${{ env.REGISTRY }}/${{ steps.image.outputs.name }}:${{ github.ref_name }}-${{ github.sha }}
          cache-from: type=gha,scope=production
          cache-to: type=gha,mode=max,scope=production

      - name: Build and push worker Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          target: worker
          push: true
          tags: |
            ${{ env.REGISTRY }}/${{ steps.image.outputs.name }}-worker:latest
            ${{ env.REGISTRY }}/${{ steps.image.outputs.name }}-worker:${{ github.ref_name }}-${{ github.sha }}
          cache-from: type=gha,scope=worker
          cache-to: type=gha,mode=max,scope=worker

  # Run the deployment script on the EC2 instance via SSM and verify the result.
  deploy:
    needs: build-and-push
    runs-on: ubuntu-latest
    permissions:
      id-token: write   # needed to assume the AWS role via OIDC
      contents: read
    env:
      # Passed through the environment so the value never becomes script text.
      EC2_INSTANCE_ID: ${{ secrets.EC2_INSTANCE_ID }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
          aws-region: ${{ secrets.AWS_REGION }}
          role-session-name: GitHubActions-Deploy

      - name: Deploy to EC2 via SSM
        env:
          BEFORE_SHA: ${{ github.event.before }}
          HEAD_SHA: ${{ github.sha }}
        run: |
          echo "🚀 Deploying to EC2 instance: $EC2_INSTANCE_ID"

          # Detect what changed to determine deployment strategy.
          # The previous commit can be missing (all-zero SHA on a first push,
          # or unreachable after a force-push). In that case the change list
          # stays empty and the full deployment path below is taken.
          echo "🔍 Analyzing changed files..."
          ZERO_SHA=0000000000000000000000000000000000000000
          CHANGED_FILES=""
          if [ -n "$BEFORE_SHA" ] && [ "$BEFORE_SHA" != "$ZERO_SHA" ] \
            && git fetch --depth=1 origin "$BEFORE_SHA"; then
            CHANGED_FILES=$(git diff --name-only "$BEFORE_SHA" "$HEAD_SHA" || echo "")
          else
            echo "⚠️ Previous commit unavailable - falling back to full deployment"
          fi

          # Check if only app code changed (verifier-only deployment)
          VERIFIER_ONLY=false
          if echo "$CHANGED_FILES" | grep -qE '^(app/|Dockerfile|scripts/workflow_deploy\.sh|\.github/workflows/deploy\.yml)'; then
            # App-related files changed - check if ONLY app files changed
            if ! echo "$CHANGED_FILES" | grep -qE '^(docker-compose|infrastructure/)'; then
              VERIFIER_ONLY=true
              echo "✅ Only app code changed - verifier-only deployment"
              echo " Ollama will NOT be restarted (model stays in GPU)"
            else
              echo "🔄 Infrastructure files changed - full deployment"
            fi
          fi

          # Set deployment command
          if [ "$VERIFIER_ONLY" = "true" ]; then
            DEPLOY_CMD='VERIFIER_ONLY=true /app/workflow_deploy.sh'
          else
            DEPLOY_CMD='/app/workflow_deploy.sh'
          fi
          echo "📋 Deployment command: $DEPLOY_CMD"

          # Send deployment command via SSM
          COMMAND_ID=$(aws ssm send-command \
            --instance-ids "$EC2_INSTANCE_ID" \
            --document-name "AWS-RunShellScript" \
            --parameters "commands=[\"$DEPLOY_CMD\"]" \
            --comment "GitHub Actions deployment - commit $HEAD_SHA" \
            --output text \
            --query "Command.CommandId")
          echo "📋 Command ID: $COMMAND_ID"

          echo "⏳ Waiting for SSM agent to pick up command..."
          sleep 3

          echo "⏳ Polling for deployment completion (timeout: 5 minutes)..."
          # Poll for command completion (30 attempts x 10 seconds = 5 minutes)
          MAX_ATTEMPTS=30
          ATTEMPT=0
          LAST_STATUS=""
          while [ $ATTEMPT -lt $MAX_ATTEMPTS ]; do
            ATTEMPT=$((ATTEMPT+1))

            # Query the status without letting a failed call abort the step:
            # exit-on-error is switched off only around this command.
            set +e
            RESULT=$(aws ssm get-command-invocation \
              --command-id "$COMMAND_ID" \
              --instance-id "$EC2_INSTANCE_ID" \
              --query 'Status' \
              --output text 2>&1)
            EXIT_CODE=$?
            set -e

            # Check if command invocation exists yet
            if [ $EXIT_CODE -ne 0 ]; then
              if echo "$RESULT" | grep -q "InvocationDoesNotExist"; then
                STATUS="Waiting"
              else
                echo "⚠️ Error querying command status: $RESULT"
                STATUS="Unknown"
              fi
            else
              STATUS="$RESULT"
            fi

            # Only print status if it changed
            if [ "$STATUS" != "$LAST_STATUS" ]; then
              echo "Status: $STATUS"
              LAST_STATUS="$STATUS"
            else
              echo -n "."
            fi

            # Check for terminal states
            if [ "$STATUS" = "Success" ]; then
              echo ""
              echo "✅ Deployment completed successfully!"
              break
            elif [ "$STATUS" = "Failed" ] || [ "$STATUS" = "Cancelled" ] || [ "$STATUS" = "TimedOut" ]; then
              echo ""
              echo "❌ Deployment failed with status: $STATUS"
              # Get command output for debugging
              echo "📄 Deployment output:"
              aws ssm get-command-invocation \
                --command-id "$COMMAND_ID" \
                --instance-id "$EC2_INSTANCE_ID" \
                --query '[StandardOutputContent,StandardErrorContent]' \
                --output text
              exit 1
            fi

            # Wait before next poll (unless we just succeeded)
            if [ "$STATUS" != "Success" ] && [ $ATTEMPT -lt $MAX_ATTEMPTS ]; then
              sleep 10
            fi
          done

          if [ $ATTEMPT -eq $MAX_ATTEMPTS ] && [ "$STATUS" != "Success" ]; then
            echo ""
            echo "❌ Deployment timed out after 5 minutes"
            echo "Last known status: $STATUS"
            # Try to get any available output
            echo "📄 Attempting to retrieve deployment logs:"
            aws ssm get-command-invocation \
              --command-id "$COMMAND_ID" \
              --instance-id "$EC2_INSTANCE_ID" \
              --query '[StandardOutputContent,StandardErrorContent]' \
              --output text 2>&1 || echo "No logs available yet"
            exit 1
          fi

          # Get final command output
          echo "📄 Deployment output:"
          aws ssm get-command-invocation \
            --command-id "$COMMAND_ID" \
            --instance-id "$EC2_INSTANCE_ID" \
            --query '[StandardOutputContent,StandardErrorContent]' \
            --output text

      - name: Verify deployment
        run: |
          echo "🔍 Verifying deployment..."
          sleep 10 # Give the service a moment to fully start

          # Get the application URL from terraform outputs (if available)
          # For now, we'll just verify via SSM that containers are running
          VERIFY_ID=$(aws ssm send-command \
            --instance-ids "$EC2_INSTANCE_ID" \
            --document-name "AWS-RunShellScript" \
            --parameters 'commands=["docker ps --filter name=ttb-verifier --format \"{{.Status}}\""]' \
            --output text \
            --query "Command.CommandId")
          echo "📋 Verification command ID: $VERIFY_ID"

          # The invocation is not queryable immediately after send-command,
          # so pause briefly before handing over to the waiter.
          sleep 3
          if ! aws ssm wait command-executed \
            --command-id "$VERIFY_ID" \
            --instance-id "$EC2_INSTANCE_ID"; then
            echo "❌ Verification command did not complete successfully"
            aws ssm get-command-invocation \
              --command-id "$VERIFY_ID" \
              --instance-id "$EC2_INSTANCE_ID" \
              --query '[Status,StandardErrorContent]' \
              --output text || true
            exit 1
          fi

          # docker ps prints one status line per matching container; at least
          # one of them has to report "Up ..." for the deployment to count.
          CONTAINER_STATUS=$(aws ssm get-command-invocation \
            --command-id "$VERIFY_ID" \
            --instance-id "$EC2_INSTANCE_ID" \
            --query 'StandardOutputContent' \
            --output text)
          echo "📄 Container status: $CONTAINER_STATUS"
          if ! echo "$CONTAINER_STATUS" | grep -q '^Up'; then
            echo "❌ No running ttb-verifier container reported by docker ps"
            exit 1
          fi

          echo "✅ Deployment verification complete!"