Skip to content

Commit 583d7e5

Browse files
authored
Merge pull request #9 from braintrustdata/ark/fetcher-script
release process for spec tests
2 parents 0b74173 + b85e094 commit 583d7e5

14 files changed

Lines changed: 368 additions & 14 deletions

File tree

.github/release.yml

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Configuration for GitHub's automatically generated release notes.
changelog:
  # Pull requests carrying one of these labels, or opened by one of these
  # authors, are left out of the generated notes.
  exclude:
    labels:
      - ignore-for-release
      - dependencies
    authors:
      - dependabot
  # Remaining pull requests are grouped under the first category whose
  # labels match.
  categories:
    - title: Features
      labels:
        - feature
        - enhancement
    - title: Bug Fixes
      labels:
        - bug
        - fix
    - title: Documentation
      labels:
        - documentation
        - docs
    # Catch-all: "*" is quoted so YAML does not read it as an alias sigil.
    - title: Other Changes
      labels:
        - "*"
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
# This workflow is triggered when a new tag matching 'v*' is pushed.
# It can also be run manually to re-publish a release in case it failed for some reason.
#
# Security note: values that originate outside the workflow (the dispatch input,
# the pushed ref name, and step outputs derived from them) are handed to the
# shell through `env:` rather than being expanded with ${{ }} inside `run:`.
# Expression expansion happens before the shell parses the script, so inlining
# them would let a crafted tag name execute arbitrary commands.
name: Publish Release From Tag

on:
  push:
    tags:
      - 'v*'
  workflow_dispatch:
    inputs:
      tag:
        description: 'Tag to publish (e.g., v1.0.0)'
        required: true
        type: string

permissions:
  contents: write

jobs:
  validate-and-publish:
    name: Validate Tag and Publish Release
    runs-on: ubuntu-24.04
    steps:
      - name: Checkout code
        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1
        with:
          # Full history so that every tag is available locally.
          fetch-depth: 0

      # Pick the tag from the push event, or from the manual dispatch input.
      - name: Determine tag
        id: determine-tag
        env:
          EVENT_NAME: ${{ github.event_name }}
          REF_NAME: ${{ github.ref_name }}
          INPUT_TAG: ${{ inputs.tag }}
        run: |
          if [[ "$EVENT_NAME" == "push" ]]; then
            TAG_NAME="$REF_NAME"
          else
            TAG_NAME="$INPUT_TAG"
          fi
          echo "tag=$TAG_NAME" >> "$GITHUB_OUTPUT"
          echo "Using tag: $TAG_NAME"

      # Only plain vX.Y.Z tags are published.
      - name: Validate tag format
        env:
          TAG: ${{ steps.determine-tag.outputs.tag }}
        run: |
          if [[ ! "$TAG" =~ ^v ]]; then
            echo "Error: Tag '$TAG' must start with 'v'"
            exit 1
          fi

          VERSION="${TAG#v}"

          if [[ ! "$VERSION" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
            echo "Error: Tag '$TAG' is not valid semver format (vx.y.z)"
            exit 1
          fi

          echo "Tag '$TAG' is valid"

      # Exact ref lookup; grepping `git tag -l` would treat the dots in the
      # version as regex wildcards.
      - name: Verify tag exists
        env:
          TAG: ${{ steps.determine-tag.outputs.tag }}
        run: |
          if ! git rev-parse -q --verify "refs/tags/$TAG" > /dev/null; then
            echo "Error: Tag '$TAG' does not exist"
            exit 1
          fi
          echo "Tag '$TAG' exists"

      # The fully qualified ref avoids ambiguity with a branch of the same name.
      - name: Checkout tag
        env:
          TAG: ${{ steps.determine-tag.outputs.tag }}
        run: |
          git checkout "refs/tags/$TAG"

      - name: Run tests
        run: ./scripts/test.sh

      - name: Create GitHub Release
        env:
          TAG: ${{ steps.determine-tag.outputs.tag }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          gh release create "$TAG" \
            --generate-notes \
            --title "Release $TAG"

scripts/fetch.sh

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#!/usr/bin/env bash
#
# Download a snapshot of the braintrust-spec repository at a given ref and
# unpack it into a directory.
#
# Usage: fetch.sh <sha-or-tag> [outdir]
#   <sha-or-tag>  ref to fetch (required)
#   [outdir]      directory to extract into; created if missing (default: .)
#
# Exits non-zero if the ref cannot be found or the archive cannot be downloaded.
set -euo pipefail

REPO="https://github.com/braintrustdata/braintrust-spec"
REF="${1:?Usage: $0 <sha-or-tag>}"

# Verify the ref exists.
# ls-remote matches refs by name only, so it cannot confirm an arbitrary commit
# SHA. For anything that looks like a (possibly abbreviated) SHA the archive
# download below is the existence check: curl -f fails on an HTTP error.
# The earlier `ls-remote | awk | grep -q` fallback only knew about ref tips and
# could report a false miss under pipefail when grep exited early.
if ! git ls-remote --exit-code "$REPO" "$REF" >/dev/null 2>&1; then
  if [[ ! "$REF" =~ ^[0-9a-fA-F]{4,40}$ ]]; then
    echo "Error: ref '$REF' not found in $REPO" >&2
    exit 1
  fi
fi

OUTDIR="${2:-.}"
mkdir -p "$OUTDIR"

# Unpredictable temp file, removed on every exit path (including a tar failure).
TARBALL="$(mktemp "${TMPDIR:-/tmp}/braintrust-spec.XXXXXX")"
trap 'rm -f "$TARBALL"' EXIT

# Download and extract the tarball — no full clone needed
if ! curl -sfL "$REPO/archive/$REF.tar.gz" -o "$TARBALL"; then
  echo "Error: failed to download archive for '$REF'" >&2
  exit 1
fi

# The archive wraps everything in one top-level directory; strip it.
tar -xzf "$TARBALL" --strip-components=1 -C "$OUTDIR"
echo "Fetched $REF into $OUTDIR"

scripts/release.sh

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
#!/usr/bin/env bash
#
# Release helper: takes a version plus optional flags from the command line.

set -euo pipefail

# Print the command-line synopsis.
usage() {
  echo "Usage: ./scripts/release.sh <version> [--dry-run] [--skip-push]"
}

# Command-line state: one positional version and two boolean flags.
VERSION=""
DRY_RUN=false
SKIP_PUSH=false

while (( $# > 0 )); do
  current="$1"
  shift
  case "$current" in
    --dry-run)
      DRY_RUN=true
      ;;
    --skip-push)
      SKIP_PUSH=true
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      # The first unrecognised word is the version; a second one is an error.
      if [[ -n "$VERSION" ]]; then
        echo "Error: Unknown argument: $current" >&2
        usage
        exit 1
      fi
      VERSION="$current"
      ;;
  esac
done

# A version must have been supplied.
[[ -n "$VERSION" ]] || {
  echo "Error: Version is required" >&2
  usage
  exit 1
}

# Basic semver shape: vMAJOR.MINOR.PATCH with an optional -prerelease suffix.
semver_pattern='^v[0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9.-]+)?$'
[[ "$VERSION" =~ $semver_pattern ]] || {
  echo "Error: Version must follow semantic versioning format (e.g., v1.2.3 or v1.2.3-beta.1)" >&2
  exit 1
}
54+
55+
# Refuse to release while tracked files have uncommitted changes.
if ! git diff-index --quiet HEAD --; then
  echo "Error: Working directory is not clean." >&2
  git status --porcelain
  exit 1
fi

# Check if local branch is in sync with remote
CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD)
git fetch origin "$CURRENT_BRANCH" > /dev/null 2>&1 || {
  echo "Error: Failed to fetch remote branch '$CURRENT_BRANCH'" >&2
  exit 1
}

LOCAL_COMMIT=$(git rev-parse HEAD)
# --verify -q prints nothing for an unknown ref. Plain `git rev-parse` echoes
# the unresolved argument to stdout, which would leave REMOTE_COMMIT non-empty
# and make the "does not exist" branch below unreachable.
REMOTE_COMMIT=$(git rev-parse -q --verify "origin/$CURRENT_BRANCH" 2>/dev/null || echo "")

if [[ -z "$REMOTE_COMMIT" ]]; then
  echo "Error: Remote branch 'origin/$CURRENT_BRANCH' does not exist" >&2
  exit 1
fi

if [[ "$LOCAL_COMMIT" != "$REMOTE_COMMIT" ]]; then
  echo "Error: Local branch '$CURRENT_BRANCH' is not in sync with remote 'origin/$CURRENT_BRANCH'" >&2
  echo "Local: $LOCAL_COMMIT"
  echo "Remote: $REMOTE_COMMIT"
  echo "Please pull or push to sync before releasing."
  exit 1
fi

# Exact ref lookup. Grepping `git tag --list` treats the dots in the version as
# regex wildcards and, under pipefail, can miss a match when grep exits early.
if git rev-parse -q --verify "refs/tags/$VERSION" > /dev/null; then
  echo "Error: Version '$VERSION' already exists locally" >&2
  exit 1
fi

# Check remote tags
git fetch --tags > /dev/null 2>&1 || true
# A failed remote query must not be mistaken for "tag is absent".
if ! REMOTE_TAG=$(git ls-remote --tags origin "refs/tags/$VERSION"); then
  echo "Error: Failed to list tags on remote 'origin'" >&2
  exit 1
fi
if [[ -n "$REMOTE_TAG" ]]; then
  echo "Error: Version '$VERSION' already exists on remote" >&2
  exit 1
fi
95+
96+
# Gather what is about to be released and print a summary.
head_sha=$(git rev-parse HEAD)
head_short=$(git rev-parse --short HEAD)
# Turn an SSH-style origin URL into a browsable https URL without the .git suffix.
REPO_URL=$(git config --get remote.origin.url | sed -e 's/git@github.com:/https:\/\/github.com\//' -e 's/\.git$//')
# Highest-versioned tag whose name does not contain "-rc"; empty when there is none.
previous_tag=$(git tag --sort=-version:refname | grep -v -- '-rc' | head -n 1 2>/dev/null || echo "")

# Print one aligned "label value" row of the summary.
summary_row() {
  printf "%-13s %s\n" "$1" "$2"
}

# Link to the diff since the previous tag, or to the commit log when there is none.
changeset_url="$REPO_URL/commits/$head_sha"
if [[ -n "$previous_tag" ]]; then
  changeset_url="$REPO_URL/compare/$previous_tag...$head_sha"
fi

echo "================================================"
echo " Braintrust Spec Release"
echo "================================================"
summary_row "version:" "$VERSION"
summary_row "commit:" "$head_short"
summary_row "code:" "$REPO_URL/commit/$head_sha"
summary_row "changeset:" "$changeset_url"
echo ""

# A dry run stops here, before the confirmation prompt.
if [[ "$DRY_RUN" == true ]]; then
  echo "dry-run was requested. Bailing"
  exit 0
fi
120+
121+
# Ask for explicit confirmation; anything other than the exact word aborts.
read -r -p "Are you ready to release version $VERSION? Type 'YOLO' to continue: "
echo ""
[[ "$REPLY" == "YOLO" ]] || {
  echo "Release aborted"
  exit 0
}

# Run the test script before creating the tag.
./scripts/test.sh || {
  echo "Error: tests failed" >&2
  exit 1
}

# Create the annotated tag, then push it unless --skip-push was given.
git tag -a "$VERSION" -m "Release $VERSION"
case "$SKIP_PUSH" in
  true)
    echo "skip-push was requested. tag is created locally but not pushed. Do what you will."
    exit 0
    ;;
esac
git push origin "$VERSION"

printf '%s\n' \
  "================================================" \
  " Release Complete!" \
  "================================================" \
  "Version $VERSION has been created and pushed to origin." \
  "" \
  "View changelog: $REPO_URL/releases/tag/$VERSION"

scripts/test.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#!/usr/bin/env bash
#
# Placeholder test entry point: nothing is verified yet, so it always succeeds.
set -euo pipefail

# TODO: we can verify valid test/semconv yaml once things mature more

exit 0
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# LLM span test: an Anthropic messages request that carries an inline image.
# NOTE(review): nesting was reconstructed from a flattened view of this file —
# confirm the indentation against the original before merging.
name: attachments
type: llm_span_test
provider: anthropic
endpoint: /v1/messages
requests:
  - model: claude-haiku-4-5-20251001
    temperature: 0.0
    max_tokens: 128
    messages:
      - role: user
        content:
          - type: text
            text: What color is this image?
          - type: image
            source:
              type: base64
              media_type: image/png
              # 1x1 red pixel
              data: iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg==
expected_brainstore_spans:
  - metrics:
      tokens: !fn is_non_negative_number
      prompt_tokens: !fn is_non_negative_number
      completion_tokens: !fn is_non_negative_number
    metadata:
      model: claude-haiku-4-5-20251001
      provider: anthropic
    span_attributes:
      name: anthropic.messages.create
      type: llm
    input:
      - role: user
        content:
          - type: text
            text: What color is this image?
          # The expected span holds a braintrust_attachment source where the
          # request sent base64 data.
          - type: image
            source:
              type: braintrust_attachment
              content_type: image/png
              filename: !fn is_non_empty_string
              key: !fn is_non_empty_string
    output:
      content:
        - text: !fn is_non_empty_string
          type: text
      role: assistant

test/llm_span/anthropic/messages.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ name: messages
22
type: llm_span_test
33
provider: anthropic
44
endpoint: /v1/messages
5-
enabled_runners: ["python", "typescript", "java", "csharp"]
65
requests:
76
- model: claude-haiku-4-5-20251001
87
temperature: 0.0
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# LLM span test: a streamed Anthropic messages request with a system prompt.
# NOTE(review): nesting was reconstructed from a flattened view of this file —
# confirm the indentation against the original before merging.
name: streaming
type: llm_span_test
provider: anthropic
endpoint: /v1/messages
requests:
  - model: claude-haiku-4-5-20251001
    temperature: 0.0
    max_tokens: 128
    system: "You are a helpful assistant."
    messages:
      - role: user
        content: Count from 1 to 5.
    stream: true
expected_brainstore_spans:
  - metrics:
      # Expected in addition to the token counts for this streamed request.
      time_to_first_token: !fn is_non_negative_number
      tokens: !fn is_non_negative_number
      prompt_tokens: !fn is_non_negative_number
      completion_tokens: !fn is_non_negative_number
    metadata:
      model: claude-haiku-4-5-20251001
      provider: anthropic
    span_attributes:
      name: anthropic.messages.create
      type: llm
    # The system prompt is expected as a message with role "system", listed
    # after the user message.
    input:
      - content: Count from 1 to 5.
        role: user
      - content: "You are a helpful assistant."
        role: system
    output:
      content:
        - text: !fn is_non_empty_string
          type: text
      role: assistant

test/llm_span/google/attachments.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ name: attachments
22
type: llm_span_test
33
provider: google
44
endpoint: /v1/models/gemini-2.0-flash:generateContent
5-
enabled_runners: ["python", "typescript", "java", "go"]
65
requests:
76
- contents:
87
- role: user

test/llm_span/google/generate_content.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ name: generate_content
22
type: llm_span_test
33
provider: google
44
endpoint: /v1/models/gemini-2.5-flash:generateContent
5-
enabled_runners: ["python", "typescript", "java", "go"]
65
requests:
76
- contents:
87
- role: user

0 commit comments

Comments
 (0)