Skip to content

Commit b4dbafb

Browse files
authored
feat(detect-agent): add detect-agent workflow (#12)
Adds a new `detect-agent` workflow that uses [`voight-kampff-test`](https://npmx.dev/package/voight-kampff-test) to surface GitHub activity signal on PRs. PRs by `[bot]` accounts and likely automated regular accounts get an `automated` label. For regular accounts, we add a comment to the PR explaining the results. Organization members are automatically bypassed.
1 parent 51277a0 commit b4dbafb

File tree

7 files changed

+413
-1
lines changed

7 files changed

+413
-1
lines changed
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
# Reusable workflow: scans every open PR of the calling repository once and
# labels (and, for non-bot accounts, comments on) PRs whose author is
# classified as automated by automation/dist/detect-agent.mjs.
name: detect-agent-backfill

on:
  workflow_call:
    inputs:
      LABEL:
        description: >
          Label to apply when the PR author is classified as automated.
        default: "automated"
        type: string
        required: false
      BYPASS_MEMBERS:
        description: >
          Skip scanning for org members and collaborators.
        default: true
        type: boolean
        required: false

jobs:
  backfill:
    runs-on: ubuntu-latest
    steps:
      # The detection script lives in the automation repo, not in the caller.
      - name: Checkout
        uses: actions/checkout@v4
        with:
          repository: "bombshell-dev/automation"
          ref: "main"
          path: "automation"

      - name: Setup Node
        uses: actions/setup-node@v4
        with:
          node-version: 22

      # Kept consistent with detect-agent.yml, which installs dependencies
      # before running the same script.
      # NOTE(review): presumably dist/detect-agent.mjs resolves
      # voight-kampff-test from node_modules; drop this step if the dist
      # file is fully bundled.
      - name: Install dependencies
        run: npm install --prefix automation

      - name: Ensure label exists
        env:
          GH_TOKEN: ${{ github.token }}
          # Passed through env so the input is never spliced into the shell
          # command line.
          LABEL: ${{ inputs.LABEL }}
        run: |
          gh label create "$LABEL" \
            --repo "$GITHUB_REPOSITORY" \
            --color "D93F0B" \
            --description "PR author detected as automated" \
            --force

      - name: Scan open PRs
        env:
          GH_TOKEN: ${{ github.token }}
          GITHUB_TOKEN: ${{ github.token }}
          LABEL: ${{ inputs.LABEL }}
          BYPASS_MEMBERS: ${{ inputs.BYPASS_MEMBERS }}
        run: |
          set -euo pipefail

          # Get all open PRs with author info
          PRS=$(gh pr list --repo "$GITHUB_REPOSITORY" --state open --json number,author --limit 999)
          if [ "$(echo "$PRS" | jq 'length')" -eq 0 ]; then
            echo "No open PRs found"
            exit 0
          fi

          # Build unique authors map: { "author": [pr_numbers] }
          AUTHORS=$(echo "$PRS" | jq -r '
            group_by(.author.login)
            | map({ key: .[0].author.login, value: [.[].number] })
            | from_entries
          ')

          for AUTHOR in $(echo "$AUTHORS" | jq -r 'keys[]'); do
            PR_NUMS=$(echo "$AUTHORS" | jq -r --arg a "$AUTHOR" '.[$a] | map(tostring) | join(",")')
            echo "::group::$AUTHOR (PRs: $PR_NUMS)"

            # Bot check: [bot] suffix means GitHub App bot account.
            # NOTE(review): `gh pr list --json author` appears to report App
            # authors as "app/<slug>" rather than "<slug>[bot]"; both forms
            # are accepted here (user logins cannot contain "/"). Confirm
            # against real gh output.
            if [[ "$AUTHOR" == *"[bot]" || "$AUTHOR" == app/* ]]; then
              echo "Bot account detected, labeling PRs"
              for PR in $(echo "$AUTHORS" | jq -r --arg a "$AUTHOR" '.[$a][]'); do
                # Best effort: one failed label must not stop the backfill,
                # but the failure is surfaced instead of silently dropped.
                gh pr edit "$PR" --repo "$GITHUB_REPOSITORY" --add-label "$LABEL" \
                  || echo "::warning::Failed to label PR #$PR"
              done
              echo "::endgroup::"
              continue
            fi

            # Collaborator check
            if [[ "$BYPASS_MEMBERS" == "true" ]]; then
              # gh exits non-zero for a 404 (not a collaborator). Under
              # `set -e` + `pipefail` that would abort the whole scan, so the
              # pipeline failure is neutralised and only the status code is
              # inspected.
              STATUS=$(gh api "repos/$GITHUB_REPOSITORY/collaborators/$AUTHOR" --silent -i 2>&1 | head -1 | awk '{print $2}' || true)
              if [[ "$STATUS" == "204" ]]; then
                echo "Skipping collaborator"
                echo "::endgroup::"
                continue
              fi
            fi

            # Run detect-agent with --json for structured output
            export PR_AUTHOR="$AUTHOR"
            RESULT=$(node automation/dist/detect-agent.mjs --json) || {
              echo "::warning::Failed to analyze $AUTHOR"
              echo "::endgroup::"
              continue
            }

            IS_AGENT=$(echo "$RESULT" | jq -r '.isAgent')
            # `// empty` keeps a missing/null comment from becoming the
            # literal string "null", which would pass the -n test below.
            COMMENT=$(echo "$RESULT" | jq -r '.comment // empty')

            if [[ "$IS_AGENT" == "true" ]]; then
              echo "Flagged as automated, labeling + commenting"
              for PR in $(echo "$AUTHORS" | jq -r --arg a "$AUTHOR" '.[$a][]'); do
                gh pr edit "$PR" --repo "$GITHUB_REPOSITORY" --add-label "$LABEL" \
                  || echo "::warning::Failed to label PR #$PR"
                if [ -n "$COMMENT" ]; then
                  gh pr comment "$PR" --repo "$GITHUB_REPOSITORY" --body "$COMMENT" \
                    || echo "::warning::Failed to comment on PR #$PR"
                fi
              done
            else
              echo "Not flagged"
            fi

            echo "::endgroup::"
          done

.github/workflows/detect-agent.yml

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
# Reusable workflow: classifies the author of the triggering pull request.
# [bot] accounts are labeled immediately, collaborators are optionally
# skipped, and every other author is analyzed by
# automation/dist/detect-agent.mjs; flagged authors get a label and a comment.
name: detect-agent

on:
  workflow_call:
    inputs:
      LABEL:
        description: >
          Label to apply when the PR author is classified as automated.
        default: "automated"
        type: string
        required: false
      BYPASS_MEMBERS:
        description: >
          Skip scanning for org members and collaborators.
        default: true
        type: boolean
        required: false
    outputs:
      classification:
        description: "The classification result: organic, mixed, or automation"
        value: ${{ jobs.detect.outputs.classification }}
      score:
        description: "The raw score from voight-kampff-test"
        value: ${{ jobs.detect.outputs.score }}
      is_agent:
        description: "Whether the PR author is classified as automated (true/false)"
        value: ${{ jobs.detect.outputs.is_agent }}

jobs:
  detect:
    runs-on: ubuntu-latest
    outputs:
      classification: ${{ steps.analyze.outputs.CLASSIFICATION }}
      score: ${{ steps.analyze.outputs.SCORE }}
      is_agent: ${{ steps.analyze.outputs.IS_AGENT }}
    steps:
      # A hit on the per-PR key means this PR was already scanned; every
      # later step is gated on cache-hit != 'true'.
      - name: Check scan cache
        id: cache
        uses: actions/cache/restore@v4
        with:
          path: .scan-marker
          key: detect-agent-pr-${{ github.event.pull_request.number }}

      # Sets is_bot=true for [bot] authors, skip=true for collaborators.
      - name: Check for bypass
        if: steps.cache.outputs.cache-hit != 'true'
        id: bypass
        env:
          GH_TOKEN: ${{ github.token }}
          PR_AUTHOR: ${{ github.event.pull_request.user.login }}
          BYPASS_MEMBERS: ${{ inputs.BYPASS_MEMBERS }}
        run: |
          # Fast-path: [bot] suffix means GitHub App bot account
          if [[ "$PR_AUTHOR" == *"[bot]" ]]; then
            echo "is_bot=true" >> "$GITHUB_OUTPUT"
            exit 0
          fi
          # Collaborator check: only the HTTP status line is inspected, and
          # 204 is treated as "is a collaborator".
          if [[ "$BYPASS_MEMBERS" == "true" ]]; then
            # `|| true` keeps a non-zero gh exit (e.g. 404) from failing the
            # step if the shell ever runs with pipefail.
            STATUS=$(gh api "repos/$GITHUB_REPOSITORY/collaborators/$PR_AUTHOR" --silent -i 2>&1 | head -1 | awk '{print $2}' || true)
            if [[ "$STATUS" == "204" ]]; then
              echo "skip=true" >> "$GITHUB_OUTPUT"
            fi
          fi

      - name: Label bot account
        if: steps.cache.outputs.cache-hit != 'true' && steps.bypass.outputs.is_bot == 'true'
        env:
          GH_TOKEN: ${{ github.token }}
          # Workflow values go through env rather than being expanded into
          # the script text, so they cannot alter the shell command.
          LABEL: ${{ inputs.LABEL }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
        run: |
          gh label create "$LABEL" \
            --repo "$GITHUB_REPOSITORY" \
            --color "D93F0B" \
            --description "PR author detected as automated" \
            --force
          gh pr edit "$PR_NUMBER" --repo "$GITHUB_REPOSITORY" --add-label "$LABEL"

      # The detection script lives in the automation repo, not in the caller.
      - name: Checkout
        if: steps.cache.outputs.cache-hit != 'true' && steps.bypass.outputs.is_bot != 'true' && steps.bypass.outputs.skip != 'true'
        uses: actions/checkout@v4
        with:
          repository: "bombshell-dev/automation"
          ref: "main"
          path: "automation"

      - name: Setup Node
        if: steps.cache.outputs.cache-hit != 'true' && steps.bypass.outputs.is_bot != 'true' && steps.bypass.outputs.skip != 'true'
        uses: actions/setup-node@v4
        with:
          node-version: 22

      - name: Install dependencies
        if: steps.cache.outputs.cache-hit != 'true' && steps.bypass.outputs.is_bot != 'true' && steps.bypass.outputs.skip != 'true'
        run: npm install --prefix automation

      # Later steps and the job outputs read CLASSIFICATION, SCORE, IS_AGENT
      # and COMMENT from this step's outputs.
      - id: analyze
        name: Analyze PR author
        if: steps.cache.outputs.cache-hit != 'true' && steps.bypass.outputs.is_bot != 'true' && steps.bypass.outputs.skip != 'true'
        env:
          GITHUB_TOKEN: ${{ github.token }}
          PR_AUTHOR: ${{ github.event.pull_request.user.login }}
        run: node automation/dist/detect-agent.mjs

      - name: Ensure label exists
        if: steps.cache.outputs.cache-hit != 'true' && steps.bypass.outputs.is_bot != 'true' && steps.bypass.outputs.skip != 'true' && steps.analyze.outputs.IS_AGENT == 'true'
        env:
          GH_TOKEN: ${{ github.token }}
          LABEL: ${{ inputs.LABEL }}
        run: |
          gh label create "$LABEL" \
            --repo "$GITHUB_REPOSITORY" \
            --color "D93F0B" \
            --description "PR author detected as automated" \
            --force

      - name: Add label
        if: steps.cache.outputs.cache-hit != 'true' && steps.bypass.outputs.is_bot != 'true' && steps.bypass.outputs.skip != 'true' && steps.analyze.outputs.IS_AGENT == 'true'
        env:
          GH_TOKEN: ${{ github.token }}
          LABEL: ${{ inputs.LABEL }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
        run: gh pr edit "$PR_NUMBER" --repo "$GITHUB_REPOSITORY" --add-label "$LABEL"

      - name: Comment on PR
        if: steps.cache.outputs.cache-hit != 'true' && steps.bypass.outputs.is_bot != 'true' && steps.bypass.outputs.skip != 'true' && steps.analyze.outputs.IS_AGENT == 'true'
        env:
          GH_TOKEN: ${{ github.token }}
          COMMENT_BODY: ${{ steps.analyze.outputs.COMMENT }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
        run: |
          # Same guard as the backfill workflow: skip when the analyzer
          # produced no comment text instead of posting an empty body.
          if [ -n "$COMMENT_BODY" ]; then
            gh pr comment "$PR_NUMBER" --repo "$GITHUB_REPOSITORY" --body "$COMMENT_BODY"
          fi

      # The marker file is the only cached content; its presence under the
      # per-PR key is what signals "already scanned" on later runs.
      - name: Create scan marker
        if: steps.cache.outputs.cache-hit != 'true' && steps.bypass.outputs.is_bot != 'true' && steps.bypass.outputs.skip != 'true'
        run: echo "scanned" > .scan-marker

      - name: Save scan cache
        if: steps.cache.outputs.cache-hit != 'true' && steps.bypass.outputs.is_bot != 'true' && steps.bypass.outputs.skip != 'true'
        uses: actions/cache/save@v4
        with:
          path: .scan-marker
          key: detect-agent-pr-${{ github.event.pull_request.number }}

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ All workflows are [reusable](https://docs.github.com/en/actions/sharing-automati
1515
| [publish](docs/publish.md) | Create release PRs or publish to npm via changesets |
1616
| [add-issue-to-project](docs/add-issue-to-project.md) | Add new issues to the GitHub Project with "Needs triage" status |
1717
| [move-issue-to-backlog](docs/move-issue-to-backlog.md) | Move closed issues to "Backlog" in the GitHub Project |
18+
| [detect-agent](docs/detect-agent.md) | Detect automated PR authors and label them |
1819

1920
## Acknowledgements
2021

docs/detect-agent.md

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
# detect-agent
2+
3+
Detects automated (bot/agent) PR authors using [`voight-kampff-test`](https://www.npmx.dev/package/voight-kampff-test) which powers [AgentScan](https://agentscan.netlify.app/). Analyzes public GitHub activity to classify accounts as `organic`, `mixed`, or `automation`.
4+
5+
- `[bot]` accounts are labeled immediately without analysis
6+
- Organization members and collaborators are bypassed by default
7+
- Regular accounts are analyzed and, if flagged, labeled and commented on with a breakdown of signals
8+
- Results are cached per PR to avoid redundant scans
9+
10+
## Inputs
11+
12+
| Name | Type | Required | Default | Description |
13+
| ---------------- | --------- | -------- | ------------- | ------------------------------------------------------------- |
14+
| `LABEL` | `string` | No | `"automated"` | Label to apply when the PR author is classified as automated. |
15+
| `BYPASS_MEMBERS` | `boolean` | No | `true` | Skip scanning for org members and collaborators. |
16+
17+
## Outputs
18+
19+
| Name | Description |
20+
| ---------------- | ----------------------------------------------------------------- |
21+
| `classification` | The classification result: `organic`, `mixed`, or `automation` |
22+
| `score` | The raw score from voight-kampff-test |
23+
| `is_agent` | Whether the PR author is classified as automated (`true`/`false`) |
24+
25+
## Secrets
26+
27+
None beyond the default `github.token`.
28+
29+
## Usage
30+
31+
```yaml
32+
name: Detect agent
33+
34+
on:
35+
pull_request_target:
36+
types: [opened]
37+
38+
jobs:
39+
detect:
40+
uses: bombshell-dev/automation/.github/workflows/detect-agent.yml@main
41+
```
42+
43+
### With backfill support
44+
45+
Scan new PRs automatically and backfill all open PRs on demand via `workflow_dispatch`.
46+
47+
```yaml
48+
name: Detect agent
49+
50+
on:
51+
pull_request_target:
52+
types: [opened]
53+
workflow_dispatch: {}
54+
55+
jobs:
56+
detect:
57+
if: github.event_name != 'workflow_dispatch'
58+
uses: bombshell-dev/automation/.github/workflows/detect-agent.yml@main
59+
60+
backfill:
61+
if: github.event_name == 'workflow_dispatch'
62+
uses: bombshell-dev/automation/.github/workflows/detect-agent-backfill.yml@main
63+
```
64+
65+
### Custom label
66+
67+
```yaml
68+
jobs:
69+
detect:
70+
uses: bombshell-dev/automation/.github/workflows/detect-agent.yml@main
71+
with:
72+
LABEL: "bot"
73+
```
74+
75+
### Using outputs
76+
77+
```yaml
78+
jobs:
79+
detect:
80+
uses: bombshell-dev/automation/.github/workflows/detect-agent.yml@main
81+
82+
review:
83+
needs: detect
84+
if: needs.detect.outputs.is_agent != 'true'
85+
runs-on: ubuntu-latest
86+
steps:
87+
- run: echo "PR is from a human (${{ needs.detect.outputs.classification }})"
88+
```

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
},
1212
"devDependencies": {
1313
"@bomb.sh/tools": "^0.2.1",
14-
"@types/node": "^22.19.15"
14+
"@types/node": "^22.19.15",
15+
"voight-kampff-test": "^2.2.0"
1516
}
1617
}

pnpm-lock.yaml

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)