Skip to content

Commit b04e6d8

Browse files
committed
Latest updated for semi-automated eval builder with JS verification
1 parent 5fb2f0f commit b04e6d8

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

72 files changed

+173550
-14479
lines changed

evals/eval_builder_snapshots.py

Lines changed: 305 additions & 92 deletions
Large diffs are not rendered by default.
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
id: action-agent-accordion-001
2+
name: Expand Accordion Section
3+
description: Test clicking to expand an accordion panel
4+
enabled: true
5+
target:
6+
url: https://jqueryui.com/accordion/
7+
wait_for: networkidle
8+
wait_timeout: 5000
9+
tool: action_agent
10+
timeout: 60000
11+
input:
12+
objective: Click to expand the "Section 2" accordion panel
13+
reasoning: Testing accordion expand/collapse interaction
14+
validation:
15+
type: js-eval
16+
js-eval:
17+
script: verify.js
18+
expected_result: true
19+
timeout: 5000
20+
metadata:
21+
tags:
22+
- action
23+
- accordion
24+
- expand
25+
- collapse
26+
- ui
27+
priority: high
28+
timeout: 60000
29+
retries: 2
30+
flaky: false
31+
owner: devtools-team
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// Validation: check whether the "Section 2" accordion panel is expanded.
// The accordion lives inside an iframe with class="demo-frame", so every
// lookup goes through that iframe's content document.
//
// The script is wrapped in an IIFE to avoid variable conflicts between
// evaluations, and it always evaluates to a strict boolean: `true` only when
// the "Section 2" header carries aria-expanded="true", `false` in every other
// case (iframe missing, document unavailable, header not found, or collapsed).
(() => {
  // Locate the demo iframe and its document; bail out if either is unavailable.
  const iframe = document.querySelector('iframe.demo-frame');
  const iframeDoc = iframe && iframe.contentDocument;
  if (!iframeDoc) {
    return false;
  }

  // Find the accordion header whose trimmed text is exactly "Section 2".
  const headers = Array.from(iframeDoc.querySelectorAll('#accordion h3'));
  const section2Header = headers.find(
    (header) => header.textContent.trim() === 'Section 2'
  );
  if (!section2Header) {
    return false;
  }

  // aria-expanded="true" on the header indicates the panel is expanded.
  return section2Header.getAttribute('aria-expanded') === 'true';
})()
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
id: action-agent-autocomplete-001
2+
name: Use Autocomplete Search
3+
description: Test typing in autocomplete field and selecting from suggestions
4+
enabled: true
5+
target:
6+
url: https://jqueryui.com/autocomplete/
7+
wait_for: networkidle
8+
wait_timeout: 5000
9+
tool: action_agent
10+
timeout: 60000
11+
input:
12+
objective: Type "Java" in the autocomplete field and select "JavaScript" from suggestions
13+
reasoning: Testing autocomplete/typeahead interaction patterns
14+
validation:
15+
type: js-eval
16+
js-eval:
17+
script: verify.js
20+
expected_result: true
21+
timeout: 5000
22+
metadata:
23+
tags:
24+
- action
25+
- autocomplete
26+
- typeahead
27+
- search
28+
- suggestions
29+
priority: high
30+
timeout: 60000
31+
retries: 2
32+
flaky: false
33+
owner: devtools-team
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Validation: confirm that the autocomplete field ended up holding "JavaScript".
// Objective under test: type "Java" and pick "JavaScript" from the suggestions.
//
// The #tags input may live in the top-level document or inside an iframe, so
// the top-level document is checked first and then every iframe on the page.
// Evaluates to `true` on the first match, otherwise `false`.
(() => {
  // True when `root` contains a #tags input whose trimmed value is "JavaScript".
  const hasJavaScriptValue = (root) => {
    const field = root.querySelector('#tags');
    return field && field.value.trim() === 'JavaScript';
  };

  // Top-level document takes precedence.
  if (hasJavaScriptValue(document)) {
    return true;
  }

  // Otherwise look for a match inside any iframe on the page.
  const frames = Array.from(document.querySelectorAll('iframe'));
  return frames.some((frame) => {
    try {
      const frameDoc = frame.contentDocument;
      return Boolean(frameDoc && hasJavaScriptValue(frameDoc));
    } catch (e) {
      // Reading this frame failed (e.g. a frame we are not allowed to
      // inspect); treat it as "no match" and keep looking.
      return false;
    }
  });
})()
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
id: action-agent-checkbox-001
2+
name: Toggle Newsletter Checkbox
3+
description: Test clicking checkbox elements for form options
4+
enabled: true
5+
target:
6+
url: https://www.w3schools.com/html/tryit.asp?filename=tryhtml_checkbox
7+
wait_for: networkidle
8+
wait_timeout: 5000
9+
tool: action_agent
10+
timeout: 45000
11+
input:
12+
objective: Click the checkbox labeled "I have a bike" to check it
13+
reasoning: Testing interaction with checkbox form elements
14+
validation:
15+
type: js-eval
16+
js-eval:
17+
script: verify.js
18+
expected_result: true
19+
timeout: 5000
20+
metadata:
21+
tags:
22+
- action
23+
- checkbox
24+
- form
25+
- w3schools
26+
- input
27+
priority: high
28+
timeout: 60000
29+
retries: 2
30+
flaky: false
31+
owner: devtools-team
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
// Validation: is the "I have a bike" checkbox checked?
// The checkbox is looked up inside the iframe with id "iframeResult"
// (the result pane of the W3Schools Try It editor).
// Evaluates to `true` only when the checkbox exists and is checked.
(() => {
  const resultFrame = document.getElementById('iframeResult');
  const frameDoc = resultFrame ? resultFrame.contentDocument : null;
  if (!frameDoc) {
    // No result iframe, or its document is not available.
    return false;
  }

  const bikeBox = frameDoc.querySelector('input[type="checkbox"][value="Bike"]');
  if (!bikeBox) {
    return false;
  }
  return bikeBox.checked === true;
})()
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
id: action-agent-checkbox-002
2+
name: Check Extra Cheese Checkbox
3+
description: Test checking a specific checkbox using the check method
4+
enabled: true
5+
target:
6+
url: https://httpbin.org/forms/post
7+
wait_for: networkidle
8+
wait_timeout: 5000
9+
tool: action_agent
10+
timeout: 45000
11+
input:
12+
objective: Find and check the "Extra Cheese" checkbox in the Pizza Toppings section
13+
reasoning: Testing checkbox interaction functionality using check method
14+
hint: Look for the Extra Cheese checkbox and use the check method to select it
15+
validation:
16+
type: js-eval
17+
js-eval:
18+
script: verify.js
19+
expected_result: true
20+
timeout: 5000
21+
metadata:
22+
tags:
23+
- action
24+
- checkbox
25+
- check
26+
- form
27+
- httpbin
28+
priority: high
29+
timeout: 45000
30+
retries: 2
31+
flaky: false
32+
owner: devtools-team
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
// Validation: is the "Extra Cheese" checkbox checked?
// Wrapped in an IIFE to avoid variable redeclaration errors.
// Always evaluates to a strict boolean: `true` when the checkbox exists and
// is checked, `false` when it is unchecked or cannot be found.
(() => {
  const cheeseCheckbox = document.querySelector(
    'input[type="checkbox"][name="topping"][value="cheese"]'
  );
  if (!cheeseCheckbox) {
    // Without this guard the script would evaluate to `null`, not `false`.
    return false;
  }
  return cheeseCheckbox.checked === true;
})()

0 commit comments

Comments
 (0)