Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 6 additions & 11 deletions examples/agent/browser_agent/browser_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ async def reply( # pylint: disable=R0912,R0915

if not msg_reasoning.has_content_blocks("tool_use"):
# If structured output is required but no tool call is
# made, remind the llm to go on the task
# made, require tool call in the next reasoning step
msg_hint = Msg(
"user",
"<system-hint>Structured output is "
Expand All @@ -258,11 +258,10 @@ async def reply( # pylint: disable=R0912,R0915
f"required structured output.</system-hint>",
"user",
)
await self._reasoning_hint_msgs.add(msg_hint)
# Require tool call in the next reasoning step
tool_choice = "required"

if msg_hint and self.print_hint_msg:
if msg_hint:
await self.memory.add(msg_hint)
await self.print(msg_hint)

elif not msg_reasoning.has_content_blocks("tool_use"):
Expand Down Expand Up @@ -299,14 +298,9 @@ async def _pure_reasoning(
Msg("system", self.sys_prompt, "system"),
*await self.memory.get_memory(),
msg,
# The hint messages to guide the agent's behavior, maybe empty
*await self._reasoning_hint_msgs.get_memory(),
],
)

# Clear the hint messages after use
await self._reasoning_hint_msgs.clear()

res = await self.model(
prompt,
tools=self.no_screenshot_tool_list,
Expand Down Expand Up @@ -355,8 +349,9 @@ async def _reasoning_with_observation(self) -> Msg:
self.previous_chunkwise_information = ""
self.snapshot_in_chunk = []

mem_len = await self.memory.size()
await self.memory.delete(mem_len - 1)
mem = await self.memory.get_memory()
if mem:
await self.memory.delete([mem[-1].id])

self.snapshot_in_chunk = await self._get_snapshot_in_text()
for _ in self.snapshot_in_chunk:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@ Your goal is to complete given tasks by controlling a browser to navigate web pa
5. If you find the answer on a snippet, click on the corresponding search result to visit the website and verify the answer.
6. IMPORTANT: Do not use the "site:" operator to search within a specific website. Always use keywords related to the problem instead.
- Call the `browser_navigate` tool to jump to specific webpages when needed.
- **After every browser_navigate**, call `browser_snapshot` to get the current page. Use **only** the refs from that snapshot (e.g. `ref=e36`, `ref=e72`) for `browser_click`, `browser_type`, etc. Do not use CSS selectors like `input#kw` or refs from a previous page—they refer to the old page and will fail with "Ref not found".
- Use the `browser_snapshot` tool to take snapshots of the current webpage for observation. Scroll will be automatically performed to capture the full page.
- If a tool returns "Ref ... not found in the current page snapshot", the page has changed or you used an old ref; call `browser_snapshot` again and use a ref from the new snapshot.
- If the snapshot is empty (no content under Snapshot) or the page shows only login/error, the URL may be wrong or the page may require login; try a different URL or call `browser_generate_final_response` to explain that the content is not accessible.
- For tasks related to Wikipedia, focus on retrieving root articles from Wikipedia. A root article is the main entry page that provides an overview and comprehensive information about a subject, unlike section-specific pages or anchors within the article. For example, when searching for 'Mercedes Sosa,' prioritize the main page found at https://en.wikipedia.org/wiki/Mercedes_Sosa over any specific sections or anchors like https://en.wikipedia.org/wiki/Mercedes_Sosa#Studio_albums.
- Avoid using Google Scholar. If you are searching for a researcher, try to use their homepage instead.
- When calling the `browser_type` function, set the `slow` parameter to `True` to enable slow typing simulation.
Expand All @@ -32,6 +35,7 @@ Your goal is to complete given tasks by controlling a browser to navigate web pa
### Observing Guidelines
- Always take action based on the elements on the webpage. Never create URLs or generate new pages.
- If the webpage is blank or shows an error such as 404, try refreshing it, or go back to the previous page and find another webpage.
- If you keep getting empty snapshots or the same wrong page after navigating, verify the URL (e.g. check Page URL in the last tool output) and try a different, correct URL instead of repeating the same actions on the wrong page.
- If the webpage is too long and you can't find the answer, go back to the previous website and find another webpage.
- If you go into subpages but cannot find the answer, try going back (possibly multiple levels) and going to another subpage.
- Review the webpage to check if subtasks are completed. An action may appear successful at first but turn out to have failed later. If this happens, just take the action again.
Expand Down
3 changes: 2 additions & 1 deletion examples/agent/browser_agent/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ async def main(
name="Browser-Use Agent",
model=DashScopeChatModel(
api_key=os.environ.get("DASHSCOPE_API_KEY"),
model_name="qwen-max",
model_name="qwen3-max",
stream=False,
),
formatter=DashScopeChatFormatter(),
Expand All @@ -63,6 +63,7 @@ async def main(
if msg.get_text_content() == "exit":
break
msg = await agent(msg, structured_model=FinalResult)
await agent.memory.clear()

except Exception as e:
traceback.print_exc()
Expand Down
Loading