test_bed/test_gui_agent_resolution_fix.py at main · tylerelyt/test_bed · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Test the effect of the GUI Agent resolution fix.

Verifies that the resolution passed to the model is correct.
"""

import sys
import os
from pathlib import Path
from io import BytesIO

# Put the "src" directory that sits next to this file at the front of the
# import path (presumably where the `search_engine` package imported in
# step 2 lives -- confirm against the repository layout).
sys.path.insert(0, str(Path(__file__).parent / "src"))

# Run banner, followed by one blank separator line.
for banner_line in ("=" * 70, "GUI Agent 分辨率修复测试", "=" * 70, ""):
    print(banner_line)

# Step 1: detect the screen resolution.
for banner_line in ("📋 步骤 1: 检测屏幕分辨率", "-" * 70):
    print(banner_line)

try:
    import pyautogui
    from PIL import Image

    # Logical size: the screen dimensions reported by pyautogui.size().
    logical_size = pyautogui.size()
    print(f"✅ PyAutoGUI 逻辑屏幕尺寸: {logical_size.width}x{logical_size.height}")

    # Physical size: the pixel dimensions of an actual screenshot.
    screenshot = pyautogui.screenshot()
    physical_size = screenshot.size
    print(f"✅ 截图物理分辨率: {physical_size[0]}x{physical_size[1]}")

    # Per-axis ratio between screenshot pixels and logical units.
    scale_x, scale_y = (
        physical_size[0] / logical_size.width,
        physical_size[1] / logical_size.height,
    )

    print(f"\n分辨率缩放比例:")
    print(f"  - X 轴: {scale_x:.2f}x")
    print(f"  - Y 轴: {scale_y:.2f}x")

    # A ratio of exactly 1.0 on both axes means screenshot and logical
    # sizes agree; anything else is reported as a Retina/HiDPI display.
    if scale_x == 1.0 and scale_y == 1.0:
        print(f"\n✅ 标准显示器（无缩放）")
    else:
        print(f"\n⚠️  检测到 Retina/HiDPI 显示器（缩放比例: {scale_x:.2f}x）")

except Exception as e:
    # Any failure here (missing package, no display, ...) aborts the run.
    print(f"❌ 检测失败: {e}")
    sys.exit(1)

print()
print("=" * 70)
print("📋 步骤 2: 测试 SimplePromptAgent 分辨率提取")
print("-" * 70)

try:
    from search_engine.gui_agent_service import SimplePromptAgent

    # Build an agent with a placeholder API key; no model call is made below.
    agent = SimplePromptAgent(
        model="qwen3-vl-plus",
        api_key="test_key_not_used",
        enable_thinking=False
    )

    # Serialize the step-1 screenshot to PNG bytes for the mock observation.
    buffer = BytesIO()
    screenshot.save(buffer, format='PNG')
    screenshot_bytes = buffer.getvalue()

    observation = {
        'screenshot': screenshot_bytes,
        'screenshot_path': None,
        'timestamp': '2025-01-01T00:00:00'
    }

    # Exercise the resolution-extraction logic without calling the model.
    print("正在测试分辨率提取逻辑...")

    # The encoded result is not inspected; the call only checks that the
    # agent can encode the screenshot bytes without raising.
    screenshot_b64 = agent._encode_screenshot(observation['screenshot'])

    # Re-read the screen size the way the fixed logic does. The reading is
    # kept in its own name so that `logical_size` from step 1 stays the
    # independent expected value; rebinding `logical_size` here would turn
    # the check below into a comparison of a value with itself.
    # NOTE(review): this mirrors the fixed logic by hand rather than calling
    # into the agent, so it does not prove the agent itself uses this path.
    screen_size = None
    try:
        agent_logical_size = pyautogui.size()
        screen_size = (agent_logical_size.width, agent_logical_size.height)
        print(f"✅ Agent 提取的屏幕尺寸: {screen_size[0]}x{screen_size[1]}")
    except Exception as e:
        print(f"❌ Agent 分辨率提取失败: {e}")

    # Compare the extracted size against the step-1 measurement.
    if screen_size is not None:
        if screen_size == (logical_size.width, logical_size.height):
            print(f"✅ 分辨率提取正确！使用的是 PyAutoGUI 逻辑尺寸")
            print(f"   模型将看到正确的坐标范围: x ∈ [0, {screen_size[0]-1}], y ∈ [0, {screen_size[1]-1}]")
        else:
            print(f"❌ 分辨率提取错误！")
            print(f"   预期: {logical_size.width}x{logical_size.height}")
            print(f"   实际: {screen_size[0]}x{screen_size[1]}")

except Exception as e:
    # Import or setup failure: show the traceback and abort the run.
    import traceback
    print(f"❌ 测试失败: {e}")
    print(traceback.format_exc())
    sys.exit(1)

print()
for header_line in ("=" * 70, "📋 步骤 3: 验证坐标转换", "-" * 70):
    print(header_line)

# Probe the four corners and the centre of the logical coordinate space.
right_edge = logical_size.width - 1
bottom_edge = logical_size.height - 1
test_points = [
    ("左上角", 0, 0),
    ("右下角", right_edge, bottom_edge),
    ("屏幕中心", logical_size.width // 2, logical_size.height // 2),
    ("右上角", right_edge, 0),
    ("左下角", 0, bottom_edge),
]

print(f"逻辑屏幕尺寸: {logical_size.width}x{logical_size.height}")
print(f"截图物理尺寸: {physical_size[0]}x{physical_size[1]}")
print()

# The "wrong" coordinates are only shown when the X axis is scaled.
is_scaled = scale_x != 1.0
for point in test_points:
    name, x, y = point
    # Where the same point would land if the model worked in screenshot
    # (physical) pixels instead of logical units.
    wrong_x, wrong_y = int(x * scale_x), int(y * scale_y)

    print(f"{name:12s} - 正确坐标: ({x:4d}, {y:4d})")
    if is_scaled:
        print(f"             - 错误坐标（如果使用物理分辨率）: ({wrong_x:4d}, {wrong_y:4d})")
print()
print("=" * 70)
print("📊 测试总结")
print("=" * 70)

# The fix counts as successful only when BOTH dimensions handed to the model
# match the logical screen size; checking the width alone would report
# success on a height mismatch.
fix_ok = (
    screen_size is not None
    and screen_size[0] == logical_size.width
    and screen_size[1] == logical_size.height
)

if fix_ok:
    print("✅ 修复成功！")
    print()
    print("修复效果：")
    print(f"  1. 模型现在看到的分辨率: {screen_size[0]}x{screen_size[1]} （逻辑尺寸）")
    print(f"  2. PyAutoGUI 使用的坐标系统: {logical_size.width}x{logical_size.height} （逻辑尺寸）")
    print(f"  3. 坐标系统一致，点击位置准确 ✅")
    print()
    # Extra explanation only when the screenshot is scaled on the X axis.
    if scale_x != 1.0:
        print(f"说明：")
        print(f"  - 您的显示器是 Retina/HiDPI 显示器（{scale_x:.1f}x 缩放）")
        print(f"  - 截图的物理分辨率是 {physical_size[0]}x{physical_size[1]}")
        print(f"  - 但我们告诉模型使用逻辑分辨率 {logical_size.width}x{logical_size.height}")
        print(f"  - 这样模型预测的坐标就能与 PyAutoGUI 的坐标系统匹配")
else:
    print("❌ 修复失败！")
    print()
    print("问题：")
    print(f"  - 模型看到的分辨率与 PyAutoGUI 坐标系统不一致")
    print(f"  - 这会导致点击位置不准确")

print()
print("=" * 70)

# Report failure through the exit status as well, matching the earlier
# steps, which call sys.exit(1) on their failure paths.
if not fix_ok:
    sys.exit(1)