From 440551d1c65586bcc565a5c0714a8e733533fc89 Mon Sep 17 00:00:00 2001 From: feifeiyechuan <1010155688@qq.com> Date: Sat, 20 Dec 2025 17:41:03 +0800 Subject: [PATCH 1/2] feat: add structured result saving and main_params API --- .gitignore | 5 +- README_UPDATE.md | 489 ++++++++++++++++++++++++++++ main.py | 494 ++++++++++++++++++++++++++--- phone_agent/actions/handler.py | 4 +- phone_agent/actions/handler_ios.py | 4 +- phone_agent/adb/device.py | 18 +- phone_agent/agent.py | 5 + phone_agent/agent_ios.py | 6 + phone_agent/device_factory.py | 43 ++- phone_agent/hdc/device.py | 31 +- phone_agent/xctest/device.py | 17 +- scripts/get_package_name.py | 235 ++++++++++++++ 12 files changed, 1271 insertions(+), 80 deletions(-) create mode 100644 README_UPDATE.md create mode 100644 scripts/get_package_name.py diff --git a/.gitignore b/.gitignore index 16a3ba29..856dd790 100644 --- a/.gitignore +++ b/.gitignore @@ -60,4 +60,7 @@ call_model.py app_package_name.py .claude/ -.venv \ No newline at end of file +.venv + +*test* +results/ \ No newline at end of file diff --git a/README_UPDATE.md b/README_UPDATE.md new file mode 100644 index 00000000..bbe63bc1 --- /dev/null +++ b/README_UPDATE.md @@ -0,0 +1,489 @@ +# 更新日志 - 新功能说明 + +本文档记录了 Open-AutoGLM 项目的最新功能更新和使用说明。 + +## 📅 更新日期 +2024年最新更新 + +--- + +## 🆕 新增功能 + +### 1. 结果输出功能 (`--output`) + +#### 功能说明 +新增了 `--output`(或 `-o`)参数,可以将任务执行结果保存到指定的文件中。结果统一以 **JSON 数组** 格式保存。 + +#### 使用方法 + +**命令行方式:** +```bash +# 保存结果到 results/result.json 文件 +python main.py --output ./results/result.json "打开微信并发送消息" + +# 使用短参数 +python main.py -o ./results/result.json "打开微信" +``` + +**编程方式:** +```python +from main import main_params + +main_params( + task="打开微信", + output="./results/result.json" +) +``` + +#### 输出说明 +- 结果会保存为指定路径的 JSON 文件 +- 如果父级文件夹不存在,会自动创建 +- 任务完成后会显示:`result保存到{output}文件` + +#### 示例 +```bash +$ python main.py --output ./results/result.json "打开微信" +Task: 打开微信 + +[执行过程...] 
+ +Result: 任务完成 + +result保存到./results/result.json文件 +``` + +--- + +### 2. 结构化 JSON 输出与全量步骤保存 (`--all`) + +#### 功能说明 +现在结果统一以 **JSON 数组** 格式保存。 +新增了 `--all` 参数,允许用户控制是仅保存最终结果,还是保存执行过程中的所有步骤结果。 + +#### 使用方法 + +**命令行方式:** +```bash +# 仅保存最后结果 (默认,输出为单元素 JSON 数组) +python main.py --output ./results/result.json "打开微信" + +# 保存所有步骤的结果 +python main.py --output ./results/all_steps.json --all "打开微信" +``` + +**编程方式:** +```python +from main import main_params + +# 保存所有步骤的结果 +main_params( + task="打开微信", + output="./results/all_steps.json", + save_all=True +) +``` + +#### 输出格式示例 + +**仅保存最后结果时:** +```json +[ + "任务完成" +] +``` + +**保存所有步骤时 (`--all`):** +```json +[ + "正在打开微信", + "已进入微信主界面", + "任务完成" +] +``` + +--- + +### 3. 编程接口 `main_params()` 函数 + +#### 功能说明 +新增了 `main_params()` 函数,支持通过函数参数的方式调用主程序,方便从其他 Python 脚本中集成使用。 + +#### 函数签名 +```python +def main_params( + base_url: str = None, + model: str = None, + apikey: str = None, + max_steps: int = None, + device_id: str = None, + connect: str = None, + disconnect: str = None, + list_devices: bool = False, + enable_tcpip: int = None, + wda_url: str = None, + pair: bool = False, + wda_status: bool = False, + quiet: bool = False, + list_apps: bool = False, + lang: str = None, + device_type: str = None, + output: str = None, + task: str = None, + allow_all_apps: bool = False, + save_all: bool = False, +) -> None +``` + +#### 使用方法 + +**基本示例:** +```python +from main import main_params + +# 使用默认配置 +main_params(task="打开微信") + +# 自定义配置 +main_params( + base_url="http://localhost:8000/v1", + model="autoglm-phone-9b", + task="打开微信并发送消息", + output="./results/result.json", + device_type="adb" +) +``` + +**完整示例:** +```python +from main import main_params + +# iOS 设备示例 +main_params( + base_url="http://localhost:8000/v1", + model="autoglm-phone-9b", + apikey="your-api-key", + device_type="ios", + wda_url="http://localhost:8100", + task="打开Safari并搜索", + output="./ios_results/search_res.json", + max_steps=50, + lang="cn", + save_all=True +) +``` + +#### 
参数说明 + +| 参数 | 类型 | 默认值 | 说明 | +|------|------|--------|------| +| `base_url` | str | 环境变量或默认值 | 模型API基础URL | +| `model` | str | 环境变量或默认值 | 模型名称 | +| `apikey` | str | 环境变量或默认值 | API密钥 | +| `max_steps` | int | 环境变量或 100 | 最大执行步数 | +| `device_id` | str | 环境变量或 None | 设备ID | +| `device_type` | str | 环境变量或 "adb" | 设备类型:adb/hdc/ios | +| `output` | str | None | 输出文件路径 (JSON) | +| `task` | str | None | 要执行的任务(未指定时进入交互模式) | +| `allow_all_apps` | bool | False | 是否允许启动所有应用 | +| `save_all` | bool | False | 是否保存所有中间步骤结果 | +| `lang` | str | 环境变量或 "cn" | 语言:cn/en | +| `quiet` | bool | False | 是否静默模式 | +| `wda_url` | str | 环境变量或 "http://localhost:8100" | iOS WebDriverAgent URL | +| ... | ... | ... | 其他参数见函数文档 | + +--- + +### 4. 允许所有应用功能 (`--allow-all-apps`) + +#### 功能说明 +新增了 `--allow-all-apps` 参数,允许启动任何应用,不再限制在配置的应用列表中。当启用此选项时,可以直接使用应用的包名(Android)、Bundle ID(iOS)或 Bundle Name(HarmonyOS)来启动应用。 + +#### 使用方法 + +**命令行方式:** +```bash +# 限制在应用列表中(默认行为) +python main.py "打开微信" + +# 允许所有应用,直接使用包名 +python main.py --allow-all-apps "打开com.example.myapp" +``` + +**编程方式:** +```python +from main import main_params + +# 限制在应用列表中 +main_params(task="打开微信", allow_all_apps=False) + +# 允许所有应用 +main_params(task="打开com.example.myapp", allow_all_apps=True) +``` + +#### 使用场景 + +1. **测试未配置的应用** + ```bash + python main.py --allow-all-apps "打开com.example.testapp" + ``` + +2. **使用包名直接启动** + ```bash + # Android + python main.py --allow-all-apps "打开com.android.chrome" + + # iOS + python main.py --device-type ios --allow-all-apps "打开com.apple.Safari" + ``` + +3. **动态应用管理** + - 不需要修改配置文件即可启动新应用 + - 适合开发和测试环境 + +#### 注意事项 + +- 当 `allow_all_apps=True` 时,应用名称会被直接当作包名/Bundle ID使用,不再查询配置的应用列表(例如 Android 上此时应使用 `com.tencent.mm` 而不是"微信") +- 确保包名/Bundle ID正确,否则可能无法启动应用 +- 建议在已知包名的情况下使用此功能 + +--- + +### 5. 应用包名查询工具 (`scripts/get_package_name.py`) + +#### 功能说明 +新增了一个实用的 Python 脚本工具,用于查询 Android 应用的包名。支持多种查询方式,方便开发者查找和添加新应用到配置中。 + +#### 安装要求 +- 已安装 ADB 工具 +- 设备已连接并启用 USB 调试 + +#### 使用方法 + +**1. 列出所有第三方应用** +```bash +python scripts/get_package_name.py list +``` + +**2. 
列出所有应用(包括系统应用)** +```bash +python scripts/get_package_name.py list-all +``` + +**3. 查看当前前台应用的包名** +```bash +# 先打开你想查询的应用,然后运行: +python scripts/get_package_name.py current +``` + +**4. 搜索包含关键词的包名** +```bash +# 搜索微信相关应用 +python scripts/get_package_name.py search wechat + +# 搜索腾讯相关应用 +python scripts/get_package_name.py search tencent +``` + +**5. 查看应用的详细信息** +```bash +python scripts/get_package_name.py info com.tencent.mm +``` + +**6. 指定设备ID(多设备时)** +```bash +python scripts/get_package_name.py device <设备ID> current +python scripts/get_package_name.py device emulator-5554 list +``` + +#### 使用示例 + +**示例1:查找微信包名** +```bash +$ python scripts/get_package_name.py search tencent +搜索包含 'tencent' 的包名: +------------------------------------------------------------ + com.tencent.mm + com.tencent.mobileqq + com.tencent.qqmusic + com.tencent.qqlive + com.tencent.androidqqmail + com.tencent.news + +找到 6 个匹配的应用 +``` + +**示例2:查看当前应用** +```bash +$ python scripts/get_package_name.py current +当前前台应用包名: com.tencent.mm +应用名称: 微信 +``` + +**示例3:获取应用详细信息** +```bash +$ python scripts/get_package_name.py info com.tencent.mm +应用信息: com.tencent.mm +------------------------------------------------------------ +包名: com.tencent.mm +应用名称: 微信 +版本: 8.0.xx +``` + +#### 添加到配置文件 + +找到包名后,可以添加到 `phone_agent/config/apps.py`: + +```python +APP_PACKAGES: dict[str, str] = { + # ... 现有应用 ... 
+ "新应用名称": "com.example.newapp", # 添加新应用 + "新应用英文名": "com.example.newapp", # 支持多个名称映射到同一包名 +} +``` + +#### 其他查询方法 + +**使用 ADB 命令直接查询:** +```bash +# 列出所有第三方应用 +adb shell pm list packages -3 + +# 搜索特定应用 +adb shell pm list packages | grep wechat + +# 查看当前前台应用 +adb shell dumpsys window | grep -E 'mCurrentFocus|mFocusedApp' +``` + +--- + +## 📝 配置说明 + +### 环境变量支持 + +以下参数支持通过环境变量设置(`output`、`task`、`allow_all_apps`、`save_all` 等其余参数只能通过命令行或函数参数传入): + +```bash +# 设置模型API地址 +export PHONE_AGENT_BASE_URL="http://localhost:8000/v1" + +# 设置模型名称 +export PHONE_AGENT_MODEL="autoglm-phone-9b" + +# 设置API密钥 +export PHONE_AGENT_API_KEY="your-api-key" + +# 设置最大步数 +export PHONE_AGENT_MAX_STEPS="100" + +# 设置设备ID +export PHONE_AGENT_DEVICE_ID="emulator-5554" + +# 设置设备类型 +export PHONE_AGENT_DEVICE_TYPE="adb" + +# 设置语言 +export PHONE_AGENT_LANG="cn" + +# iOS WebDriverAgent URL +export PHONE_AGENT_WDA_URL="http://localhost:8100" +``` + +--- + +## 🔧 完整使用示例 + +### 示例1:基本使用 +```bash +python main.py "打开微信并发送消息给张三" +``` + +### 示例2:保存结果到文件 +```bash +python main.py --output ./results/result.json "打开微信" +``` + +### 示例3:允许所有应用 +```bash +python main.py --allow-all-apps "打开com.example.myapp" +``` + +### 示例4:iOS设备使用 +```bash +python main.py \ + --device-type ios \ + --wda-url http://localhost:8100 \ + --output ./ios_results/res.json \ + "打开Safari并搜索" +``` + +### 示例5:编程集成 +```python +from main import main_params + +def my_automation_task(): + main_params( + base_url="http://localhost:8000/v1", + model="autoglm-phone-9b", + task="打开微信并发送消息", + output="./results/result.json", + allow_all_apps=False, + max_steps=50 + ) + # main_params() 没有返回值,执行结果请从 output 指定的 JSON 文件中读取 + +if __name__ == "__main__": + my_automation_task() +``` + +--- + +## 🐛 故障排除 + +### 问题1:无法保存结果文件 +**解决方案:** +- 确保有写入权限 +- 检查输出路径是否正确 +- 确保磁盘空间充足 + +### 问题2:无法启动未配置的应用 +**解决方案:** +- 使用 `--allow-all-apps` 参数 +- 或先使用 `scripts/get_package_name.py` 查找包名,然后添加到配置 + +### 问题3:包名查询工具无法使用 +**解决方案:** +- 确保 ADB 已安装并在 PATH 中 +- 确保设备已连接:`adb devices` +- 确保已启用 USB 调试 + +--- + +## 📚 相关文件 + +- `main.py` - 主程序文件,包含所有新功能 +- `phone_agent/config/apps.py` 
- 应用配置映射 +- `scripts/get_package_name.py` - 包名查询工具 +- `phone_agent/agent.py` - Android/HarmonyOS Agent +- `phone_agent/agent_ios.py` - iOS Agent + +--- + +## 🔄 更新历史 + +### 最新更新 +- ✅ 修改 `output` 参数为具体文件路径,支持 JSON 数组格式 +- ✅ 添加 `--output` 参数支持结果保存 +- ✅ 新增 `main_params()` 编程接口 +- ✅ 添加 `--allow-all-apps` 参数支持所有应用 +- ✅ 创建包名查询工具脚本 + +--- + +## 💡 提示 + +1. **结果保存**:现在支持指定具体 JSON 文件路径 +2. **应用配置**:优先使用配置列表中的应用,更稳定可靠 +3. **包名查询**:使用工具脚本可以快速找到应用的包名 +4. **编程集成**:使用 `main_params()` 可以更好地集成到其他项目中 + diff --git a/main.py b/main.py index 624d2bf7..865bc111 100755 --- a/main.py +++ b/main.py @@ -24,23 +24,31 @@ from phone_agent import PhoneAgent from phone_agent.agent import AgentConfig +from phone_agent.agent_ios import IOSAgentConfig, IOSPhoneAgent from phone_agent.config.apps import list_supported_apps from phone_agent.config.apps_harmonyos import list_supported_apps as list_harmonyos_apps +from phone_agent.config.apps_ios import list_supported_apps as list_ios_apps from phone_agent.device_factory import DeviceType, get_device_factory, set_device_type from phone_agent.model import ModelConfig +from phone_agent.xctest import XCTestConnection +from phone_agent.xctest import list_devices as list_ios_devices -def check_system_requirements(device_type: DeviceType = DeviceType.ADB) -> bool: +def check_system_requirements( + device_type: DeviceType = DeviceType.ADB, wda_url: str = "http://localhost:8100" +) -> bool: """ Check system requirements before running the agent. Checks: - 1. ADB/HDC tools installed + 1. ADB/HDC/iOS tools installed 2. At least one device connected 3. ADB Keyboard installed on the device (for ADB only) + 4. WebDriverAgent running (for iOS only) Args: - device_type: Type of device tool (ADB or HDC). + device_type: Type of device tool (ADB, HDC, or IOS). + wda_url: WebDriverAgent URL (for iOS only). Returns: True if all checks pass, False otherwise. 
@@ -51,8 +59,12 @@ def check_system_requirements(device_type: DeviceType = DeviceType.ADB) -> bool: all_passed = True # Determine tool name and command - tool_name = "ADB" if device_type == DeviceType.ADB else "HDC" - tool_cmd = "adb" if device_type == DeviceType.ADB else "hdc" + if device_type == DeviceType.IOS: + tool_name = "libimobiledevice" + tool_cmd = "idevice_id" + else: + tool_name = "ADB" if device_type == DeviceType.ADB else "HDC" + tool_cmd = "adb" if device_type == DeviceType.ADB else "hdc" # Check 1: Tool installed print(f"1. Checking {tool_name} installation...", end=" ") @@ -66,20 +78,31 @@ def check_system_requirements(device_type: DeviceType = DeviceType.ADB) -> bool: print( " - Windows: Download from https://developer.android.com/studio/releases/platform-tools" ) - else: - print(" - Download from HarmonyOS SDK or https://gitee.com/openharmony/docs") + elif device_type == DeviceType.HDC: + print( + " - Download from HarmonyOS SDK or https://gitee.com/openharmony/docs" + ) print(" - Add to PATH environment variable") + else: # IOS + print(" - macOS: brew install libimobiledevice") + print(" - Linux: sudo apt-get install libimobiledevice-utils") all_passed = False else: # Double check by running version command try: - version_cmd = [tool_cmd, "version"] if device_type == DeviceType.ADB else [tool_cmd, "-v"] + if device_type == DeviceType.ADB: + version_cmd = [tool_cmd, "version"] + elif device_type == DeviceType.HDC: + version_cmd = [tool_cmd, "-v"] + else: # IOS + version_cmd = [tool_cmd, "-ln"] + result = subprocess.run( version_cmd, capture_output=True, text=True, timeout=10 ) if result.returncode == 0: version_line = result.stdout.strip().split("\n")[0] - print(f"✅ OK ({version_line})") + print(f"✅ OK ({version_line if version_line else 'installed'})") else: print("❌ FAILED") print(f" Error: {tool_name} command failed to run.") @@ -108,13 +131,18 @@ def check_system_requirements(device_type: DeviceType = DeviceType.ADB) -> bool: ) lines = 
result.stdout.strip().split("\n") # Filter out header and empty lines, look for 'device' status - devices = [line for line in lines[1:] if line.strip() and "\tdevice" in line] - else: # HDC + devices = [ + line for line in lines[1:] if line.strip() and "\tdevice" in line + ] + elif device_type == DeviceType.HDC: result = subprocess.run( ["hdc", "list", "targets"], capture_output=True, text=True, timeout=10 ) lines = result.stdout.strip().split("\n") devices = [line for line in lines if line.strip()] + else: # IOS + ios_devices = list_ios_devices() + devices = [d.device_id for d in ios_devices] if not devices: print("❌ FAILED") @@ -123,18 +151,31 @@ def check_system_requirements(device_type: DeviceType = DeviceType.ADB) -> bool: if device_type == DeviceType.ADB: print(" 1. Enable USB debugging on your Android device") print(" 2. Connect via USB and authorize the connection") - print(" 3. Or connect remotely: python main.py --connect :") - else: + print( + " 3. Or connect remotely: python main.py --connect :" + ) + elif device_type == DeviceType.HDC: print(" 1. Enable USB debugging on your HarmonyOS device") print(" 2. Connect via USB and authorize the connection") - print(" 3. Or connect remotely: python main.py --device-type hdc --connect :") + print( + " 3. Or connect remotely: python main.py --device-type hdc --connect :" + ) + else: # IOS + print(" 1. Connect your iOS device via USB") + print(" 2. Unlock device and tap 'Trust This Computer'") + print(" 3. Verify: idevice_id -l") + print(" 4. Or connect via WiFi using device IP") all_passed = False else: if device_type == DeviceType.ADB: device_ids = [d.split("\t")[0] for d in devices] - else: + elif device_type == DeviceType.HDC: device_ids = [d.strip() for d in devices] - print(f"✅ OK ({len(devices)} device(s): {', '.join(device_ids)})") + else: # IOS + device_ids = devices + print( + f"✅ OK ({len(devices)} device(s): {', '.join(device_ids[:2])}{'...' 
if len(device_ids) > 2 else ''})" + ) except subprocess.TimeoutExpired: print("❌ FAILED") print(f" Error: {tool_name} command timed out.") @@ -150,7 +191,7 @@ def check_system_requirements(device_type: DeviceType = DeviceType.ADB) -> bool: print("❌ System check failed. Please fix the issues above.") return False - # Check 3: ADB Keyboard installed (only for ADB) + # Check 3: ADB Keyboard installed (only for ADB) or WebDriverAgent (for iOS) if device_type == DeviceType.ADB: print("3. Checking ADB Keyboard...", end=" ") try: @@ -185,10 +226,38 @@ def check_system_requirements(device_type: DeviceType = DeviceType.ADB) -> bool: print("❌ FAILED") print(f" Error: {e}") all_passed = False - else: + elif device_type == DeviceType.HDC: # For HDC, skip keyboard check as it uses different input method print("3. Skipping keyboard check for HarmonyOS...", end=" ") print("✅ OK (using native input)") + else: # IOS + # Check WebDriverAgent + print(f"3. Checking WebDriverAgent ({wda_url})...", end=" ") + try: + conn = XCTestConnection(wda_url=wda_url) + + if conn.is_wda_ready(): + print("✅ OK") + # Get WDA status for additional info + status = conn.get_wda_status() + if status: + session_id = status.get("sessionId", "N/A") + print(f" Session ID: {session_id}") + else: + print("❌ FAILED") + print(" Error: WebDriverAgent is not running or not accessible.") + print(" Solution:") + print(" 1. Run WebDriverAgent on your iOS device via Xcode") + print(" 2. For USB: Set up port forwarding: iproxy 8100 8100") + print( + " 3. For WiFi: Use device IP, e.g., --wda-url http://192.168.1.100:8100" + ) + print(" 4. 
Verify in browser: open http://localhost:8100/status") + all_passed = False + except Exception as e: + print("❌ FAILED") + print(f" Error: {e}") + all_passed = False print("-" * 50) @@ -290,7 +359,7 @@ def parse_args() -> argparse.Namespace: formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: - # Run with default settings + # Run with default settings (Android) python main.py # Specify model endpoint @@ -313,6 +382,22 @@ def parse_args() -> argparse.Namespace: # List supported apps python main.py --list-apps + + # iOS specific examples + # Run with iOS device + python main.py --device-type ios "Open Safari and search for iPhone tips" + + # Use WiFi connection for iOS + python main.py --device-type ios --wda-url http://192.168.1.100:8100 + + # List connected iOS devices + python main.py --device-type ios --list-devices + + # Check WebDriverAgent status + python main.py --device-type ios --wda-status + + # Pair with iOS device + python main.py --device-type ios --pair """, ) @@ -384,6 +469,26 @@ def parse_args() -> argparse.Namespace: help="Enable TCP/IP debugging on USB device (default port: 5555)", ) + # iOS specific options + parser.add_argument( + "--wda-url", + type=str, + default=os.getenv("PHONE_AGENT_WDA_URL", "http://localhost:8100"), + help="WebDriverAgent URL for iOS (default: http://localhost:8100)", + ) + + parser.add_argument( + "--pair", + action="store_true", + help="Pair with iOS device (required for some operations)", + ) + + parser.add_argument( + "--wda-status", + action="store_true", + help="Show WebDriverAgent status and exit (iOS only)", + ) + # Other options parser.add_argument( "--quiet", "-q", action="store_true", help="Suppress verbose output" @@ -404,9 +509,28 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "--device-type", type=str, - choices=["adb", "hdc"], + choices=["adb", "hdc", "ios"], default=os.getenv("PHONE_AGENT_DEVICE_TYPE", "adb"), - help="Device type: adb for Android, hdc for HarmonyOS 
(default: adb)", + help="Device type: adb for Android, hdc for HarmonyOS, ios for iPhone (default: adb)", + ) + + parser.add_argument( + "--output", + "-o", + type=str, + help="Output file path to save result (e.g., results/result.json)", + ) + + parser.add_argument( + "--all", + action="store_true", + help="Save all intermediate step results to the output file, not just the final result", + ) + + parser.add_argument( + "--allow-all-apps", + action="store_true", + help="Allow launching any app, not limited to the configured app list", ) parser.add_argument( @@ -419,6 +543,81 @@ def parse_args() -> argparse.Namespace: return parser.parse_args() +def handle_ios_device_commands(args) -> bool: + """ + Handle iOS device-related commands. + + Returns: + True if a device command was handled (should exit), False otherwise. + """ + conn = XCTestConnection(wda_url=args.wda_url) + + # Handle --list-devices + if args.list_devices: + devices = list_ios_devices() + if not devices: + print("No iOS devices connected.") + print("\nTroubleshooting:") + print(" 1. Connect device via USB") + print(" 2. Unlock device and trust this computer") + print(" 3. 
Run: idevice_id -l") + else: + print("Connected iOS devices:") + print("-" * 70) + for device in devices: + conn_type = device.connection_type.value + model_info = f"{device.model}" if device.model else "Unknown" + ios_info = f"iOS {device.ios_version}" if device.ios_version else "" + name_info = device.device_name or "Unnamed" + + print(f" ✓ {name_info}") + print(f" UUID: {device.device_id}") + print(f" Model: {model_info}") + print(f" OS: {ios_info}") + print(f" Connection: {conn_type}") + print("-" * 70) + return True + + # Handle --pair + if args.pair: + print("Pairing with iOS device...") + success, message = conn.pair_device(args.device_id) + print(f"{'✓' if success else '✗'} {message}") + return True + + # Handle --wda-status + if args.wda_status: + print(f"Checking WebDriverAgent status at {args.wda_url}...") + print("-" * 50) + + if conn.is_wda_ready(): + print("✓ WebDriverAgent is running") + + status = conn.get_wda_status() + if status: + print(f"\nStatus details:") + value = status.get("value", {}) + print(f" Session ID: {status.get('sessionId', 'N/A')}") + print(f" Build: {value.get('build', {}).get('time', 'N/A')}") + + current_app = value.get("currentApp", {}) + if current_app: + print(f"\nCurrent App:") + print(f" Bundle ID: {current_app.get('bundleId', 'N/A')}") + print(f" Process ID: {current_app.get('pid', 'N/A')}") + else: + print("✗ WebDriverAgent is not running") + print("\nPlease start WebDriverAgent on your iOS device:") + print(" 1. Open WebDriverAgent.xcodeproj in Xcode") + print(" 2. Select your device") + print(" 3. Run WebDriverAgentRunner (Product > Test or Cmd+U)") + print(f" 4. For USB: Run port forwarding: iproxy 8100 8100") + + return True + + return False + + def handle_device_commands(args) -> bool: """ Handle device-related commands. @@ -426,6 +625,16 @@ def handle_device_commands(args) -> bool: Returns: True if a device command was handled (should exit), False otherwise. 
""" + device_type = ( + DeviceType.ADB + if args.device_type == "adb" + else (DeviceType.HDC if args.device_type == "hdc" else DeviceType.IOS) + ) + + # Handle iOS-specific commands + if device_type == DeviceType.IOS: + return handle_ios_device_commands(args) + device_factory = get_device_factory() ConnectionClass = device_factory.get_connection_class() conn = ConnectionClass() @@ -491,17 +700,25 @@ def handle_device_commands(args) -> bool: return False -def main(): - """Main entry point.""" - args = parse_args() +def _run_main(args): + """Internal function to run main logic with args object.""" # Set device type globally based on args - device_type = DeviceType.ADB if args.device_type == "adb" else DeviceType.HDC - set_device_type(device_type) + if args.device_type == "adb": + device_type = DeviceType.ADB + elif args.device_type == "hdc": + device_type = DeviceType.HDC + else: # ios + device_type = DeviceType.IOS + + # Set device type globally for non-iOS devices + if device_type != DeviceType.IOS: + set_device_type(device_type) # Enable HDC verbose mode if using HDC if device_type == DeviceType.HDC: from phone_agent.hdc import set_hdc_verbose + set_hdc_verbose(True) # Handle --list-apps (no system check needed) @@ -509,12 +726,23 @@ def main(): if device_type == DeviceType.HDC: print("Supported HarmonyOS apps:") apps = list_harmonyos_apps() + elif device_type == DeviceType.IOS: + print("Supported iOS apps:") + print("\nNote: For iOS apps, Bundle IDs are configured in:") + print(" phone_agent/config/apps_ios.py") + print("\nCurrently configured apps:") + apps = list_ios_apps() else: print("Supported Android apps:") apps = list_supported_apps() - for app in apps: + for app in sorted(apps): print(f" - {app}") + + if device_type == DeviceType.IOS: + print( + "\nTo add iOS apps, find the Bundle ID and add to APP_PACKAGES_IOS dictionary." 
+ ) return # Handle device commands (these may need partial system checks) @@ -522,14 +750,19 @@ def main(): return # Run system requirements check before proceeding - if not check_system_requirements(device_type): + if not check_system_requirements( + device_type, + wda_url=args.wda_url + if device_type == DeviceType.IOS + else "http://localhost:8100", + ): sys.exit(1) # Check model API connectivity and model availability if not check_model_api(args.base_url, args.model, args.apikey): sys.exit(1) - # Create configurations + # Create configurations and agent based on device type model_config = ModelConfig( base_url=args.base_url, model_name=args.model, @@ -537,22 +770,42 @@ def main(): lang=args.lang, ) - agent_config = AgentConfig( - max_steps=args.max_steps, - device_id=args.device_id, - verbose=not args.quiet, - lang=args.lang, - ) + if device_type == DeviceType.IOS: + # Create iOS agent + agent_config = IOSAgentConfig( + max_steps=args.max_steps, + wda_url=args.wda_url, + device_id=args.device_id, + verbose=not args.quiet, + lang=args.lang, + allow_all_apps=args.allow_all_apps, + ) - # Create agent - agent = PhoneAgent( - model_config=model_config, - agent_config=agent_config, - ) + agent = IOSPhoneAgent( + model_config=model_config, + agent_config=agent_config, + ) + else: + # Create Android/HarmonyOS agent + agent_config = AgentConfig( + max_steps=args.max_steps, + device_id=args.device_id, + verbose=not args.quiet, + lang=args.lang, + allow_all_apps=args.allow_all_apps, + ) + + agent = PhoneAgent( + model_config=model_config, + agent_config=agent_config, + ) # Print header print("=" * 50) - print("Phone Agent - AI-powered phone automation") + if device_type == DeviceType.IOS: + print("Phone Agent iOS - AI-powered iOS automation") + else: + print("Phone Agent - AI-powered phone automation") print("=" * 50) print(f"Model: {model_config.model_name}") print(f"Base URL: {model_config.base_url}") @@ -560,13 +813,27 @@ def main(): print(f"Language: 
{agent_config.lang}") print(f"Device Type: {args.device_type.upper()}") + # Show iOS-specific config + if device_type == DeviceType.IOS: + print(f"WDA URL: {args.wda_url}") + # Show device info - device_factory = get_device_factory() - devices = device_factory.list_devices() - if agent_config.device_id: - print(f"Device: {agent_config.device_id}") - elif devices: - print(f"Device: {devices[0].device_id} (auto-detected)") + if device_type == DeviceType.IOS: + devices = list_ios_devices() + if agent_config.device_id: + print(f"Device: {agent_config.device_id}") + elif devices: + device = devices[0] + print(f"Device: {device.device_name or device.device_id[:16]}") + if device.model and device.ios_version: + print(f" {device.model}, iOS {device.ios_version}") + else: + device_factory = get_device_factory() + devices = device_factory.list_devices() + if agent_config.device_id: + print(f"Device: {agent_config.device_id}") + elif devices: + print(f"Device: {devices[0].device_id} (auto-detected)") print("=" * 50) @@ -575,6 +842,30 @@ def main(): print(f"\nTask: {args.task}\n") result = agent.run(args.task) print(f"\nResult: {result}") + + # Save result to file if output is specified + if args.output: + try: + import json + # Create parent directory if it doesn't exist + output_dir = os.path.dirname(args.output) + if output_dir: + os.makedirs(output_dir, exist_ok=True) + + # Prepare data to save + if getattr(args, "all", False): + # Save all steps + save_data = [res.message or res.thinking for res in agent.history] + else: + # Save only final result + save_data = [result] + + # Save to the specified output file + with open(args.output, "w", encoding="utf-8") as f: + json.dump(save_data, f, ensure_ascii=False, indent=4) + print(f"\nresult保存到{args.output}文件") + except Exception as e: + print(f"\n保存结果到文件失败: {e}") else: # Interactive mode print("\nEntering interactive mode. 
Type 'quit' to exit.\n") @@ -593,6 +884,31 @@ def main(): print() result = agent.run(task) print(f"\nResult: {result}\n") + + # Save result to file if output is specified + if args.output: + try: + import json + # Create parent directory if it doesn't exist + output_dir = os.path.dirname(args.output) + if output_dir: + os.makedirs(output_dir, exist_ok=True) + + # Prepare data to save + if getattr(args, "all", False): + # Save all steps + save_data = [res.message or res.thinking for res in agent.history] + else: + # Save only final result + save_data = [result] + + # Save to the specified output file + with open(args.output, "w", encoding="utf-8") as f: + json.dump(save_data, f, ensure_ascii=False, indent=4) + print(f"result保存到{args.output}文件\n") + except Exception as e: + print(f"保存结果到文件失败: {e}\n") + agent.reset() except KeyboardInterrupt: @@ -602,5 +918,89 @@ def main(): print(f"\nError: {e}\n") +def main(): + """Main entry point.""" + args = parse_args() + _run_main(args) + + +def main_params( + base_url: str = None, + model: str = None, + apikey: str = None, + max_steps: int = None, + device_id: str = None, + connect: str = None, + disconnect: str = None, + list_devices: bool = False, + enable_tcpip: int = None, + wda_url: str = None, + pair: bool = False, + wda_status: bool = False, + quiet: bool = False, + list_apps: bool = False, + lang: str = None, + device_type: str = None, + output: str = None, + task: str = None, + allow_all_apps: bool = False, + save_all: bool = False, +): + """ + Main entry point with parameters. 
+ + Args: + base_url: Model API base URL + model: Model name + apikey: API key for model authentication + max_steps: Maximum steps per task + device_id: Device ID + connect: Connect to remote device (e.g., "192.168.1.100:5555") + disconnect: Disconnect from remote device (or "all" to disconnect all) + list_devices: List connected devices and exit + enable_tcpip: Enable TCP/IP debugging on USB device (port number) + wda_url: WebDriverAgent URL for iOS + pair: Pair with iOS device + wda_status: Show WebDriverAgent status and exit (iOS only) + quiet: Suppress verbose output + list_apps: List supported apps and exit + lang: Language for system prompt (cn or en) + device_type: Device type (adb, hdc, or ios) + output: Output file path to save result (e.g., results/result.json) + task: Task to execute + allow_all_apps: Allow launching any app, not limited to the configured app list + save_all: Save all intermediate step results to the output file, not just the final result + """ + # Create a namespace object similar to argparse.Namespace + class Args: + pass + + args = Args() + + # Set default values from environment or defaults + args.base_url = base_url if base_url is not None else os.getenv("PHONE_AGENT_BASE_URL", "http://localhost:8000/v1") + args.model = model if model is not None else os.getenv("PHONE_AGENT_MODEL", "autoglm-phone-9b") + args.apikey = apikey if apikey is not None else os.getenv("PHONE_AGENT_API_KEY", "EMPTY") + args.max_steps = max_steps if max_steps is not None else int(os.getenv("PHONE_AGENT_MAX_STEPS", "100")) + args.device_id = device_id if device_id is not None else os.getenv("PHONE_AGENT_DEVICE_ID") + args.connect = connect + args.disconnect = disconnect + args.list_devices = list_devices + args.enable_tcpip = enable_tcpip + args.wda_url = wda_url if wda_url is not None else os.getenv("PHONE_AGENT_WDA_URL", "http://localhost:8100") + args.pair = pair + args.wda_status = wda_status + args.quiet = quiet + args.list_apps = list_apps + args.lang = 
lang if lang is not None else os.getenv("PHONE_AGENT_LANG", "cn") + args.device_type = device_type if device_type is not None else os.getenv("PHONE_AGENT_DEVICE_TYPE", "adb") + args.output = output + args.task = task + args.allow_all_apps = allow_all_apps + args.all = save_all + + _run_main(args) + + if __name__ == "__main__": main() diff --git a/phone_agent/actions/handler.py b/phone_agent/actions/handler.py index 0bef1c3a..f8a0afb7 100644 --- a/phone_agent/actions/handler.py +++ b/phone_agent/actions/handler.py @@ -37,10 +37,12 @@ def __init__( device_id: str | None = None, confirmation_callback: Callable[[str], bool] | None = None, takeover_callback: Callable[[str], None] | None = None, + allow_all_apps: bool = False, ): self.device_id = device_id self.confirmation_callback = confirmation_callback or self._default_confirmation self.takeover_callback = takeover_callback or self._default_takeover + self.allow_all_apps = allow_all_apps def execute( self, action: dict[str, Any], screen_width: int, screen_height: int @@ -122,7 +124,7 @@ def _handle_launch(self, action: dict, width: int, height: int) -> ActionResult: return ActionResult(False, False, "No app name specified") device_factory = get_device_factory() - success = device_factory.launch_app(app_name, self.device_id) + success = device_factory.launch_app(app_name, self.device_id, allow_all_apps=self.allow_all_apps) if success: return ActionResult(True, False) return ActionResult(False, False, f"App not found: {app_name}") diff --git a/phone_agent/actions/handler_ios.py b/phone_agent/actions/handler_ios.py index c37f50d9..9354af36 100644 --- a/phone_agent/actions/handler_ios.py +++ b/phone_agent/actions/handler_ios.py @@ -44,11 +44,13 @@ def __init__( session_id: str | None = None, confirmation_callback: Callable[[str], bool] | None = None, takeover_callback: Callable[[str], None] | None = None, + allow_all_apps: bool = False, ): self.wda_url = wda_url self.session_id = session_id self.confirmation_callback = 
confirmation_callback or self._default_confirmation self.takeover_callback = takeover_callback or self._default_takeover + self.allow_all_apps = allow_all_apps def execute( self, action: dict[str, Any], screen_width: int, screen_height: int @@ -130,7 +132,7 @@ def _handle_launch(self, action: dict, width: int, height: int) -> ActionResult: return ActionResult(False, False, "No app name specified") success = launch_app( - app_name, wda_url=self.wda_url, session_id=self.session_id + app_name, wda_url=self.wda_url, session_id=self.session_id, allow_all_apps=self.allow_all_apps ) if success: return ActionResult(True, False) diff --git a/phone_agent/adb/device.py b/phone_agent/adb/device.py index 995336a1..59624893 100644 --- a/phone_agent/adb/device.py +++ b/phone_agent/adb/device.py @@ -206,15 +206,16 @@ def home(device_id: str | None = None, delay: float | None = None) -> None: def launch_app( - app_name: str, device_id: str | None = None, delay: float | None = None + app_name: str, device_id: str | None = None, delay: float | None = None, allow_all_apps: bool = False ) -> bool: """ Launch an app by name. Args: - app_name: The app name (must be in APP_PACKAGES). + app_name: The app name (must be in APP_PACKAGES if allow_all_apps is False). device_id: Optional ADB device ID. delay: Delay in seconds after launching. If None, uses configured default. + allow_all_apps: If True, allow launching any app by package name, not limited to APP_PACKAGES. Returns: True if app was launched, False if app not found. 
@@ -222,11 +223,16 @@ def launch_app( if delay is None: delay = TIMING_CONFIG.device.default_launch_delay - if app_name not in APP_PACKAGES: - return False - adb_prefix = _get_adb_prefix(device_id) - package = APP_PACKAGES[app_name] + + # If allow_all_apps is True, use app_name directly as package name + # Otherwise, check if app_name is in APP_PACKAGES + if allow_all_apps: + package = app_name # Use app_name directly as package name + else: + if app_name not in APP_PACKAGES: + return False + package = APP_PACKAGES[app_name] subprocess.run( adb_prefix diff --git a/phone_agent/agent.py b/phone_agent/agent.py index 36427917..1e716550 100644 --- a/phone_agent/agent.py +++ b/phone_agent/agent.py @@ -22,6 +22,7 @@ class AgentConfig: lang: str = "cn" system_prompt: str | None = None verbose: bool = True + allow_all_apps: bool = False def __post_init__(self): if self.system_prompt is None: @@ -80,6 +81,7 @@ def __init__( self._context: list[dict[str, Any]] = [] self._step_count = 0 + self.history: list[StepResult] = [] def run(self, task: str) -> str: """ @@ -93,9 +95,11 @@ def run(self, task: str) -> str: """ self._context = [] self._step_count = 0 + self.history = [] # First step with user prompt result = self._execute_step(task, is_first=True) + self.history.append(result) if result.finished: return result.message or "Task completed" @@ -103,6 +107,7 @@ def run(self, task: str) -> str: # Continue until finished or max steps reached while self._step_count < self.agent_config.max_steps: result = self._execute_step(is_first=False) + self.history.append(result) if result.finished: return result.message or "Task completed" diff --git a/phone_agent/agent_ios.py b/phone_agent/agent_ios.py index a3b20d9f..c87d7170 100644 --- a/phone_agent/agent_ios.py +++ b/phone_agent/agent_ios.py @@ -24,6 +24,7 @@ class IOSAgentConfig: lang: str = "cn" system_prompt: str | None = None verbose: bool = True + allow_all_apps: bool = False def __post_init__(self): if self.system_prompt is None: 
@@ -94,10 +95,12 @@ def __init__( session_id=self.agent_config.session_id, confirmation_callback=confirmation_callback, takeover_callback=takeover_callback, + allow_all_apps=self.agent_config.allow_all_apps, ) self._context: list[dict[str, Any]] = [] self._step_count = 0 + self.history: list[StepResult] = [] def run(self, task: str) -> str: """ @@ -111,9 +114,11 @@ def run(self, task: str) -> str: """ self._context = [] self._step_count = 0 + self.history = [] # First step with user prompt result = self._execute_step(task, is_first=True) + self.history.append(result) if result.finished: return result.message or "Task completed" @@ -121,6 +126,7 @@ def run(self, task: str) -> str: # Continue until finished or max steps reached while self._step_count < self.agent_config.max_steps: result = self._execute_step(is_first=False) + self.history.append(result) if result.finished: return result.message or "Task completed" diff --git a/phone_agent/device_factory.py b/phone_agent/device_factory.py index f7d3c466..1a30c181 100644 --- a/phone_agent/device_factory.py +++ b/phone_agent/device_factory.py @@ -9,6 +9,7 @@ class DeviceType(Enum): ADB = "adb" HDC = "hdc" + IOS = "ios" class DeviceFactory: @@ -34,9 +35,11 @@ def module(self): if self._module is None: if self.device_type == DeviceType.ADB: from phone_agent import adb + self._module = adb elif self.device_type == DeviceType.HDC: from phone_agent import hdc + self._module = hdc else: raise ValueError(f"Unknown device type: {self.device_type}") @@ -50,21 +53,43 @@ def get_current_app(self, device_id: str | None = None) -> str: """Get current app name.""" return self.module.get_current_app(device_id) - def tap(self, x: int, y: int, device_id: str | None = None, delay: float | None = None): + def tap( + self, x: int, y: int, device_id: str | None = None, delay: float | None = None + ): """Tap at coordinates.""" return self.module.tap(x, y, device_id, delay) - def double_tap(self, x: int, y: int, device_id: str | None = None, 
delay: float | None = None): + def double_tap( + self, x: int, y: int, device_id: str | None = None, delay: float | None = None + ): """Double tap at coordinates.""" return self.module.double_tap(x, y, device_id, delay) - def long_press(self, x: int, y: int, duration_ms: int = 3000, device_id: str | None = None, delay: float | None = None): + def long_press( + self, + x: int, + y: int, + duration_ms: int = 3000, + device_id: str | None = None, + delay: float | None = None, + ): """Long press at coordinates.""" return self.module.long_press(x, y, duration_ms, device_id, delay) - def swipe(self, start_x: int, start_y: int, end_x: int, end_y: int, duration_ms: int | None = None, device_id: str | None = None, delay: float | None = None): + def swipe( + self, + start_x: int, + start_y: int, + end_x: int, + end_y: int, + duration_ms: int | None = None, + device_id: str | None = None, + delay: float | None = None, + ): """Swipe from start to end.""" - return self.module.swipe(start_x, start_y, end_x, end_y, duration_ms, device_id, delay) + return self.module.swipe( + start_x, start_y, end_x, end_y, duration_ms, device_id, delay + ) def back(self, device_id: str | None = None, delay: float | None = None): """Press back button.""" @@ -74,9 +99,11 @@ def home(self, device_id: str | None = None, delay: float | None = None): """Press home button.""" return self.module.home(device_id, delay) - def launch_app(self, app_name: str, device_id: str | None = None, delay: float | None = None) -> bool: + def launch_app( + self, app_name: str, device_id: str | None = None, delay: float | None = None, allow_all_apps: bool = False + ) -> bool: """Launch an app.""" - return self.module.launch_app(app_name, device_id, delay) + return self.module.launch_app(app_name, device_id, delay, allow_all_apps) def type_text(self, text: str, device_id: str | None = None): """Type text.""" @@ -102,9 +129,11 @@ def get_connection_class(self): """Get the connection class (ADBConnection or 
HDCConnection).""" if self.device_type == DeviceType.ADB: from phone_agent.adb import ADBConnection + return ADBConnection elif self.device_type == DeviceType.HDC: from phone_agent.hdc import HDCConnection + return HDCConnection else: raise ValueError(f"Unknown device type: {self.device_type}") diff --git a/phone_agent/hdc/device.py b/phone_agent/hdc/device.py index 63f23c3a..7e268164 100644 --- a/phone_agent/hdc/device.py +++ b/phone_agent/hdc/device.py @@ -218,15 +218,16 @@ def home(device_id: str | None = None, delay: float | None = None) -> None: def launch_app( - app_name: str, device_id: str | None = None, delay: float | None = None + app_name: str, device_id: str | None = None, delay: float | None = None, allow_all_apps: bool = False ) -> bool: """ Launch an app by name. Args: - app_name: The app name (must be in APP_PACKAGES). + app_name: The app name (must be in APP_PACKAGES if allow_all_apps is False). device_id: Optional HDC device ID. delay: Delay in seconds after launching. If None, uses configured default. + allow_all_apps: If True, allow launching any app by bundle name, not limited to APP_PACKAGES. Returns: True if app was launched, False if app not found. 
@@ -234,17 +235,23 @@ def launch_app( if delay is None: delay = TIMING_CONFIG.device.default_launch_delay - if app_name not in APP_PACKAGES: - print(f"[HDC] App '{app_name}' not found in HarmonyOS app list") - print(f"[HDC] Available apps: {', '.join(sorted(APP_PACKAGES.keys())[:10])}...") - return False - hdc_prefix = _get_hdc_prefix(device_id) - bundle = APP_PACKAGES[app_name] - - # Get the ability name for this bundle - # Default to "EntryAbility" if not specified in APP_ABILITIES - ability = APP_ABILITIES.get(bundle, "EntryAbility") + + # If allow_all_apps is True, use app_name directly as bundle name + # Otherwise, check if app_name is in APP_PACKAGES + if allow_all_apps: + bundle = app_name # Use app_name directly as bundle name + # Default to "EntryAbility" if not specified in APP_ABILITIES + ability = APP_ABILITIES.get(bundle, "EntryAbility") + else: + if app_name not in APP_PACKAGES: + print(f"[HDC] App '{app_name}' not found in HarmonyOS app list") + print(f"[HDC] Available apps: {', '.join(sorted(APP_PACKAGES.keys())[:10])}...") + return False + bundle = APP_PACKAGES[app_name] + # Get the ability name for this bundle + # Default to "EntryAbility" if not specified in APP_ABILITIES + ability = APP_ABILITIES.get(bundle, "EntryAbility") # HarmonyOS uses 'aa start' command to launch apps # Format: aa start -b {bundle} -a {ability} diff --git a/phone_agent/xctest/device.py b/phone_agent/xctest/device.py index 49fc379c..c867fad5 100644 --- a/phone_agent/xctest/device.py +++ b/phone_agent/xctest/device.py @@ -355,26 +355,33 @@ def launch_app( wda_url: str = "http://localhost:8100", session_id: str | None = None, delay: float = 1.0, + allow_all_apps: bool = False, ) -> bool: """ Launch an app by name. Args: - app_name: The app name (must be in APP_PACKAGES). + app_name: The app name (must be in APP_PACKAGES if allow_all_apps is False). wda_url: WebDriverAgent URL. session_id: Optional WDA session ID. delay: Delay in seconds after launching. 
+ allow_all_apps: If True, allow launching any app by bundle ID, not limited to APP_PACKAGES. Returns: True if app was launched, False if app not found. """ - if app_name not in APP_PACKAGES: - return False - try: import requests - bundle_id = APP_PACKAGES[app_name] + # If allow_all_apps is True, use app_name directly as bundle ID + # Otherwise, check if app_name is in APP_PACKAGES + if allow_all_apps: + bundle_id = app_name # Use app_name directly as bundle ID + else: + if app_name not in APP_PACKAGES: + return False + bundle_id = APP_PACKAGES[app_name] + url = _get_wda_session_url(wda_url, session_id, "wda/apps/launch") response = requests.post( diff --git a/scripts/get_package_name.py b/scripts/get_package_name.py new file mode 100644 index 00000000..e305325a --- /dev/null +++ b/scripts/get_package_name.py @@ -0,0 +1,235 @@ +#!/usr/bin/env python3 +""" +工具脚本:查询Android应用的包名 +""" + +import subprocess +import sys +import re + + +def list_all_packages(device_id: str | None = None, third_party_only: bool = False) -> list[str]: + """ + 列出所有已安装应用的包名。 + + Args: + device_id: 可选的设备ID + third_party_only: 是否只显示第三方应用 + + Returns: + 包名列表 + """ + adb_prefix = ["adb"] + if device_id: + adb_prefix = ["adb", "-s", device_id] + + cmd = adb_prefix + ["shell", "pm", "list", "packages"] + if third_party_only: + cmd.append("-3") + + result = subprocess.run(cmd, capture_output=True, text=True) + if result.returncode != 0: + print(f"错误: {result.stderr}") + return [] + + packages = [] + for line in result.stdout.strip().split("\n"): + if line.startswith("package:"): + package = line.replace("package:", "").strip() + packages.append(package) + + return packages + + +def get_current_package(device_id: str | None = None) -> str | None: + """ + 获取当前前台应用的包名。 + + Args: + device_id: 可选的设备ID + + Returns: + 包名,如果无法获取则返回None + """ + adb_prefix = ["adb"] + if device_id: + adb_prefix = ["adb", "-s", device_id] + + result = subprocess.run( + adb_prefix + ["shell", "dumpsys", "window"], + 
capture_output=True, + text=True + ) + + if result.returncode != 0: + return None + + output = result.stdout + # 查找 mCurrentFocus 或 mFocusedApp + for line in output.split("\n"): + if "mCurrentFocus" in line or "mFocusedApp" in line: + # 提取包名,格式通常是 com.package.name/ActivityName + match = re.search(r'([a-z][a-z0-9_]*\.)+[a-z][a-z0-9_]*', line) + if match: + return match.group(0) + + return None + + +def search_packages(keyword: str, device_id: str | None = None) -> list[str]: + """ + 搜索包含关键词的包名。 + + Args: + keyword: 搜索关键词 + device_id: 可选的设备ID + + Returns: + 匹配的包名列表 + """ + all_packages = list_all_packages(device_id, third_party_only=False) + keyword_lower = keyword.lower() + return [pkg for pkg in all_packages if keyword_lower in pkg.lower()] + + +def get_app_info(package_name: str, device_id: str | None = None) -> dict: + """ + 获取应用的详细信息。 + + Args: + package_name: 包名 + device_id: 可选的设备ID + + Returns: + 应用信息字典 + """ + adb_prefix = ["adb"] + if device_id: + adb_prefix = ["adb", "-s", device_id] + + # 获取应用信息 + result = subprocess.run( + adb_prefix + ["shell", "dumpsys", "package", package_name], + capture_output=True, + text=True + ) + + info = { + "package": package_name, + "installed": False, + "version": None, + "label": None, + } + + if result.returncode == 0: + output = result.stdout + info["installed"] = True + + # 提取版本信息 + version_match = re.search(r'versionName=([^\s]+)', output) + if version_match: + info["version"] = version_match.group(1) + + # 提取应用标签(需要从另一个命令获取) + label_result = subprocess.run( + adb_prefix + ["shell", "pm", "dump", package_name], + capture_output=True, + text=True + ) + if label_result.returncode == 0: + label_match = re.search(r'label=([^\s]+)', label_result.stdout) + if label_match: + info["label"] = label_match.group(1) + + return info + + +def main(): + """主函数""" + if len(sys.argv) < 2: + print("用法:") + print(" python get_package_name.py list # 列出所有第三方应用") + print(" python get_package_name.py list-all # 列出所有应用(包括系统应用)") + print(" 
python get_package_name.py current # 显示当前前台应用的包名") + print(" python get_package_name.py search <关键词> # 搜索包含关键词的包名") + print(" python get_package_name.py info <包名> # 显示应用的详细信息") + print(" python get_package_name.py device <设备ID> <命令> # 指定设备ID") + sys.exit(1) + + command = sys.argv[1] + device_id = None + + # 检查是否有设备ID参数 + if command == "device" and len(sys.argv) >= 4: + device_id = sys.argv[2] + command = sys.argv[3] + args = sys.argv[4:] + else: + args = sys.argv[2:] + + if command == "list": + print("第三方应用包名列表:") + print("-" * 60) + packages = list_all_packages(device_id, third_party_only=True) + for pkg in sorted(packages): + print(f" {pkg}") + print(f"\n共 {len(packages)} 个应用") + + elif command == "list-all": + print("所有应用包名列表:") + print("-" * 60) + packages = list_all_packages(device_id, third_party_only=False) + for pkg in sorted(packages): + print(f" {pkg}") + print(f"\n共 {len(packages)} 个应用") + + elif command == "current": + package = get_current_package(device_id) + if package: + print(f"当前前台应用包名: {package}") + info = get_app_info(package, device_id) + if info.get("label"): + print(f"应用名称: {info['label']}") + else: + print("无法获取当前应用的包名") + + elif command == "search": + if not args: + print("错误: 请提供搜索关键词") + sys.exit(1) + keyword = args[0] + print(f"搜索包含 '{keyword}' 的包名:") + print("-" * 60) + packages = search_packages(keyword, device_id) + if packages: + for pkg in sorted(packages): + print(f" {pkg}") + print(f"\n找到 {len(packages)} 个匹配的应用") + else: + print("未找到匹配的应用") + + elif command == "info": + if not args: + print("错误: 请提供包名") + sys.exit(1) + package = args[0] + print(f"应用信息: {package}") + print("-" * 60) + info = get_app_info(package, device_id) + if info["installed"]: + print(f"包名: {info['package']}") + if info.get("label"): + print(f"应用名称: {info['label']}") + if info.get("version"): + print(f"版本: {info['version']}") + else: + print(f"错误: 未找到包名为 '{package}' 的应用") + + else: + print(f"错误: 未知命令 '{command}'") + sys.exit(1) + + +if __name__ == "__main__": + 
main() + From 28d6fa4c563e1f274cd43c289f476092e70c556d Mon Sep 17 00:00:00 2001 From: feifeiyechuan <1010155688@qq.com> Date: Sat, 20 Dec 2025 17:43:08 +0800 Subject: [PATCH 2/2] feat: add structured result saving and main_params API --- README_UPDATE.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/README_UPDATE.md b/README_UPDATE.md index bbe63bc1..2291a68a 100644 --- a/README_UPDATE.md +++ b/README_UPDATE.md @@ -2,9 +2,6 @@ 本文档记录了 Open-AutoGLM 项目的最新功能更新和使用说明。 -## 📅 更新日期 -2024年最新更新 - --- ## 🆕 新增功能