Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Runs the project's lint script on every pull request targeting main and on every push to main.
name: Lint

on:
  pull_request:
    branches:
      - main
  push:
    branches:
      - main

# Least privilege: the job only needs to read the repository contents.
permissions:
  contents: read

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      # Actions are pinned to full commit SHAs; the trailing comment records the release tag.
      - name: Checkout Code
        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1

      # pnpm must be installed before setup-node so that `cache: 'pnpm'` can locate its store.
      - name: Setup pnpm
        uses: pnpm/action-setup@41ff72655975bd51cab0327fa583b6e92b6d3061 # v4.2.0

      - name: Setup Node.js
        uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # v6.1.0
        with:
          node-version-file: '.nvmrc'
          cache: 'pnpm'

      - name: Install Dependencies
        run: pnpm install

      - name: Lint & Type Check
        run: pnpm run lint
66 changes: 38 additions & 28 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,22 @@ npm start # Run built server from lib/server.js
```
src/
├── server.ts # MCP server entry, registers all tools + MCP resources
├── session/
│ ├── state.ts # Session state maps, getBrowser(), getState(), SessionMetadata
│ └── lifecycle.ts # registerSession(), handleSessionTransition(), closeSession()
├── providers/
│ ├── types.ts # SessionProvider interface, ConnectionConfig
│ ├── local-browser.provider.ts # Chrome/Firefox/Edge/Safari capability building
│ └── local-appium.provider.ts # iOS/Android via appium.config.ts
├── tools/
│ ├── browser.tool.ts # Session state + start_browser + getBrowser()
│ ├── browser.tool.ts # start_browser, close_session, readTabs(), switch_tab
│ ├── app-session.tool.ts # start_app_session (iOS/Android via Appium)
│ ├── navigate.tool.ts # URL navigation
│ ├── get-visible-elements.tool.ts # Element detection (web + mobile)
│ ├── click.tool.ts # Click/tap actions
│ ├── navigate.tool.ts # navigateAction() + navigateTool
│ ├── click.tool.ts # clickAction() + clickTool
│ ├── set-value.tool.ts # setValueAction() + setValueTool
│ ├── scroll.tool.ts # scrollAction() + scrollTool
│ ├── gestures.tool.ts # tapAction(), swipeAction(), dragAndDropAction()
│ ├── execute-sequence.tool.ts # Batch action sequencing with stability + state delta
│ └── ... # Other tools follow same pattern
├── recording/
│ ├── step-recorder.ts # withRecording HOF, appendStep, session history access
Expand All @@ -34,27 +44,31 @@ src/
│ ├── generate-all-locators.ts # Multi-strategy selector generation
│ └── source-parsing.ts # XML page source parsing for mobile
├── config/
│ └── appium.config.ts # iOS/Android capability builders
│ └── appium.config.ts # iOS/Android capability builders (used by local-appium.provider)
├── utils/
│ ├── parse-variables.ts # URI template variable parsing (parseBool, parseNumber, etc.)
│ ├── stability-detector.ts # Page stability polling (signature-based, 200ms/500ms/5s)
│ └── state-diff.ts # Element before/after diff (appeared, disappeared, changed)
└── types/
├── tool.ts # ToolDefinition interface
└── recording.ts # RecordedStep, SessionHistory interfaces
```

### Session State

Single active session model in `browser.tool.ts`:
Single active session model in `src/session/state.ts`:

```typescript
const browsers: Map<string, WebdriverIO.Browser> = new Map();
let currentSession: string | null = null;
const sessionMetadata: Map<string, SessionMetadata> = new Map();

export function getBrowser(): WebdriverIO.Browser {
// Returns current active session or throws
}
// Private state — access via getState() or getBrowser()
export function getBrowser(): WebdriverIO.Browser { ... }
export function getState() { return state; }
export interface SessionMetadata { type: 'browser' | 'ios' | 'android'; capabilities: Record<string, unknown>; isAttached: boolean; }
```

State shared with `app-session.tool.ts` via `(getBrowser as any).__state`.
Session lifecycle managed via `src/session/lifecycle.ts`:
- `registerSession()` — registers browser + metadata + history, handles transition sentinel
- `handleSessionTransition()` — appends `__session_transition__` step to outgoing session
- `closeSession()` — terminates or detaches, marks endedAt, cleans up maps

### Tool Pattern

Expand Down Expand Up @@ -103,14 +117,21 @@ MCP resources expose history without tool calls:

| File | Purpose |
|----------------------------------------------------|-----------------------------------------------|
| `src/server.ts` | MCP server init, tool registration |
| `src/tools/browser.tool.ts` | Session state management, `getBrowser()` |
| `src/server.ts` | MCP server init, tool + resource registration |
| `src/session/state.ts` | Session state maps, `getBrowser()`, `getState()` |
| `src/session/lifecycle.ts` | `registerSession()`, `closeSession()`, session transitions |
| `src/tools/browser.tool.ts` | `start_browser`, `close_session`, `switch_tab`, `readTabs()` |
| `src/tools/app-session.tool.ts` | Appium session creation |
| `src/tools/execute-sequence.tool.ts` | Batch action sequencing with stability + delta |
| `src/providers/local-browser.provider.ts` | Chrome/Firefox/Edge/Safari capability building |
| `src/providers/local-appium.provider.ts` | iOS/Android capabilities via appium.config.ts |
| `src/scripts/get-interactable-browser-elements.ts` | Browser-context element detection |
| `src/locators/` | Mobile element detection + locator generation |
| `src/recording/step-recorder.ts` | `withRecording(toolName, cb)` HOF — wraps every tool for step logging |
| `src/recording/code-generator.ts` | Generates runnable WebdriverIO JS from `SessionHistory` |
| `src/recording/resources.ts` | Builds text for `wdio://sessions` and `wdio://session/*/steps` resources |
| `src/utils/stability-detector.ts` | Page stability detection (signature polling) |
| `src/utils/state-diff.ts` | Element state diff (appeared/disappeared/changed) |
| `tsup.config.ts` | Build configuration |

## Gotchas
Expand All @@ -129,16 +150,6 @@ console.log = (...args) => process.stderr.write(util.format(...args) + '\n');
`get-interactable-browser-elements.ts` executes in browser context via `browser.execute()`. Cannot use Node.js APIs or
external imports.

### Mobile State Sharing Hack

`app-session.tool.ts` accesses browser.tool.ts state via:

```typescript
const state = (getBrowser as any).__state;
```

This maintains single-session behavior across browser and mobile.

### Auto-Detach Behavior

Sessions created with `noReset: true` or without `appPath` automatically detach on close (don't terminate on Appium
Expand Down Expand Up @@ -179,6 +190,5 @@ catch (e) {

See `docs/architecture/` for proposals:

- `session-configuration-proposal.md` — Cloud provider pattern (BrowserStack, SauceLabs)
- `interaction-sequencing-proposal.md` — Batch actions with state delta detection
- `session-configuration-proposal.md` — Cloud provider pattern (BrowserStack, Sauce Labs); `src/providers/types.ts` is the extension point
- `multi-session-proposal.md` — Parallel sessions for sub-agent coordination
12 changes: 12 additions & 0 deletions docs/architecture/mcp-resources-notes.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# MCP Resources — Notes

## Template resources are not discoverable via ListResources

The `session-steps` resource uses a URI template (`wdio://session/{sessionId}/steps`) and does
not appear in `ListMcpResourcesTool` output. Only fixed-URI resources (`wdio://sessions`,
`wdio://session/current/steps`) are listed.

Template resources must be read directly by constructing the URI — clients cannot discover them
through the standard list call. If client discoverability matters, consider documenting the
template pattern in the fixed `wdio://sessions` index response, or exposing a separate resource
that advertises available URI templates.
101 changes: 101 additions & 0 deletions src/providers/local-appium.provider.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import type { SessionProvider, ConnectionConfig } from './types';
import { buildIOSCapabilities, buildAndroidCapabilities, getAppiumServerConfig } from '../config/appium.config';

/**
 * Option keys understood by `LocalAppiumProvider`.
 *
 * The provider methods accept an untyped `Record<string, unknown>` and read these
 * keys by name, so this type documents the expected shape rather than enforcing it.
 */
export type LocalAppiumOptions = {
/** `'iOS'` selects the iOS capability builder; any other value falls through to the Android builder. */
platform: 'iOS' | 'Android';
/** Passed as the first argument to the capability builder. When absent, `shouldAutoDetach` returns true. */
appPath?: string;
/** Forwarded to the capability builder. */
deviceName: string;
/** Forwarded to the capability builder. */
platformVersion?: string;
/** Falls back to `'XCUITest'` on iOS and `'UiAutomator2'` otherwise when empty or missing. */
automationName?: string;
/** Forwarded to `getAppiumServerConfig` as `hostname`. */
appiumHost?: string;
/** Forwarded to `getAppiumServerConfig` as `port`. */
appiumPort?: number;
/** Forwarded to `getAppiumServerConfig` as `path`. */
appiumPath?: string;
/** Forwarded to both capability builders. */
autoGrantPermissions?: boolean;
/** Forwarded to both capability builders. */
autoAcceptAlerts?: boolean;
/** Forwarded to both capability builders. */
autoDismissAlerts?: boolean;
/** Forwarded to the Android capability builder only. */
appWaitActivity?: string;
/** Forwarded to the iOS capability builder only. */
udid?: string;
/** Forwarded to both capability builders. When exactly `true`, `shouldAutoDetach` returns true. */
noReset?: boolean;
/** Forwarded to both capability builders. */
fullReset?: boolean;
/** Forwarded to both capability builders. */
newCommandTimeout?: number;
/** Extra capabilities spread over the built ones; a key present here overrides the built value. */
capabilities?: Record<string, unknown>;
};

/**
 * Session provider for iOS/Android sessions driven through an Appium server.
 *
 * Options arrive as an untyped record; each method reads the keys it needs by name
 * and casts them to the type the capability builders expect.
 */
export class LocalAppiumProvider implements SessionProvider {
  name = 'local-appium';

  /**
   * Resolves the Appium server endpoint.
   *
   * @param options Reads `appiumHost`, `appiumPort` and `appiumPath` and hands them to
   *   `getAppiumServerConfig` as `hostname`, `port` and `path`.
   * @returns The resolved config with `protocol: 'http'` as the base value; a `protocol`
   *   key in the resolved config would override it because it is spread afterwards.
   */
  getConnectionConfig(options: Record<string, unknown>): ConnectionConfig {
    const config = getAppiumServerConfig({
      hostname: options.appiumHost as string | undefined,
      port: options.appiumPort as number | undefined,
      path: options.appiumPath as string | undefined,
    });
    return { protocol: 'http', ...config };
  }

  /**
   * Builds the capability object for a new Appium session.
   *
   * `platform === 'iOS'` selects `buildIOSCapabilities`; every other value selects
   * `buildAndroidCapabilities`. User-supplied `capabilities` are spread last, so they
   * override built values with the same key.
   *
   * @param options Untyped option record (see the keys read below).
   * @returns The merged capabilities with every `undefined`-valued key removed.
   */
  buildCapabilities(options: Record<string, unknown>): Record<string, unknown> {
    const platform = options.platform as string;
    const appPath = options.appPath as string | undefined;
    const deviceName = options.deviceName as string;
    const platformVersion = options.platformVersion as string | undefined;
    const autoGrantPermissions = options.autoGrantPermissions as boolean | undefined;
    const autoAcceptAlerts = options.autoAcceptAlerts as boolean | undefined;
    const autoDismissAlerts = options.autoDismissAlerts as boolean | undefined;
    const udid = options.udid as string | undefined;
    const noReset = options.noReset as boolean | undefined;
    const fullReset = options.fullReset as boolean | undefined;
    const newCommandTimeout = options.newCommandTimeout as number | undefined;
    const appWaitActivity = options.appWaitActivity as string | undefined;
    const userCapabilities = (options.capabilities as Record<string, unknown> | undefined) ?? {};

    // No explicit `Record<string, any>` here: the builder's own return type is kept and
    // only the merged object below is annotated, which avoids the no-explicit-any warning.
    const platformCapabilities = platform === 'iOS'
      ? buildIOSCapabilities(appPath, {
        deviceName,
        platformVersion,
        // `||` (not `??`) so an empty string also falls back to the default driver.
        automationName: (options.automationName as 'XCUITest') || 'XCUITest',
        autoGrantPermissions,
        autoAcceptAlerts,
        autoDismissAlerts,
        udid,
        noReset,
        fullReset,
        newCommandTimeout,
      })
      : buildAndroidCapabilities(appPath, {
        deviceName,
        platformVersion,
        automationName: (options.automationName as 'UiAutomator2' | 'Espresso') || 'UiAutomator2',
        autoGrantPermissions,
        autoAcceptAlerts,
        autoDismissAlerts,
        appWaitActivity,
        noReset,
        fullReset,
        newCommandTimeout,
      });

    const mergedCapabilities: Record<string, unknown> = {
      ...platformCapabilities,
      ...userCapabilities,
    };

    // Drop keys whose value is undefined so they are not sent as explicit capabilities.
    for (const [key, value] of Object.entries(mergedCapabilities)) {
      if (value === undefined) {
        delete mergedCapabilities[key];
      }
    }

    return mergedCapabilities;
  }

  /**
   * Maps the `platform` option to the session type by lower-casing it.
   *
   * NOTE(review): the result is not validated against `'ios' | 'android'`, and
   * `buildCapabilities` compares `platform` case-sensitively against `'iOS'`, so a
   * lower-case `'ios'` yields session type `'ios'` but Android capabilities — confirm
   * that callers always pass the exact `'iOS'` / `'Android'` spelling.
   */
  getSessionType(options: Record<string, unknown>): 'ios' | 'android' {
    const platform = options.platform as string;
    return platform.toLowerCase() as 'ios' | 'android';
  }

  /**
   * @returns true when `noReset` is exactly `true` or when no `appPath` was supplied.
   */
  shouldAutoDetach(options: Record<string, unknown>): boolean {
    return options.noReset === true || !options.appPath;
  }
}

/** Module-level `LocalAppiumProvider` instance, created once when this module is loaded. */
export const localAppiumProvider = new LocalAppiumProvider();
118 changes: 118 additions & 0 deletions src/providers/local-browser.provider.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
import type { SessionProvider, ConnectionConfig } from './types';

/**
 * Option keys understood by `LocalBrowserProvider`.
 *
 * The provider methods accept an untyped `Record<string, unknown>` and read these
 * keys by name, so this type documents the expected shape rather than enforcing it.
 */
export type LocalBrowserOptions = {
/** Browser to launch; `buildCapabilities` falls back to `'chrome'` when missing. */
browser?: 'chrome' | 'firefox' | 'edge' | 'safari';
/** Defaults to `true` in `buildCapabilities`; ignored when the browser is `'safari'`. */
headless?: boolean;
/** Defaults to 1920; used for the `--window-size` argument given to Chrome and Edge. */
windowWidth?: number;
/** Defaults to 1080; used for the `--window-size` argument given to Chrome and Edge. */
windowHeight?: number;
/** Extra capabilities merged over the built ones; vendor option `args` arrays are concatenated. */
capabilities?: Record<string, unknown>;
};

/**
 * Session provider for desktop browsers (Chrome, Edge, Firefox, Safari) started with
 * WebdriverIO's default connection settings.
 */
export class LocalBrowserProvider implements SessionProvider {
  name = 'local-browser';

  /** @returns An empty config, so WebdriverIO's own defaults apply. */
  getConnectionConfig(_options: Record<string, unknown>): ConnectionConfig {
    return {}; // local — use WebdriverIO defaults
  }

  /**
   * Builds the capability object for the requested browser.
   *
   * Defaults: `browser` = `'chrome'`, `headless` = `true`, window 1920x1080.
   * Headless is ignored for Safari. User-supplied `capabilities` override built
   * top-level keys, except the three vendor option objects, which are merged via
   * `mergeCapabilityOptions` (their `args` arrays are concatenated, defaults first).
   *
   * @param options Untyped option record; reads `browser`, `headless`, `windowWidth`,
   *   `windowHeight` and `capabilities`.
   * @returns The merged capabilities with every `undefined`-valued key removed. An
   *   unrecognised `browser` value yields capabilities without a `browserName`.
   */
  buildCapabilities(options: Record<string, unknown>): Record<string, unknown> {
    const selectedBrowser = (options.browser as string | undefined) ?? 'chrome';
    const headless = (options.headless as boolean | undefined) ?? true;
    const windowWidth = (options.windowWidth as number | undefined) ?? 1920;
    const windowHeight = (options.windowHeight as number | undefined) ?? 1080;
    const userCapabilities = (options.capabilities as Record<string, unknown> | undefined) ?? {};

    const headlessSupported = selectedBrowser !== 'safari';
    const effectiveHeadless = headless && headlessSupported;

    // Shared by Chrome and Edge.
    // NOTE(review): `--no-sandbox`, `--disable-web-security` and
    // `--allow-running-insecure-content` are applied unconditionally — confirm this is
    // intended for every session rather than opt-in.
    const chromiumArgs = [
      `--window-size=${windowWidth},${windowHeight}`,
      '--no-sandbox',
      '--disable-search-engine-choice-screen',
      '--disable-infobars',
      '--log-level=3',
      '--use-fake-device-for-media-stream',
      '--use-fake-ui-for-media-stream',
      '--disable-web-security',
      '--allow-running-insecure-content',
    ];

    if (effectiveHeadless) {
      chromiumArgs.push('--headless=new', '--disable-gpu', '--disable-dev-shm-usage');
    }

    const firefoxArgs: string[] = [];
    if (effectiveHeadless && selectedBrowser === 'firefox') {
      firefoxArgs.push('-headless');
    }

    // `unknown` instead of `any`: values are only assigned here, never read with a
    // specific type, so no unchecked access is needed.
    const capabilities: Record<string, unknown> = {
      acceptInsecureCerts: true,
    };

    switch (selectedBrowser) {
      case 'chrome':
        capabilities.browserName = 'chrome';
        capabilities['goog:chromeOptions'] = { args: chromiumArgs };
        break;
      case 'edge':
        capabilities.browserName = 'msedge';
        capabilities['ms:edgeOptions'] = { args: chromiumArgs };
        break;
      case 'firefox':
        capabilities.browserName = 'firefox';
        // Only emit the options object when there is something to put in it.
        if (firefoxArgs.length > 0) {
          capabilities['moz:firefoxOptions'] = { args: firefoxArgs };
        }
        break;
      case 'safari':
        capabilities.browserName = 'safari';
        break;
    }

    // The vendor option keys are listed after the spreads so the deep-merged value
    // wins over the plain user override.
    const mergedCapabilities: Record<string, unknown> = {
      ...capabilities,
      ...userCapabilities,
      'goog:chromeOptions': this.mergeCapabilityOptions(capabilities['goog:chromeOptions'], userCapabilities['goog:chromeOptions']),
      'ms:edgeOptions': this.mergeCapabilityOptions(capabilities['ms:edgeOptions'], userCapabilities['ms:edgeOptions']),
      'moz:firefoxOptions': this.mergeCapabilityOptions(capabilities['moz:firefoxOptions'], userCapabilities['moz:firefoxOptions']),
    };

    // Vendor option keys that neither side provided come out as undefined; drop them.
    for (const [key, value] of Object.entries(mergedCapabilities)) {
      if (value === undefined) {
        delete mergedCapabilities[key];
      }
    }

    return mergedCapabilities;
  }

  /** @returns Always `'browser'`. */
  getSessionType(_options: Record<string, unknown>): 'browser' {
    return 'browser';
  }

  /** @returns Always `false`. */
  shouldAutoDetach(_options: Record<string, unknown>): boolean {
    return false;
  }

  /**
   * Merges one vendor option object (e.g. `goog:chromeOptions`).
   *
   * If either side is missing or not an object, the custom value is returned when it
   * is not null/undefined, otherwise the default. When both are objects they are
   * shallow-merged with custom keys winning, except `args`, which becomes the default
   * args followed by the custom args.
   */
  private mergeCapabilityOptions(defaultOptions: unknown, customOptions: unknown): unknown {
    if (!defaultOptions || typeof defaultOptions !== 'object' || !customOptions || typeof customOptions !== 'object') {
      return customOptions ?? defaultOptions;
    }

    const defaultRecord = defaultOptions as Record<string, unknown>;
    const customRecord = customOptions as Record<string, unknown>;
    const merged = { ...defaultRecord, ...customRecord };
    if (Array.isArray(defaultRecord.args) || Array.isArray(customRecord.args)) {
      merged.args = [
        ...(Array.isArray(defaultRecord.args) ? defaultRecord.args : []),
        ...(Array.isArray(customRecord.args) ? customRecord.args : []),
      ];
    }
    return merged;
  }
}

/** Module-level `LocalBrowserProvider` instance, created once when this module is loaded. */
export const localBrowserProvider = new LocalBrowserProvider();
14 changes: 14 additions & 0 deletions src/providers/types.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/**
 * Where the automation server for a session can be reached.
 *
 * Every field is optional: `LocalBrowserProvider` returns an empty object so that
 * WebdriverIO's defaults apply, while `LocalAppiumProvider` returns `protocol: 'http'`
 * combined with whatever `getAppiumServerConfig` resolves.
 */
export interface ConnectionConfig {
hostname?: string;
port?: number;
path?: string;
protocol?: string;
}

/**
 * Contract implemented by each session backend (`local-browser`, `local-appium`).
 *
 * All methods receive the same untyped option record and read the keys they need.
 */
export interface SessionProvider {
/** Provider identifier, e.g. `'local-browser'` or `'local-appium'`. */
name: string;
/** Resolves the endpoint to connect to; an empty object means "use the defaults". */
getConnectionConfig(options: Record<string, unknown>): ConnectionConfig;
/** Builds the capability object for the new session from the given options. */
buildCapabilities(options: Record<string, unknown>): Record<string, unknown>;
/** Classifies the session that the given options would create. */
getSessionType(options: Record<string, unknown>): 'browser' | 'ios' | 'android';
/** Whether the session should auto-detach; presumably consulted on close to detach instead of terminate — verify against the caller. */
shouldAutoDetach(options: Record<string, unknown>): boolean;
}
Loading
Loading