jackwener
diff --git a/‎docs/adapters/browser/google.md‎
Lines changed: 62 additions & 0 deletions b/‎docs/adapters/browser/google.md‎
Lines changed: 62 additions & 0 deletions
diff --git a/‎src/clis/google/news.ts‎
Lines changed: 66 additions & 0 deletions b/‎src/clis/google/news.ts‎
Lines changed: 66 additions & 0 deletions
diff --git a/‎src/clis/google/search.ts‎
Lines changed: 133 additions & 0 deletions b/‎src/clis/google/search.ts‎
Lines changed: 133 additions & 0 deletions
diff --git a/‎src/clis/google/suggest.ts‎
Lines changed: 40 additions & 0 deletions b/‎src/clis/google/suggest.ts‎
Lines changed: 40 additions & 0 deletions
diff --git a/‎src/clis/google/trends.ts‎
Lines changed: 44 additions & 0 deletions b/‎src/clis/google/trends.ts‎
Lines changed: 44 additions & 0 deletions
@@ -0,0 +1,62 @@
+# Google
+
+**Mode**: 🌐 / 🔐 Mixed · **Domains**: `google.com`, `suggestqueries.google.com`, `news.google.com`, `trends.google.com`
+
+## Commands
+
+| Command | Description |
+|---------|-------------|
+| `opencli google search <keyword>` | Search Google and extract results from the page |
+| `opencli google suggest <keyword>` | Get Google search suggestions |
+| `opencli google news [keyword]` | Get Google News headlines (top stories or search) |
+| `opencli google trends` | Get Google Trends daily trending searches |
+
+## What works today
+
+- Public API commands work without a browser:
+  - `suggest` — JSON API, no auth needed
+  - `news` — RSS feed, supports top stories and keyword search
+  - `trends` — RSS feed, supports different regions
+- `google search` uses browser mode to extract results from google.com.
+
+## Current limitations
+
+- `google search` may trigger CAPTCHA in Standalone browser mode. Extension mode (with an established Chrome session) is more reliable.
+- Google frequently changes its DOM structure. If `search` stops returning results, selectors may need updating.
+- Snippet extraction may return empty for some results depending on Google's layout.
+
+## Usage Examples
+
+```bash
+# Search Google
+opencli google search "typescript tutorial" --limit 10
+
+# Get search suggestions
+opencli google suggest python
+
+# Get top news headlines
+opencli google news --limit 5
+
+# Search news for a topic
+opencli google news "artificial intelligence" --limit 10 --lang en --region US
+
+# Get trending searches in Japan
+opencli google trends --region JP --limit 10
+
+# Output as JSON
+opencli google search "machine learning" -f json
+```
+
+## Prerequisites
+
+- `suggest`, `news`, `trends` do not require Chrome.
+- `search` requires:
+  - Chrome running (or Standalone mode will auto-launch)
+  - For best results, use the [Browser Bridge extension](/guide/browser-bridge) with an established Google session
+
+## Notes
+
+- `suggest` defaults to `--lang zh-CN`; `search` and `news` default to `--lang en`. `trends` has no `--lang` option.
+- `news` supports `--lang` and `--region` parameters for localized results.
+- `trends` traffic values are raw strings (e.g. "500K+", "1,000,000+"), not numeric.
+- `search` output includes three result types: `result` (standard), `snippet` (featured answer box), and `paa` (People Also Ask).
@@ -0,0 +1,66 @@
+/**
+ * Google News via public RSS feed.
+ * Supports top stories (no keyword) and search (with keyword).
+ */
+
+import { cli, Strategy } from '../../registry.js';
+import { CliError } from '../../errors.js';
+import { parseRssItems } from './utils.js';
+
+/**
+ * Registers the `google news` command.
+ *
+ * Fetches the Google News RSS feed (the search feed when `keyword` is given,
+ * the top-stories feed otherwise) and maps each item to a row.
+ *
+ * Args: `keyword` (optional positional), `limit` (clamped to 1-100),
+ * `lang` and `region` (sent as `hl`, `gl`, and combined into `ceid`).
+ * Returns rows of `{ title, source, date, url }`.
+ * Throws `CliError('FETCH_ERROR')` when the response is not OK and
+ * `CliError('NOT_FOUND')` when no items were parsed from the feed.
+ */
+cli({
+  site: 'google',
+  name: 'news',
+  description: 'Get Google News headlines',
+  strategy: Strategy.PUBLIC,
+  browser: false,
+  args: [
+    { name: 'keyword', positional: true, help: 'Search query (omit for top stories)' },
+    { name: 'limit', type: 'int', default: 10, help: 'Number of results' },
+    { name: 'lang', default: 'en', help: 'Language short code (e.g. en, zh)' },
+    { name: 'region', default: 'US', help: 'Region code (e.g. US, CN)' },
+  ],
+  columns: ['title', 'source', 'date', 'url'],
+  func: async (_page, args) => {
+    // A non-numeric --limit would turn the clamp into NaN, and `slice(0, NaN)`
+    // returns an empty array; fall back to the declared default instead.
+    const requested = Number(args.limit);
+    const limit = Number.isFinite(requested)
+      ? Math.max(1, Math.min(Math.trunc(requested), 100))
+      : 10;
+    const lang = encodeURIComponent(args.lang);
+    const region = encodeURIComponent(args.region);
+    // `ceid` is "REGION:LANG". Encode it like the other parameters so that
+    // user-supplied values cannot inject extra query parameters into the URL.
+    const ceid = encodeURIComponent(`${args.region}:${args.lang}`);
+
+    // Top stories or search
+    const base = args.keyword
+      ? `https://news.google.com/rss/search?q=${encodeURIComponent(args.keyword)}&hl=${lang}&gl=${region}&ceid=${ceid}`
+      : `https://news.google.com/rss?hl=${lang}&gl=${region}&ceid=${ceid}`;
+
+    const resp = await fetch(base);
+    if (!resp.ok) {
+      throw new CliError('FETCH_ERROR', `HTTP ${resp.status}`, 'Check your network connection');
+    }
+
+    const xml = await resp.text();
+    const items = parseRssItems(xml, ['title', 'link', 'pubDate', 'source']);
+
+    if (!items.length) {
+      throw new CliError('NOT_FOUND', 'No news articles found', 'Try a different keyword or region');
+    }
+
+    return items.slice(0, limit).map(item => {
+      // Extract source: prefer <source> element, fallback to parsing title
+      let title = item['title'] || '';
+      let source = item['source'] || '';
+      if (!source) {
+        // No <source> value: treat the text after the last " - " as the source.
+        const idx = title.lastIndexOf(' - ');
+        if (idx !== -1) {
+          source = title.slice(idx + 3);
+          title = title.slice(0, idx);
+        }
+      }
+
+      return {
+        title,
+        source,
+        date: item['pubDate'] || '',
+        url: item['link'] || '',
+      };
+    });
+  },
+});
@@ -0,0 +1,133 @@
+/**
+ * Google Web Search via browser DOM extraction.
+ * Uses browser mode to navigate google.com and extract results from the DOM.
+ *
+ * Extraction strategy (2026-03): Google no longer uses `.g` class containers.
+ * Instead, we find all `a` tags containing `h3` within `#rso`, then walk up
+ * at most six ancestors, stopping at the first element that carries a
+ * `data-hveid` attribute, and search that container for the snippet text.
+ * This keeps standard-result extraction independent of Google's class names.
+ */
+
+import { cli, Strategy } from '../../registry.js';
+import { CliError } from '../../errors.js';
+
+/**
+ * Registers the `google search` command (browser mode).
+ *
+ * Navigates the page to a google.com search URL, waits, then evaluates a
+ * script in the page that collects three kinds of rows:
+ *   - `snippet`: a featured snippet found inside `#rso`
+ *   - `result`:  every `a` inside `#rso` that contains an `h3`
+ *   - `paa`:     "People Also Ask" questions
+ *
+ * Args: `keyword` (required positional), `limit` (clamped to 1-100),
+ * `lang` (sent as `hl`).
+ * Returns rows of `{ type, title, url, snippet }`; at most `limit` rows of
+ * type `result` are kept, `snippet` and `paa` rows are always kept.
+ * Throws `CliError('NOT_FOUND')` when the script returns no rows.
+ */
+cli({
+  site: 'google',
+  name: 'search',
+  description: 'Search Google',
+  domain: 'google.com',
+  strategy: Strategy.PUBLIC,
+  browser: true,
+  args: [
+    { name: 'keyword', positional: true, required: true, help: 'Search query' },
+    { name: 'limit', type: 'int', default: 10, help: 'Number of results (1-100)' },
+    { name: 'lang', default: 'en', help: 'Language short code (e.g. en, zh)' },
+  ],
+  columns: ['type', 'title', 'url', 'snippet'],
+  func: async (page, args) => {
+    // A non-numeric --limit would otherwise put `num=NaN` into the URL;
+    // fall back to the declared default instead.
+    const requested = Number(args.limit);
+    const limit = Number.isFinite(requested)
+      ? Math.max(1, Math.min(Math.trunc(requested), 100))
+      : 10;
+    const keyword = encodeURIComponent(args.keyword);
+    const lang = encodeURIComponent(args.lang);
+    const url = `https://www.google.com/search?q=${keyword}&hl=${lang}&num=${limit}`;
+
+    await page.goto(url);
+    // Give the results page time to render (unit of `wait` presumably seconds; confirm against the page API).
+    await page.wait(2);
+
+    // The script below runs inside the page, so it is passed as a string.
+    const results = await page.evaluate(`
+      (function() {
+        var results = [];
+        var seenUrls = {};
+        var rso = document.querySelector('#rso');
+        if (!rso) return results;
+
+        // -- Featured snippet (scoped to #rso to avoid matching unrelated elements) --
+        var featuredEl = rso.querySelector('.xpdopen .hgKElc')
+                      || rso.querySelector('.IZ6rdc');
+        if (featuredEl) {
+          var parentBlock = featuredEl.closest('[data-hveid]') || featuredEl.parentElement;
+          var fLink = parentBlock ? parentBlock.querySelector('a[href]') : null;
+          var fUrl = fLink ? fLink.href : '';
+          if (fUrl) seenUrls[fUrl] = true;
+          results.push({
+            type: 'snippet',
+            title: featuredEl.textContent.trim().slice(0, 200),
+            url: fUrl,
+            snippet: '',
+          });
+        }
+
+        // -- Standard search results --
+        // Strategy: find all links containing h3 within #rso
+        var allLinks = rso.querySelectorAll('a');
+        for (var i = 0; i < allLinks.length; i++) {
+          var link = allLinks[i];
+          var h3 = link.querySelector('h3');
+          if (!h3) continue;
+
+          var href = link.href || '';
+          // Skip non-http, Google internal links, and duplicates
+          if (!href.match(/^https?:\\/\\//)) continue;
+          if (href.indexOf('google.com/search') !== -1) continue;
+          if (seenUrls[href]) continue;
+          seenUrls[href] = true;
+
+          // Walk up to find result container for snippet extraction
+          var container = link;
+          for (var j = 0; j < 6; j++) {
+            if (container.parentElement && container.parentElement !== rso) {
+              container = container.parentElement;
+            }
+            // Stop at a known result boundary
+            if (container.getAttribute && container.getAttribute('data-hveid')) break;
+          }
+
+          // Find snippet: look for descriptive text, skip breadcrumbs and metadata
+          var snippetText = '';
+          var titleText = h3.textContent.trim();
+          var candidates = container.querySelectorAll('span, div');
+          for (var k = 0; k < candidates.length; k++) {
+            var el = candidates[k];
+            if (el.querySelector('h3') || el.querySelector('a[href]')) continue;
+            var text = el.textContent.trim();
+            if (text.length < 40 || text.length > 500) continue;
+            if (text === titleText) continue;
+            // Skip URL breadcrumbs (e.g. "https://example.com › path..." or "Site Namehttps://...")
+            if (text.indexOf('\u203A') !== -1) continue;
+            if (new RegExp('https?://').test(text.slice(0, 60))) continue;
+            snippetText = text;
+            break;
+          }
+
+          results.push({
+            type: 'result',
+            title: h3.textContent.trim(),
+            url: href,
+            snippet: snippetText.slice(0, 300),
+          });
+        }
+
+        // -- People Also Ask --
+        var paaContainers = document.querySelectorAll('[data-sgrd="true"]');
+        for (var i = 0; i < paaContainers.length; i++) {
+          var questionEl = paaContainers[i].querySelector('span.CSkcDe');
+          if (questionEl) {
+            results.push({
+              type: 'paa',
+              title: questionEl.textContent.trim(),
+              url: '',
+              snippet: '',
+            });
+          }
+        }
+
+        return results;
+      })()
+    `);
+
+    if (!Array.isArray(results) || results.length === 0) {
+      throw new CliError('NOT_FOUND', 'No search results found', 'Try a different keyword or check for CAPTCHA');
+    }
+
+    // `num` in the URL is only a request to the page and is not guaranteed to
+    // be honored, so enforce --limit on the standard results here. Featured
+    // snippet and People-Also-Ask rows are kept regardless.
+    let standardCount = 0;
+    return results.filter((row: { type?: string } | null) => {
+      if (!row || row.type !== 'result') return true;
+      standardCount += 1;
+      return standardCount <= limit;
+    });
+  },
+});
@@ -0,0 +1,40 @@
+/**
+ * Google Search Suggestions via public JSON API.
+ * Uses suggestqueries.google.com with client=firefox for pure JSON (not JSONP).
+ */
+
+import { cli, Strategy } from '../../registry.js';
+import { CliError } from '../../errors.js';
+
+/**
+ * Registers the `google suggest` command.
+ *
+ * Queries the suggestqueries.google.com completion endpoint and returns one
+ * row per suggestion.
+ *
+ * Args: `keyword` (required positional), `lang` (sent as `hl`).
+ * Returns rows of `{ suggestion }`.
+ * Throws `CliError('FETCH_ERROR')` when the response is not OK and
+ * `CliError('NOT_FOUND')` when the payload carries no suggestions.
+ */
+cli({
+  site: 'google',
+  name: 'suggest',
+  description: 'Get Google search suggestions',
+  strategy: Strategy.PUBLIC,
+  browser: false,
+  args: [
+    { name: 'keyword', positional: true, required: true, help: 'Search query' },
+    { name: 'lang', default: 'zh-CN', help: 'Language code' },
+  ],
+  columns: ['suggestion'],
+  func: async (_page, args) => {
+    // Build the request URL; both user-supplied values are percent-encoded.
+    const endpoint =
+      'https://suggestqueries.google.com/complete/search?client=firefox' +
+      `&q=${encodeURIComponent(args.keyword)}` +
+      `&hl=${encodeURIComponent(args.lang)}`;
+
+    const response = await fetch(endpoint);
+    if (!response.ok) {
+      throw new CliError('FETCH_ERROR', `HTTP ${response.status}`, 'Check your network connection');
+    }
+
+    // Expected payload shape: ["query", ["suggestion1", "suggestion2", ...]]
+    const payload = await response.json();
+    let hits: string[] = [];
+    if (Array.isArray(payload) && Array.isArray(payload[1])) {
+      hits = payload[1];
+    }
+
+    if (hits.length === 0) {
+      throw new CliError('NOT_FOUND', 'No suggestions found', 'Try a different keyword');
+    }
+
+    const rows: Array<{ suggestion: string }> = [];
+    for (const text of hits) {
+      rows.push({ suggestion: text });
+    }
+    return rows;
+  },
+});
@@ -0,0 +1,44 @@
+/**
+ * Google Trends via public RSS feed.
+ * Shows daily trending searches for a given region.
+ */
+
+import { cli, Strategy } from '../../registry.js';
+import { CliError } from '../../errors.js';
+import { parseRssItems } from './utils.js';
+
+/**
+ * Registers the `google trends` command.
+ *
+ * Fetches the Google Trends trending RSS feed for a region and maps each
+ * item to a row.
+ *
+ * Args: `region` (sent as `geo`), `limit` (clamped to 1-100).
+ * Returns rows of `{ title, traffic, date }`; `traffic` is the raw feed string.
+ * Throws `CliError('FETCH_ERROR')` when the response is not OK and
+ * `CliError('NOT_FOUND')` when no items were parsed from the feed.
+ */
+cli({
+  site: 'google',
+  name: 'trends',
+  description: 'Get Google Trends daily trending searches',
+  strategy: Strategy.PUBLIC,
+  browser: false,
+  args: [
+    { name: 'region', default: 'US', help: 'Region code (e.g. US, CN, JP)' },
+    { name: 'limit', type: 'int', default: 20, help: 'Number of results' },
+  ],
+  columns: ['title', 'traffic', 'date'],
+  func: async (_page, args) => {
+    // A non-numeric --limit would turn the clamp into NaN, and `slice(0, NaN)`
+    // returns an empty array; fall back to the declared default instead.
+    const requested = Number(args.limit);
+    const limit = Number.isFinite(requested)
+      ? Math.max(1, Math.min(Math.trunc(requested), 100))
+      : 20;
+    const region = encodeURIComponent(args.region);
+    const url = `https://trends.google.com/trending/rss?geo=${region}`;
+
+    const resp = await fetch(url);
+    if (!resp.ok) {
+      throw new CliError('FETCH_ERROR', `HTTP ${resp.status}`, 'Check your network connection or region code');
+    }
+
+    const xml = await resp.text();
+    const items = parseRssItems(xml, ['title', 'pubDate', 'ht:approx_traffic']);
+
+    if (!items.length) {
+      throw new CliError('NOT_FOUND', 'No trending data found', 'Try a different region code');
+    }
+
+    return items.slice(0, limit).map(item => ({
+      title: item['title'],
+      traffic: item['ht:approx_traffic'],  // raw string e.g. "1,000,000+", no numeric conversion
+      date: item['pubDate'],
+    }));
+  },
+});