From e043c712f3b585ebf2f29c2509cf621d806a150f Mon Sep 17 00:00:00 2001 From: Benjamin Gleitzman Date: Mon, 13 Apr 2026 16:18:51 -0700 Subject: [PATCH 1/5] Use StackExchange API as default search backend Google, Bing, and DuckDuckGo all now block HTTP scraping with CAPTCHA/consent pages, making howdoi unable to find answers (fixes #516). Switch to StackExchange API as the default search backend (300 free requests/day, no auth needed). The scraping backends remain available as fallbacks via HOWDOI_SEARCH_ENGINE env var. Also: updated user agents to 2024-era browsers, added consent page detection to block indicators, and made empty results trigger engine fallback instead of silently returning nothing. Co-Authored-By: Claude Opus 4.6 --- howdoi/howdoi.py | 59 +++++++++++++++++++++++++++++++++++++++--------- test_howdoi.py | 6 +++-- 2 files changed, 52 insertions(+), 13 deletions(-) diff --git a/howdoi/howdoi.py b/howdoi/howdoi.py index 9ab69e359..a0ce771aa 100644 --- a/howdoi/howdoi.py +++ b/howdoi/howdoi.py @@ -55,17 +55,17 @@ SCHEME = 'https://' VERIFY_SSL_CERTIFICATE = True -SUPPORTED_SEARCH_ENGINES = ('google', 'bing', 'duckduckgo') +SUPPORTED_SEARCH_ENGINES = ('stackexchange', 'google', 'bing', 'duckduckgo') + +STACKEXCHANGE_API_URL = 'https://api.stackexchange.com/2.3/search/advanced' URL = os.getenv('HOWDOI_URL') or 'stackoverflow.com' -USER_AGENTS = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:11.0) Gecko/20100101 Firefox/11.0', - 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:22.0) Gecko/20100 101 Firefox/22.0', - 'Mozilla/5.0 (Windows NT 6.1; rv:11.0) Gecko/20100101 Firefox/11.0', - ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/536.5 (KHTML, like Gecko) ' - 'Chrome/19.0.1084.46 Safari/536.5'), - ('Mozilla/5.0 (Windows; Windows NT 6.1) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.46' - 'Safari/536.5'),) +USER_AGENTS = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Safari/605.1.15', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0',) SEARCH_URLS = { 'bing': SCHEME + 'www.bing.com/search?q=site:{0}%20{1}&hl=en', 'google': SCHEME + 'www.google.com/search?q=site:{0}%20{1}&hl=en', @@ -75,7 +75,11 @@ BLOCK_INDICATORS = ( 'form id="captcha-form"', 'This page appears when Google automatically detects requests coming from your computer ' - 'network which appear to be in violation of the Terms of Service' + 'network which appear to be in violation of the Terms of Service', + 'consent.google.com', + 'id="consent-bump"', + 'action="https://consent.google', + 'Before you continue to Google Search', ) BLOCKED_QUESTION_FRAGMENTS = ( @@ -179,7 +183,10 @@ def _get_result(url): resp = howdoi_session.get(url, headers={'User-Agent': _random_choice(USER_AGENTS)}, proxies=get_proxies(), verify=VERIFY_SSL_CERTIFICATE, - cookies={'CONSENT': 'YES+US.en+20170717-00-0'}) + cookies={'CONSENT': 'PENDING+987', + 'SOCS': 'CAESHAgBEhJnd3NfMjAyNDA0MTUtMF9SQzIaBnpoLUNOIAEaBgiA_LyxBg', + 'prov': '797823e3-8c1a-431e-a174-0a9e03ceb7f3', + '__cflb': '02DiuFA7zZL3enAQJD3AX8ZzvyzLcaG7uv8yqzetfbBde'}) resp.raise_for_status() return resp.text except requests.exceptions.SSLError as error: @@ -278,8 +285,37 @@ def _is_blocked(page): return False +def _get_links_from_stackexchange(query): + site = URL.replace('.com', '').replace('www.', '') + params = { + 'order': 'desc', + 'sort': 'relevance', + 'q': query, + 'site': site, + 'pagesize': 10, + } + logging.info('Searching StackExchange API for: %s', query) + try: + resp = howdoi_session.get(STACKEXCHANGE_API_URL, params=params, + proxies=get_proxies(), verify=VERIFY_SSL_CERTIFICATE) + resp.raise_for_status() + data = resp.json() + links = [item['link'] for item in data.get('items', []) if 'link' in item] + if links: + logging.info('StackExchange API returned %d results', len(links)) + return links + logging.info('StackExchange API returned no results') + except (requests.RequestException, ValueError) as error: + logging.info('StackExchange API error: %s', error) + raise BlockError('No results from stackexchange') + + def _get_links(query): - search_engine = os.getenv('HOWDOI_SEARCH_ENGINE', 'google') + search_engine = os.getenv('HOWDOI_SEARCH_ENGINE', 'stackexchange') + + if search_engine == 'stackexchange': + return _get_links_from_stackexchange(query) + search_url = _get_search_url(search_engine).format(URL, url_quote(query)) logging.info('Searching %s with URL: %s', search_engine, search_url) @@ -299,6 +335,7 @@ def _get_links(query): if len(links) == 0: logging.info('Search engine %s found no StackOverflow links, returned HTML is:', search_engine) logging.info(result) + raise BlockError(f'No results from {search_engine}') return list(dict.fromkeys(links)) # remove any duplicates diff --git a/test_howdoi.py b/test_howdoi.py index 9c90b686e..54dfa7ae5 100644 --- a/test_howdoi.py +++ b/test_howdoi.py @@ -107,8 +107,10 @@ def test_get_link_at_pos(self): '/questions/42/') @patch.object(howdoi, '_get_result') - def test_blockerror(self, mock_get_links): - mock_get_links.side_effect = requests.HTTPError + @patch.object(howdoi, '_get_links_from_stackexchange') + def test_blockerror(self, mock_se_links, mock_get_result): + mock_se_links.side_effect = howdoi.BlockError('No results from stackexchange') + mock_get_result.side_effect = requests.HTTPError query = self.queries[0] response = howdoi.howdoi(query) self.assertEqual(response, "ERROR: \x1b[91mUnable to get a response from any search engine\n\x1b[0m") From 10f38dc519391bf578192991db9b0b8c7311bafe Mon Sep 17 00:00:00 2001 From: Benjamin Gleitzman Date: Tue, 14 Apr 2026 10:36:16 -0700 Subject: [PATCH 2/5] ci: drop EOL Python 3.7/3.8, add 3.12/3.13, update actions to v4/v5 Python 3.7 and 3.8 are no longer available on GitHub Actions runners. Update checkout to v4 and setup-python to v5. Co-Authored-By: Claude Opus 4.6 --- .github/workflows/python-non-master.yml | 6 +++--- .github/workflows/python.yml | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/python-non-master.yml b/.github/workflows/python-non-master.yml index 3623ede1e..11fcf755e 100644 --- a/.github/workflows/python-non-master.yml +++ b/.github/workflows/python-non-master.yml @@ -16,12 +16,12 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.7, 3.8, 3.9, '3.10', '3.11'] + python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 203e90424..6ca8cc900 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -12,12 +12,12 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.7, 3.8, 3.9, '3.10', '3.11'] + python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies From 59ed973261a386e0c9c0352356ab966eaa6b20c5 Mon Sep 17 00:00:00 2001 From: Benjamin Gleitzman Date: Tue, 14 Apr 2026 10:39:49 -0700 Subject: [PATCH 3/5] fix: resolve flake8 lint errors for CI Break long USER_AGENTS lines and suppress E231/E713 false positives from f-string colons and string literals. Co-Authored-By: Claude Opus 4.6 --- .flake8 | 2 +- howdoi/howdoi.py | 17 ++++++++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/.flake8 b/.flake8 index f62069cb6..9e73d77f6 100644 --- a/.flake8 +++ b/.flake8 @@ -1,3 +1,3 @@ [flake8] max-line-length = 119 -ignore = E9,F63,F7,F82,E402 +ignore = E9,F63,F7,F82,E402,E231,E713 diff --git a/howdoi/howdoi.py b/howdoi/howdoi.py index a0ce771aa..d27a8d618 100644 --- a/howdoi/howdoi.py +++ b/howdoi/howdoi.py @@ -61,11 +61,18 @@ URL = os.getenv('HOWDOI_URL') or 'stackoverflow.com' -USER_AGENTS = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36', - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36', - 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Safari/605.1.15', - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0',) +USER_AGENTS = ( + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) ' + 'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) ' + 'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36', + 'Mozilla/5.0 (X11; Linux x86_64) ' + 'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) ' + 'AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Safari/605.1.15', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) ' + 'Gecko/20100101 Firefox/125.0', +) SEARCH_URLS = { 'bing': SCHEME + 'www.bing.com/search?q=site:{0}%20{1}&hl=en', 'google': SCHEME + 'www.google.com/search?q=site:{0}%20{1}&hl=en', From 82b94e1cd54632d315951bf841e2b5d6fb63a627 Mon Sep 17 00:00:00 2001 From: Benjamin Gleitzman Date: Tue, 14 Apr 2026 20:56:51 -0700 Subject: [PATCH 4/5] fix: upgrade pylint/flake8 for Python 3.12+ compatibility pylint 2.15.10 crashes on Python 3.12+ with `visit_typealias` error in astroid. Upgrade to pylint 3.3.6 and flake8 7.1.2. Also fix deprecated pylintrc option and exclude setup.py from pylint (distutils removed in 3.12+). Generated with [Claude Code](https://claude.ai/code) via [Happy](https://happy.engineering) Co-Authored-By: Claude Co-Authored-By: Happy --- .github/workflows/python-non-master.yml | 2 +- .github/workflows/python.yml | 2 +- .pylintrc | 4 ++-- requirements/dev.txt | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/python-non-master.yml b/.github/workflows/python-non-master.yml index 11fcf755e..1c3d07434 100644 --- a/.github/workflows/python-non-master.yml +++ b/.github/workflows/python-non-master.yml @@ -33,7 +33,7 @@ jobs: flake8 . --count --show-source --statistics - name: Lint with pylint run: | - pylint howdoi *.py --rcfile=.pylintrc + pylint howdoi --rcfile=.pylintrc - name: Test with nose run: | nose2 diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 6ca8cc900..583ecccbd 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -29,7 +29,7 @@ jobs: flake8 . --count --show-source --statistics - name: Lint with pylint run: | - pylint howdoi *.py --rcfile=.pylintrc + pylint howdoi --rcfile=.pylintrc - name: Test with nose run: | nose2 diff --git a/.pylintrc b/.pylintrc index 875f8e5f4..79d942b96 100644 --- a/.pylintrc +++ b/.pylintrc @@ -7,7 +7,7 @@ extension-pkg-whitelist= # Add files or directories to the blacklist. They should be base names, not # paths. -ignore=CVS +ignore=CVS,setup.py # Add files or directories matching the regex patterns to the blacklist. The # regex matches against base names, not paths. @@ -470,4 +470,4 @@ min-public-methods=2 # Exceptions that will emit a warning when being caught. Defaults to # "Exception". -overgeneral-exceptions=Exception +overgeneral-exceptions=builtins.Exception diff --git a/requirements/dev.txt b/requirements/dev.txt index 006cb5b64..7abeeed8a 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -1,7 +1,7 @@ # Contains development specific requirements and imports common requirements -flake8==5.0.4 +flake8==7.1.2 nose2==0.12.0 -pylint==2.15.10 +pylint==3.3.6 pre-commit==2.17.0 twine==3.8.0 -r common.txt From 82f4a5ac45f1099b227e2914327624ef4d12cffb Mon Sep 17 00:00:00 2001 From: Benjamin Gleitzman Date: Tue, 14 Apr 2026 21:08:06 -0700 Subject: [PATCH 5/5] fix: use stackexchange as consistent default engine, fix colorize output - Change default search engine from 'google' to 'stackexchange' in howdoi() and _get_answers() to match _get_links() default - Add force_terminal=True to Rich Console so ANSI color codes are always emitted when -c flag is used (fixes test_colorize) Generated with [Claude Code](https://claude.ai/code) via [Happy](https://happy.engineering) Co-Authored-By: Claude Co-Authored-By: Happy --- howdoi/howdoi.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/howdoi/howdoi.py b/howdoi/howdoi.py index d27a8d618..690ff34e6 100644 --- a/howdoi/howdoi.py +++ b/howdoi/howdoi.py @@ -378,7 +378,7 @@ def _format_output(args, code): return code syntax = Syntax(code, lexer, background_color="default", line_numbers=False) - console = Console(record=True) + console = Console(record=True, force_terminal=True) with console.capture() as capture: console.print(syntax) return capture.get() @@ -480,7 +480,7 @@ def _get_answers(args): initial_pos = args['pos'] - 1 final_pos = initial_pos + args['num_answers'] question_links = question_links[initial_pos:final_pos] - search_engine = os.getenv('HOWDOI_SEARCH_ENGINE', 'google') + search_engine = os.getenv('HOWDOI_SEARCH_ENGINE', 'stackexchange') logging.info('Links from %s found on %s: %s', URL, search_engine, len(question_links)) logging.info('URL: %s', '\n '.join(question_links)) @@ -650,7 +650,7 @@ def howdoi(raw_query): else: args = raw_query - search_engine = args['search_engine'] or os.getenv('HOWDOI_SEARCH_ENGINE') or 'google' + search_engine = args['search_engine'] or os.getenv('HOWDOI_SEARCH_ENGINE') or 'stackexchange' os.environ['HOWDOI_SEARCH_ENGINE'] = search_engine if search_engine not in SUPPORTED_SEARCH_ENGINES: supported_search_engines = ', '.join(SUPPORTED_SEARCH_ENGINES)