diff --git a/.flake8 b/.flake8 index f62069cb6..9e73d77f6 100644 --- a/.flake8 +++ b/.flake8 @@ -1,3 +1,3 @@ [flake8] max-line-length = 119 -ignore = E9,F63,F7,F82,E402 +ignore = E9,F63,F7,F82,E402,E231,E713 diff --git a/.github/workflows/python-non-master.yml b/.github/workflows/python-non-master.yml index 3623ede1e..1c3d07434 100644 --- a/.github/workflows/python-non-master.yml +++ b/.github/workflows/python-non-master.yml @@ -16,12 +16,12 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.7, 3.8, 3.9, '3.10', '3.11'] + python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies @@ -33,7 +33,7 @@ jobs: flake8 . --count --show-source --statistics - name: Lint with pylint run: | - pylint howdoi *.py --rcfile=.pylintrc + pylint howdoi --rcfile=.pylintrc - name: Test with nose run: | nose2 diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 203e90424..583ecccbd 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -12,12 +12,12 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.7, 3.8, 3.9, '3.10', '3.11'] + python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies @@ -29,7 +29,7 @@ jobs: flake8 . --count --show-source --statistics - name: Lint with pylint run: | - pylint howdoi *.py --rcfile=.pylintrc + pylint howdoi --rcfile=.pylintrc - name: Test with nose run: | nose2 diff --git a/.pylintrc b/.pylintrc index 875f8e5f4..79d942b96 100644 --- a/.pylintrc +++ b/.pylintrc @@ -7,7 +7,7 @@ extension-pkg-whitelist= # Add files or directories to the blacklist. They should be base names, not # paths. -ignore=CVS +ignore=CVS,setup.py # Add files or directories matching the regex patterns to the blacklist. The # regex matches against base names, not paths. @@ -470,4 +470,4 @@ min-public-methods=2 # Exceptions that will emit a warning when being caught. Defaults to # "Exception". -overgeneral-exceptions=Exception +overgeneral-exceptions=builtins.Exception diff --git a/howdoi/howdoi.py b/howdoi/howdoi.py index 9ab69e359..690ff34e6 100644 --- a/howdoi/howdoi.py +++ b/howdoi/howdoi.py @@ -55,17 +55,24 @@ SCHEME = 'https://' VERIFY_SSL_CERTIFICATE = True -SUPPORTED_SEARCH_ENGINES = ('google', 'bing', 'duckduckgo') +SUPPORTED_SEARCH_ENGINES = ('stackexchange', 'google', 'bing', 'duckduckgo') + +STACKEXCHANGE_API_URL = 'https://api.stackexchange.com/2.3/search/advanced' URL = os.getenv('HOWDOI_URL') or 'stackoverflow.com' -USER_AGENTS = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:11.0) Gecko/20100101 Firefox/11.0', - 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:22.0) Gecko/20100 101 Firefox/22.0', - 'Mozilla/5.0 (Windows NT 6.1; rv:11.0) Gecko/20100101 Firefox/11.0', - ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/536.5 (KHTML, like Gecko) ' - 'Chrome/19.0.1084.46 Safari/536.5'), - ('Mozilla/5.0 (Windows; Windows NT 6.1) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.46' - 'Safari/536.5'),) +USER_AGENTS = ( + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) ' + 'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) ' + 'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36', + 'Mozilla/5.0 (X11; Linux x86_64) ' + 'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) ' + 'AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Safari/605.1.15', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) ' + 'Gecko/20100101 Firefox/125.0', +) SEARCH_URLS = { 'bing': SCHEME + 'www.bing.com/search?q=site:{0}%20{1}&hl=en', 'google': SCHEME + 'www.google.com/search?q=site:{0}%20{1}&hl=en', @@ -75,7 +82,11 @@ BLOCK_INDICATORS = ( 'form id="captcha-form"', 'This page appears when Google automatically detects requests coming from your computer ' - 'network which appear to be in violation of the Terms of Service' + 'network which appear to be in violation of the Terms of Service', + 'consent.google.com', + 'id="consent-bump"', + 'action="https://consent.google', + 'Before you continue to Google Search', ) BLOCKED_QUESTION_FRAGMENTS = ( @@ -179,7 +190,10 @@ def _get_result(url): resp = howdoi_session.get(url, headers={'User-Agent': _random_choice(USER_AGENTS)}, proxies=get_proxies(), verify=VERIFY_SSL_CERTIFICATE, - cookies={'CONSENT': 'YES+US.en+20170717-00-0'}) + cookies={'CONSENT': 'PENDING+987', + 'SOCS': 'CAESHAgBEhJnd3NfMjAyNDA0MTUtMF9SQzIaBnpoLUNOIAEaBgiA_LyxBg', + 'prov': '797823e3-8c1a-431e-a174-0a9e03ceb7f3', + '__cflb': '02DiuFA7zZL3enAQJD3AX8ZzvyzLcaG7uv8yqzetfbBde'}) resp.raise_for_status() return resp.text except requests.exceptions.SSLError as error: @@ -278,8 +292,37 @@ def _is_blocked(page): return False +def _get_links_from_stackexchange(query): + site = URL.replace('.com', '').replace('www.', '') + params = { + 'order': 'desc', + 'sort': 'relevance', + 'q': query, + 'site': site, + 'pagesize': 10, + } + logging.info('Searching StackExchange API for: %s', query) + try: + resp = howdoi_session.get(STACKEXCHANGE_API_URL, params=params, + proxies=get_proxies(), verify=VERIFY_SSL_CERTIFICATE) + resp.raise_for_status() + data = resp.json() + links = [item['link'] for item in data.get('items', []) if 'link' in item] + if links: + logging.info('StackExchange API returned %d results', len(links)) + return links + logging.info('StackExchange API returned no results') + except (requests.RequestException, ValueError) as error: + logging.info('StackExchange API error: %s', error) + raise BlockError('No results from stackexchange') + + def _get_links(query): - search_engine = os.getenv('HOWDOI_SEARCH_ENGINE', 'google') + search_engine = os.getenv('HOWDOI_SEARCH_ENGINE', 'stackexchange') + + if search_engine == 'stackexchange': + return _get_links_from_stackexchange(query) + search_url = _get_search_url(search_engine).format(URL, url_quote(query)) logging.info('Searching %s with URL: %s', search_engine, search_url) @@ -299,6 +342,7 @@ def _get_links(query): if len(links) == 0: logging.info('Search engine %s found no StackOverflow links, returned HTML is:', search_engine) logging.info(result) + raise BlockError(f'No results from {search_engine}') return list(dict.fromkeys(links)) # remove any duplicates @@ -334,7 +378,7 @@ def _format_output(args, code): return code syntax = Syntax(code, lexer, background_color="default", line_numbers=False) - console = Console(record=True) + console = Console(record=True, force_terminal=True) with console.capture() as capture: console.print(syntax) return capture.get() @@ -436,7 +480,7 @@ def _get_answers(args): initial_pos = args['pos'] - 1 final_pos = initial_pos + args['num_answers'] question_links = question_links[initial_pos:final_pos] - search_engine = os.getenv('HOWDOI_SEARCH_ENGINE', 'google') + search_engine = os.getenv('HOWDOI_SEARCH_ENGINE', 'stackexchange') logging.info('Links from %s found on %s: %s', URL, search_engine, len(question_links)) logging.info('URL: %s', '\n '.join(question_links)) @@ -606,7 +650,7 @@ def howdoi(raw_query): else: args = raw_query - search_engine = args['search_engine'] or os.getenv('HOWDOI_SEARCH_ENGINE') or 'google' + search_engine = args['search_engine'] or os.getenv('HOWDOI_SEARCH_ENGINE') or 'stackexchange' os.environ['HOWDOI_SEARCH_ENGINE'] = search_engine if search_engine not in SUPPORTED_SEARCH_ENGINES: supported_search_engines = ', '.join(SUPPORTED_SEARCH_ENGINES) diff --git a/requirements/dev.txt b/requirements/dev.txt index 006cb5b64..7abeeed8a 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -1,7 +1,7 @@ # Contains development specific requirements and imports common requirements -flake8==5.0.4 +flake8==7.1.2 nose2==0.12.0 -pylint==2.15.10 +pylint==3.3.6 pre-commit==2.17.0 twine==3.8.0 -r common.txt diff --git a/test_howdoi.py b/test_howdoi.py index 9c90b686e..54dfa7ae5 100644 --- a/test_howdoi.py +++ b/test_howdoi.py @@ -107,8 +107,10 @@ def test_get_link_at_pos(self): '/questions/42/') @patch.object(howdoi, '_get_result') - def test_blockerror(self, mock_get_links): - mock_get_links.side_effect = requests.HTTPError + @patch.object(howdoi, '_get_links_from_stackexchange') + def test_blockerror(self, mock_se_links, mock_get_result): + mock_se_links.side_effect = howdoi.BlockError('No results from stackexchange') + mock_get_result.side_effect = requests.HTTPError query = self.queries[0] response = howdoi.howdoi(query) self.assertEqual(response, "ERROR: \x1b[91mUnable to get a response from any search engine\n\x1b[0m")