|
16 | 16 | import math |
17 | 17 | from typing import Optional |
18 | 18 |
|
19 | | -from urllib.request import Request, urlopen |
20 | 19 | from urllib.parse import urlencode |
21 | 20 |
|
22 | 21 |
|
@@ -847,20 +846,19 @@ class ComposerPopularity(featuresModule.FeatureExtractor): |
847 | 846 |
|
848 | 847 | Requires an internet connection. |
849 | 848 |
|
| 849 | + Changed in v7 -- implementation uses the package `requests_html`, which must |
| 850 | + be installed. |
850 | 851 |
|
851 | 852 | >>> #_DOCS_SHOW s = corpus.parse('mozart/k155', 2) |
852 | 853 | >>> s = stream.Score() #_DOCS_HIDE |
853 | 854 | >>> s.append(metadata.Metadata()) #_DOCS_HIDE |
854 | 855 | >>> s.metadata.composer = 'W.A. Mozart' #_DOCS_HIDE |
855 | 856 | >>> fe = features.native.ComposerPopularity(s) |
856 | | - >>> #_DOCS_SHOW fe.extract().vector[0] > 5.0 |
857 | | - >>> True #_DOCS_HIDE |
| 857 | + >>> fe.extract().vector[0] > 5.0 |
858 | 858 | True |
859 | 859 | ''' |
860 | 860 | id = 'MD1' |
861 | 861 | googleResultsRE = re.compile(r'([\d,]+) results') |
862 | | - _M21UserAgent = ('Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) ' |
863 | | - + 'Gecko/20071127 Firefox/2.0.0.11') |
864 | 862 |
|
865 | 863 | def __init__(self, dataOrStream=None, *arguments, **keywords): |
866 | 864 | super().__init__(dataOrStream=dataOrStream, *arguments, **keywords) |
@@ -888,13 +886,11 @@ def process(self): |
888 | 886 | params = urlencode(paramsBasic) |
889 | 887 | urlStr = f'http://www.google.com/search?{params}' |
890 | 888 |
|
891 | | - headers = {'User-Agent': self._M21UserAgent} |
892 | | - req = Request(urlStr, headers=headers) |
893 | | - with urlopen(req) as response: |
894 | | - the_page = response.read() |
895 | | - the_page = the_page.decode('utf-8') |
896 | | - |
897 | | - m = self.googleResultsRE.search(the_page) |
| 889 | + from requests_html import HTMLSession |
| 890 | + session = HTMLSession() |
| 891 | + response = session.get(urlStr) |
| 892 | + resultsDiv = response.html.find('div[@id="result-stats"]', first=True) |
| 893 | + m = self.googleResultsRE.search(resultsDiv.text) |
898 | 894 | if m is not None and m.group(0): |
899 | 895 | totalRes = int(m.group(1).replace(',', '')) |
900 | 896 | if totalRes > 0: |
|
0 commit comments