Skip to content

Commit ab86bbb

Browse files
author
himanshu
committed
bug fix and code optimization
1 parent a3dfadc commit ab86bbb

2 files changed

Lines changed: 52 additions & 50 deletions

File tree

newsdataapi/newsdataapi_client.py

Lines changed: 51 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from typing import Optional,Union
44
from datetime import datetime,timezone
55
from urllib.parse import urlencode, quote
6+
from requests.exceptions import RequestException
67
from newsdataapi.newsdataapi_exception import NewsdataException
78

89
class NewsDataApiClient:
@@ -15,12 +16,10 @@ def __init__(
1516
self.apikey = apikey
1617
self.request_method:requests = requests if session == False else requests.Session()
1718
self.max_result = max_result
18-
self.max_result_scroll = max_result
1919
self.max_retries = max_retries
2020
self.retry_delay = retry_delay
2121
self.proxies = proxies
2222
self.request_timeout = request_timeout
23-
self.recursive_retry = max_retries
2423
self.is_debug = debug
2524

2625
def set_retries( self, max_retries:int, retry_delay:int)->None:
@@ -32,6 +31,9 @@ def set_request_timeout( self, request_timeout:int)->None:
3231
""" API maximum timeout for the request """
3332
self.request_timeout = request_timeout
3433

34+
def get_current_dt(self)->str:
35+
return datetime.now(tz=timezone.utc).replace(microsecond=0).strftime("%Y-%m-%d %H:%M:%S")
36+
3537
def api_proxies( self, proxies:dict)->None:
3638
""" Configure Proxie dictionary """
3739
self.proxies = proxies
@@ -59,75 +61,91 @@ def __validate_parms(self,param:str,value:Union[list,int,str,bool])->dict:
5961

6062
return {param:value}
6163

62-
def __get_feeds(self,url:str)-> dict:
64+
def __get_feeds(self,url:str,retry_count:int=None)-> dict:
6365
try:
64-
if self.recursive_retry <= 0:
66+
if retry_count is None:
67+
retry_count = self.max_retries
68+
69+
if retry_count <= 0:
6570
raise NewsdataException('Maximum retry limit reached. For more information use debug parameter while initializing NewsDataApiClient.')
71+
6672
response = self.request_method.get(url=url,proxies=self.proxies,timeout=self.request_timeout)
73+
6774
if self.is_debug == True:
6875
headers = response.headers
69-
print(f'Debug | {datetime.now(tz=timezone.utc).replace(microsecond=0)} | x_rate_limit_remaining: {headers.get("x_rate_limit_remaining")} | x_api_limit_remaining: {headers.get("x_api_limit_remaining")}')
76+
print(f'Debug | {self.get_current_dt()} | x_rate_limit_remaining: {headers.get("x_rate_limit_remaining")} | x_api_limit_remaining: {headers.get("x_api_limit_remaining")}')
77+
7078
feeds_data:dict = response.json()
79+
7180
if response.status_code != 200:
81+
7282
if response.status_code == 500:
7383
if self.is_debug == True:
74-
print(f"Debug | {datetime.now(tz=timezone.utc).replace(microsecond=0)} | Encountered 'ServerError' going to sleep for: {self.retry_delay} seconds.")
84+
print(f"Debug | {self.get_current_dt()} | Encountered 'ServerError' going to sleep for: {self.retry_delay} seconds.")
7585
time.sleep(self.retry_delay)
76-
self.recursive_retry-=1
77-
return self.__get_feeds(url=url)
86+
return self.__get_feeds(url=url,retry_count=retry_count-1)
87+
7888
elif feeds_data.get('results',{}).get('code') == 'TooManyRequests':
7989
if self.is_debug == True:
80-
print(f"Debug | {datetime.now(tz=timezone.utc).replace(microsecond=0)} | Encountered 'TooManyRequests' going to sleep for: {constants.DEFAULT_RETRY_DELAY_TooManyRequests} seconds.")
90+
print(f"Debug | {self.get_current_dt()} | Encountered 'TooManyRequests' going to sleep for: {constants.DEFAULT_RETRY_DELAY_TooManyRequests} seconds.")
8191
time.sleep(constants.DEFAULT_RETRY_DELAY_TooManyRequests)
82-
self.recursive_retry-=1
83-
return self.__get_feeds(url=url)
92+
return self.__get_feeds(url=url,retry_count=retry_count-1)
93+
8494
elif feeds_data.get('results',{}).get('code') == 'RateLimitExceeded':
8595
if self.is_debug == True:
86-
print(f"Debug | {datetime.now(tz=timezone.utc).replace(microsecond=0)} | Encountered 'RateLimitExceeded' going to sleep for: {constants.DEFAULT_RETRY_DELAY_RateLimitExceeded} seconds.")
96+
print(f"Debug | {self.get_current_dt()} | Encountered 'RateLimitExceeded' going to sleep for: {constants.DEFAULT_RETRY_DELAY_RateLimitExceeded} seconds.")
8797
time.sleep(constants.DEFAULT_RETRY_DELAY_RateLimitExceeded)
88-
self.recursive_retry-=1
89-
return self.__get_feeds(url=url)
98+
return self.__get_feeds(url=url,retry_count=retry_count-1)
99+
90100
else:
91101
raise NewsdataException(response.json())
102+
92103
else:
93-
self.recursive_retry = self.max_retries
94104
return feeds_data
95-
except requests.exceptions.ConnectionError:
105+
106+
except RequestException:
107+
96108
if self.is_debug == True:
97-
print(f"Debug | {datetime.now(tz=timezone.utc).replace(microsecond=0)} | Encountered 'ConnectionError' going to sleep for: {self.retry_delay} seconds.")
109+
print(f"Debug | {self.get_current_dt()} | Encountered 'ConnectionError' going to sleep for: {self.retry_delay} seconds.")
98110
time.sleep(self.retry_delay)
111+
99112
if isinstance(self.request_method,requests.Session):
100113
self.request_method = requests.Session()
101-
self.recursive_retry-=1
102-
return self.__get_feeds(url=url)
114+
115+
return self.__get_feeds(url=url,retry_count=retry_count-1)
103116

104-
def __get_feeds_all(self,url:str)-> dict:
105-
if not isinstance(self.max_result_scroll,int):
106-
raise TypeError('max_result should be of type int.')
117+
def __get_feeds_all(self,url:str,max_result:int)-> dict:
107118

119+
if max_result is None:
120+
max_result = self.max_result
121+
122+
if not isinstance(max_result,int):
123+
raise TypeError('max_result should be of type int.')
124+
108125
if not isinstance(self.request_method,requests.Session):
109126
self.request_method = requests.Session()
110127

111128
feeds_count = 0
112129
data = {'totalResults':None,'results':[],'nextPage':True}
113130
while data.get("nextPage"):
114-
response = self.__get_feeds(url=f'{url}&page={data.get("nextPage")}' if data.get('results') else url)
131+
try:
132+
response = self.__get_feeds(url=f'{url}&page={data.get("nextPage")}' if data.get('results') else url)
133+
except NewsdataException as e:
134+
if data['totalResults'] is None:
135+
raise e
136+
return data
115137
data['totalResults'] = response.get('totalResults')
116138
results = response.get('results')
117139
data['results'].extend(results)
118140
data['nextPage'] = response.get('nextPage')
119141
feeds_count+=len(results)
120-
if feeds_count >= self.max_result_scroll:
142+
if self.is_debug == True:
143+
print(f"Debug | {self.get_current_dt()} | total results: {data['totalResults']} | extracted: {feeds_count}")
144+
if feeds_count >= max_result:
121145
return data
122146
time.sleep(0.5)
123147
return data
124148

125-
def _reset_recursive_retry(self):
126-
self.recursive_retry = self.max_retries
127-
128-
def _reset_max_result(self):
129-
self.max_result_scroll = self.max_result
130-
131149
def news_api(
132150
self, q:Optional[str]=None, qInTitle:Optional[str]=None, country:Optional[Union[str, list]]=None, category:Optional[Union[str, list]]=None,
133151
language:Optional[Union[str, list]]=None, domain:Optional[Union[str, list]]=None, timeframe:Optional[Union[int,str]]=None, size:Optional[int]=None,
@@ -145,19 +163,14 @@ def news_api(
145163
'size':size,'domainurl':domainurl,'excludedomain':excludedomain,'timezone':timezone,'full_content':full_content,'image':image,'video':video,'prioritydomain':prioritydomain,
146164
'page':page,'qInMeta':qInMeta,'tag':tag, 'sentiment':sentiment, 'region':region
147165
}
148-
self._reset_recursive_retry()
149166
URL_parameters = {}
150167
for key,value in params.items():
151168
if value is not None:
152169
URL_parameters.update(self.__validate_parms(param=key,value=value))
153170

154171
URL_parameters_encoded = urlencode(URL_parameters, quote_via=quote)
155172
if scroll == True:
156-
if max_result:
157-
self.max_result_scroll = max_result
158-
else:
159-
self._reset_max_result()
160-
return self.__get_feeds_all(url=f'{constants.NEWS_URL}?{URL_parameters_encoded}')
173+
return self.__get_feeds_all(url=f'{constants.NEWS_URL}?{URL_parameters_encoded}',max_result=max_result)
161174
else:
162175
return self.__get_feeds(url=f'{constants.NEWS_URL}?{URL_parameters_encoded}')
163176

@@ -177,19 +190,14 @@ def archive_api(
177190
'timezone':timezone,'full_content':full_content,'image':image,'video':video,'prioritydomain':prioritydomain,'page':page,'from_date':from_date,'to_date':to_date,
178191
'apikey':self.apikey,'qInMeta':qInMeta,'cryptofeeds':cryptofeeds
179192
}
180-
self._reset_recursive_retry()
181193
URL_parameters = {}
182194
for key,value in params.items():
183195
if value is not None:
184196
URL_parameters.update(self.__validate_parms(param=key,value=value))
185197

186198
URL_parameters_encoded = urlencode(URL_parameters, quote_via=quote)
187199
if scroll == True:
188-
if max_result:
189-
self.max_result_scroll = max_result
190-
else:
191-
self._reset_max_result()
192-
return self.__get_feeds_all(url=f'{constants.ARCHIVE_URL}?{URL_parameters_encoded}')
200+
return self.__get_feeds_all(url=f'{constants.ARCHIVE_URL}?{URL_parameters_encoded}',max_result=max_result)
193201
else:
194202
return self.__get_feeds(url=f'{constants.ARCHIVE_URL}?{URL_parameters_encoded}')
195203

@@ -200,7 +208,6 @@ def sources_api( self, country:Optional[str]= None, category:Optional[str]= None
200208
"""
201209
URL_parameters = {}
202210
params = {"apikey":self.apikey, "country":country, "category":category, "language":language, "prioritydomain":prioritydomain}
203-
self._reset_recursive_retry()
204211
URL_parameters = {}
205212
for key,value in params.items():
206213
if value is not None:
@@ -226,19 +233,14 @@ def crypto_api(
226233
'excludedomain':excludedomain,'timezone':timezone,'full_content':full_content,'image':image,'video':video,'prioritydomain':prioritydomain,'page':page,
227234
'timeframe':str(timeframe) if timeframe else timeframe,'qInMeta':qInMeta,'tag':tag, 'sentiment':sentiment,'coin':coin
228235
}
229-
self._reset_recursive_retry()
230236
URL_parameters = {}
231237
for key,value in params.items():
232238
if value is not None:
233239
URL_parameters.update(self.__validate_parms(param=key,value=value))
234240

235241
URL_parameters_encoded = urlencode(URL_parameters, quote_via=quote)
236242
if scroll == True:
237-
if max_result:
238-
self.max_result_scroll = max_result
239-
else:
240-
self._reset_max_result()
241-
return self.__get_feeds_all(url=f'{constants.CRYPTO_URL}?{URL_parameters_encoded}')
243+
return self.__get_feeds_all(url=f'{constants.CRYPTO_URL}?{URL_parameters_encoded}',max_result=max_result)
242244
else:
243245
return self.__get_feeds(url=f'{constants.CRYPTO_URL}?{URL_parameters_encoded}')
244246

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
setup(
88
name='newsdataapi',
9-
version='0.1.14',
9+
version='0.1.15',
1010
packages=['newsdataapi'],
1111
description='Python library for newsdata client-API Call',
1212
long_description=long_description,

0 commit comments

Comments
 (0)