33from typing import Optional ,Union
44from datetime import datetime ,timezone
55from urllib .parse import urlencode , quote
6+ from requests .exceptions import RequestException
67from newsdataapi .newsdataapi_exception import NewsdataException
78
89class NewsDataApiClient :
@@ -15,12 +16,10 @@ def __init__(
1516 self .apikey = apikey
1617 self .request_method :requests = requests if session == False else requests .Session ()
1718 self .max_result = max_result
18- self .max_result_scroll = max_result
1919 self .max_retries = max_retries
2020 self .retry_delay = retry_delay
2121 self .proxies = proxies
2222 self .request_timeout = request_timeout
23- self .recursive_retry = max_retries
2423 self .is_debug = debug
2524
2625 def set_retries ( self , max_retries :int , retry_delay :int )-> None :
@@ -32,6 +31,9 @@ def set_request_timeout( self, request_timeout:int)->None:
3231 """ API maximum timeout for the request """
3332 self .request_timeout = request_timeout
3433
def get_current_dt(self) -> str:
    """Return the current UTC time as ``YYYY-MM-DD HH:MM:SS``.

    The format string has no sub-second field, so the value is already
    truncated to whole seconds without an explicit ``replace(microsecond=0)``.
    The instance state is not read.
    """
    return datetime.now(tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
36+
3537 def api_proxies ( self , proxies :dict )-> None :
3638 """ Configure Proxie dictionary """
3739 self .proxies = proxies
@@ -59,75 +61,91 @@ def __validate_parms(self,param:str,value:Union[list,int,str,bool])->dict:
5961
6062 return {param :value }
6163
def __get_feeds(self, url: str, retry_count: Optional[int] = None) -> dict:
    """Request ``url`` and return the decoded JSON body, retrying on transient failures.

    Each attempt is retried (after a sleep) when:
      * the response status is 500 (sleeps ``self.retry_delay``),
      * the error payload's ``results.code`` is ``'TooManyRequests'`` or
        ``'RateLimitExceeded'`` (sleeps the matching ``constants`` delay),
      * the request raises a ``RequestException`` (sleeps ``self.retry_delay``;
        a ``requests.Session`` in use is replaced by a new one).

    Args:
        url: Fully built request URL, query string included.
        retry_count: Number of attempts still allowed. ``None`` starts from
            ``self.max_retries``.

    Returns:
        The JSON body of the first response with status 200.

    Raises:
        NewsdataException: When no attempts are left, or when a non-200
            response is not one of the retried cases (the decoded payload is
            passed to the exception).
    """
    attempts_left = self.max_retries if retry_count is None else retry_count

    # The retry budget is counted in a local loop variable rather than through recursion.
    while attempts_left > 0:
        try:
            response = self.request_method.get(url=url, proxies=self.proxies, timeout=self.request_timeout)

            if self.is_debug:
                headers = response.headers
                print(f'Debug | {self.get_current_dt()} | x_rate_limit_remaining: {headers.get("x_rate_limit_remaining")} | x_api_limit_remaining: {headers.get("x_api_limit_remaining")}')

            if response.status_code == 500:
                # Decided before decoding the body, so a non-JSON error page
                # cannot prevent the ServerError retry.
                reason, delay = 'ServerError', self.retry_delay
            else:
                feeds_data = response.json()
                if response.status_code == 200:
                    return feeds_data

                # Only look up the error code when the payload has the expected
                # dict-in-dict shape; anything else is reported as-is below.
                error_info = feeds_data.get('results') if isinstance(feeds_data, dict) else None
                error_code = error_info.get('code') if isinstance(error_info, dict) else None

                if error_code == 'TooManyRequests':
                    reason, delay = error_code, constants.DEFAULT_RETRY_DELAY_TooManyRequests
                elif error_code == 'RateLimitExceeded':
                    reason, delay = error_code, constants.DEFAULT_RETRY_DELAY_RateLimitExceeded
                else:
                    raise NewsdataException(feeds_data)

        except RequestException as exc:
            # Report the concrete exception class; every RequestException
            # subclass lands here, not only ConnectionError.
            reason, delay = type(exc).__name__, self.retry_delay
            if isinstance(self.request_method, requests.Session):
                self.request_method = requests.Session()

        if self.is_debug:
            print(f"Debug | {self.get_current_dt()} | Encountered '{reason}' going to sleep for: {delay} seconds.")
        time.sleep(delay)
        attempts_left -= 1

    raise NewsdataException('Maximum retry limit reached. For more information use debug parameter while initializing NewsDataApiClient.')
103116
def __get_feeds_all(self, url: str, max_result: Optional[int] = None) -> dict:
    """Follow ``nextPage`` tokens from ``url`` and merge all pages into one payload.

    Args:
        url: Fully built request URL for the first page.
        max_result: Stop once at least this many results were collected.
            ``None`` falls back to ``self.max_result``.

    Returns:
        A dict with ``totalResults`` and ``nextPage`` taken from the last
        fetched page and ``results`` holding the concatenated results.
        If a later page fails with ``NewsdataException``, the data collected
        so far is returned instead of raising.

    Raises:
        TypeError: If the effective ``max_result`` is not an ``int``.
        NewsdataException: If a request fails while ``totalResults`` is still
            unset (i.e. nothing usable was collected yet).
    """
    if max_result is None:
        max_result = self.max_result

    if not isinstance(max_result, int):
        raise TypeError('max_result should be of type int.')

    # Scrolling always runs on a Session; a non-Session handle is replaced
    # and stays replaced on the instance afterwards.
    if not isinstance(self.request_method, requests.Session):
        self.request_method = requests.Session()

    data = {'totalResults': None, 'results': [], 'nextPage': True}
    # Track the token explicitly: deciding by "are there results yet" would
    # refetch the first URL forever if a page came back empty with a token.
    page_token = None
    while data['nextPage']:
        page_url = url if page_token is None else f'{url}&page={page_token}'
        try:
            response = self.__get_feeds(url=page_url)
        except NewsdataException:
            if data['totalResults'] is None:
                raise
            return data

        data['totalResults'] = response.get('totalResults')
        # A missing/None 'results' entry is treated as an empty page.
        data['results'].extend(response.get('results') or [])
        page_token = data['nextPage'] = response.get('nextPage')
        feeds_count = len(data['results'])

        if self.is_debug:
            print(f"Debug | {self.get_current_dt()} | total results: {data['totalResults']} | extracted: {feeds_count}")
        if feeds_count >= max_result:
            return data
        time.sleep(0.5)
    return data
124148
125- def _reset_recursive_retry (self ):
126- self .recursive_retry = self .max_retries
127-
128- def _reset_max_result (self ):
129- self .max_result_scroll = self .max_result
130-
131149 def news_api (
132150 self , q :Optional [str ]= None , qInTitle :Optional [str ]= None , country :Optional [Union [str , list ]]= None , category :Optional [Union [str , list ]]= None ,
133151 language :Optional [Union [str , list ]]= None , domain :Optional [Union [str , list ]]= None , timeframe :Optional [Union [int ,str ]]= None , size :Optional [int ]= None ,
@@ -145,19 +163,14 @@ def news_api(
145163 'size' :size ,'domainurl' :domainurl ,'excludedomain' :excludedomain ,'timezone' :timezone ,'full_content' :full_content ,'image' :image ,'video' :video ,'prioritydomain' :prioritydomain ,
146164 'page' :page ,'qInMeta' :qInMeta ,'tag' :tag , 'sentiment' :sentiment , 'region' :region
147165 }
148- self ._reset_recursive_retry ()
149166 URL_parameters = {}
150167 for key ,value in params .items ():
151168 if value is not None :
152169 URL_parameters .update (self .__validate_parms (param = key ,value = value ))
153170
154171 URL_parameters_encoded = urlencode (URL_parameters , quote_via = quote )
155172 if scroll == True :
156- if max_result :
157- self .max_result_scroll = max_result
158- else :
159- self ._reset_max_result ()
160- return self .__get_feeds_all (url = f'{ constants .NEWS_URL } ?{ URL_parameters_encoded } ' )
173+ return self .__get_feeds_all (url = f'{ constants .NEWS_URL } ?{ URL_parameters_encoded } ' ,max_result = max_result )
161174 else :
162175 return self .__get_feeds (url = f'{ constants .NEWS_URL } ?{ URL_parameters_encoded } ' )
163176
@@ -177,19 +190,14 @@ def archive_api(
177190 'timezone' :timezone ,'full_content' :full_content ,'image' :image ,'video' :video ,'prioritydomain' :prioritydomain ,'page' :page ,'from_date' :from_date ,'to_date' :to_date ,
178191 'apikey' :self .apikey ,'qInMeta' :qInMeta ,'cryptofeeds' :cryptofeeds
179192 }
180- self ._reset_recursive_retry ()
181193 URL_parameters = {}
182194 for key ,value in params .items ():
183195 if value is not None :
184196 URL_parameters .update (self .__validate_parms (param = key ,value = value ))
185197
186198 URL_parameters_encoded = urlencode (URL_parameters , quote_via = quote )
187199 if scroll == True :
188- if max_result :
189- self .max_result_scroll = max_result
190- else :
191- self ._reset_max_result ()
192- return self .__get_feeds_all (url = f'{ constants .ARCHIVE_URL } ?{ URL_parameters_encoded } ' )
200+ return self .__get_feeds_all (url = f'{ constants .ARCHIVE_URL } ?{ URL_parameters_encoded } ' ,max_result = max_result )
193201 else :
194202 return self .__get_feeds (url = f'{ constants .ARCHIVE_URL } ?{ URL_parameters_encoded } ' )
195203
@@ -200,7 +208,6 @@ def sources_api( self, country:Optional[str]= None, category:Optional[str]= None
200208 """
201209 URL_parameters = {}
202210 params = {"apikey" :self .apikey , "country" :country , "category" :category , "language" :language , "prioritydomain" :prioritydomain }
203- self ._reset_recursive_retry ()
204211 URL_parameters = {}
205212 for key ,value in params .items ():
206213 if value is not None :
@@ -226,19 +233,14 @@ def crypto_api(
226233 'excludedomain' :excludedomain ,'timezone' :timezone ,'full_content' :full_content ,'image' :image ,'video' :video ,'prioritydomain' :prioritydomain ,'page' :page ,
227234 'timeframe' :str (timeframe ) if timeframe else timeframe ,'qInMeta' :qInMeta ,'tag' :tag , 'sentiment' :sentiment ,'coin' :coin
228235 }
229- self ._reset_recursive_retry ()
230236 URL_parameters = {}
231237 for key ,value in params .items ():
232238 if value is not None :
233239 URL_parameters .update (self .__validate_parms (param = key ,value = value ))
234240
235241 URL_parameters_encoded = urlencode (URL_parameters , quote_via = quote )
236242 if scroll == True :
237- if max_result :
238- self .max_result_scroll = max_result
239- else :
240- self ._reset_max_result ()
241- return self .__get_feeds_all (url = f'{ constants .CRYPTO_URL } ?{ URL_parameters_encoded } ' )
243+ return self .__get_feeds_all (url = f'{ constants .CRYPTO_URL } ?{ URL_parameters_encoded } ' ,max_result = max_result )
242244 else :
243245 return self .__get_feeds (url = f'{ constants .CRYPTO_URL } ?{ URL_parameters_encoded } ' )
244246
0 commit comments