@@ -20,6 +20,7 @@ def __init__(
2020 proxies : Optional [Dict [str , Any ]] = None ,
2121 max_retries : Optional [int ] = constants .DEFAULT_MAX_RETRIES ,
2222 retry_delay : Optional [int ] = constants .DEFAULT_RETRY_DELAY ,
23+ pagination_delay : Optional [int ] = constants .PAGINATION_DELAY ,
2324 request_timeout : Optional [int ] = constants .DEFAULT_REQUEST_TIMEOUT ,
2425 max_result : Optional [int ] = 10 ** 10 ,
2526 max_pages : Optional [int ] = 10 ** 10 ,
@@ -34,6 +35,7 @@ def __init__(
3435 self .max_pages = max_pages
3536 self .max_retries = max_retries
3637 self .retry_delay = retry_delay
38+ self .pagination_delay = pagination_delay
3739 self .proxies = proxies
3840 self .request_timeout = request_timeout
3941 # self.is_debug = debug
@@ -59,13 +61,14 @@ def set_request_timeout( self, request_timeout:int)->None:
5961 """ API maximum timeout for the request """
6062 self .request_timeout = request_timeout
6163
62- def get_current_dt (self )-> str :
63- return datetime .now (tz = timezone .utc ).replace (microsecond = 0 ).strftime ("%Y-%m-%d %H:%M:%S" )
64-
def api_proxies(self, proxies: dict) -> None:
    """Store the proxy dictionary on this instance.

    Args:
        proxies: Proxy configuration mapping; it replaces whatever value
            ``self.proxies`` held before the call.
    """
    self.proxies = proxies
6867
def set_pagination_delay(self, pagination_delay: int) -> None:
    """Set the pause applied between consecutive paginated requests.

    Args:
        pagination_delay: Value stored in ``self.pagination_delay``; the
            pagination loops pass it to ``time.sleep`` after each page,
            so it is interpreted as seconds.
    """
    self.pagination_delay = pagination_delay
71+
6972 def __validate_parms (self , user_param : Dict [str , Any ]) -> Dict [str , Any ]:
7073 bool_params = {'full_content' ,'image' ,'video' ,'removeduplicate' }
7174 int_params = {'size' }
@@ -122,9 +125,7 @@ def validate_url(url:str)-> str:
122125 return valid_parms
123126
124127 def __get_feeds (self , endpoint : str , query_params : dict ) -> Dict [str , Any ]:
125- retry_count = 0
126- while retry_count <= self .max_retries :
127- retry_count += 1
128+ for retry_count in range (1 ,self .max_retries + 1 ):
128129 try :
129130 s_time = time .perf_counter ()
130131 params = query_params .copy ()
@@ -139,15 +140,15 @@ def __get_feeds(self, endpoint: str, query_params: dict) -> Dict[str, Any]:
139140 X_RateLimit_Remaining = response .headers .get ("X-RateLimit-Remaining" )
140141 logger .info (f"X-API-Limit-Remaining: { X_API_Limit_Remaining } , X-RateLimit-Remaining: { X_RateLimit_Remaining } " )
141142
142- try :
143- feeds_data : dict = response . json ()
144- except :
145- raise NewsdataException ( f"Invalid JSON response: { response . text [: 200 ] } " )
146-
147- if self . include_headers :
148- feeds_data [ 'response_headers' ] = dict ( response . headers )
149-
150- if response . status_code == 200 and feeds_data . get ( 'status' ) == 'success' :
143+ feeds_data : dict = response . json ()
144+
145+ if (
146+ response . status_code == 200
147+ and feeds_data . get ( 'status' ) == 'success'
148+ and feeds_data . get ( 'results' ) is not None
149+ ):
150+ if self . include_headers :
151+ feeds_data [ 'response_headers' ] = dict ( response . headers )
151152 return feeds_data
152153
153154 elif response .status_code == 500 :
@@ -162,12 +163,18 @@ def __get_feeds(self, endpoint: str, query_params: dict) -> Dict[str, Any]:
162163 continue
163164
164165 else :
165- msg = f"Unexpected response { response .status_code } : { response .text [:200 ]} "
166- raise NewsdataException (msg )
166+ raise NewsdataException (feeds_data )
167+
168+ except NewsdataException as e :
169+ raise e
167170
168171 except RequestException as e :
169172 logger .error (f"ConnectionError on attempt { retry_count } /{ self .max_retries } : { e } . Sleeping { self .retry_delay } s." )
170173 time .sleep (self .retry_delay )
174+
175+ except Exception as e :
176+ logger .error (f"Unexpected error on attempt { retry_count } /{ self .max_retries } : { e } . Sleeping { self .retry_delay } s." )
177+ time .sleep (self .retry_delay )
171178
172179 raise NewsdataException (f"Maximum retry limit reached: { self .max_retries } ." )
173180
@@ -198,7 +205,7 @@ def __get_feeds_all(self, endpoint:str,query_params:dict, max_result: Optional[i
198205 if max_result and len (data ['results' ]) >= max_result :
199206 return data
200207
201- time .sleep (constants . PAGINATION_SLEEP )
208+ time .sleep (self . pagination_delay )
202209
203210 def __paginate_results (self ,endpoint :str ,query_params :dict ,max_pages : Optional [int ] = None ):
204211 if max_pages is None :
@@ -209,22 +216,21 @@ def __paginate_results(self,endpoint:str,query_params:dict,max_pages: Optional[i
209216 page = 0
210217 while True :
211218 response = self .__get_feeds (endpoint = endpoint ,query_params = query_params )
212- if response ['status' ] == 'success' :
213- current_result_count += len (response ['results' ])
214- page += 1
215- logger .info (f"Total result: { response ['totalResults' ]} , Current result count: { current_result_count } , Page: { page } " )
216- yield response
217-
218- if page >= max_pages :
219- logger .info (f"Reached maximum page limit: { max_pages } , ending pagination." )
220- return
221-
222- if response ['nextPage' ] is None :
223- logger .info ("No more pages to fetch, ending pagination." )
224- return
225-
226- query_params ['page' ] = response ['nextPage' ]
227- time .sleep (constants .PAGINATION_SLEEP )
219+ current_result_count += len (response ['results' ])
220+ page += 1
221+ logger .info (f"Total result: { response ['totalResults' ]} , Current result count: { current_result_count } , Page: { page } " )
222+ yield response
223+
224+ if page >= max_pages :
225+ logger .info (f"Reached maximum page limit: { max_pages } , ending pagination." )
226+ return
227+
228+ if response ['nextPage' ] is None :
229+ logger .info ("No more pages to fetch, ending pagination." )
230+ return
231+
232+ query_params ['page' ] = response ['nextPage' ]
233+ time .sleep (self .pagination_delay )
228234
229235 def news_api (
230236 self ,
0 commit comments