Skip to content

Commit 0cac303

Browse files
author
himanshu
committed
Added max_pages to limit the number of pages fetched when using pagination
1 parent 4c61407 commit 0cac303

1 file changed

Lines changed: 24 additions & 9 deletions

File tree

newsdataapi/newsdataapi_client.py

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ def __init__(
2121
max_retries: Optional[int] = constants.DEFAULT_MAX_RETRIES,
2222
retry_delay: Optional[int] = constants.DEFAULT_RETRY_DELAY,
2323
request_timeout: Optional[int] = constants.DEFAULT_REQUEST_TIMEOUT,
24-
max_result: Optional[int] = 10**10,
24+
max_result: Optional[int] = 10**10,
25+
max_pages: Optional[int] = 10**10,
2526
debug: Optional[bool] = False,
2627
folder_path: Optional[str] = None,
2728
include_headers: Optional[bool] = False
@@ -30,6 +31,7 @@ def __init__(
3031
self.apikey = apikey
3132
self.request_method:requests = requests if session == False else requests.Session()
3233
self.max_result = max_result
34+
self.max_pages = max_pages
3335
self.max_retries = max_retries
3436
self.retry_delay = retry_delay
3537
self.proxies = proxies
@@ -121,7 +123,7 @@ def validate_url(url:str)-> str:
121123

122124
def __get_feeds(self, endpoint: str, query_params: dict) -> Dict[str, Any]:
123125
retry_count = 0
124-
while retry_count < self.max_retries:
126+
while retry_count <= self.max_retries:
125127
retry_count += 1
126128
try:
127129
s_time = time.perf_counter()
@@ -198,7 +200,10 @@ def __get_feeds_all(self, endpoint:str,query_params:dict, max_result: Optional[i
198200

199201
time.sleep(constants.PAGINATION_SLEEP)
200202

201-
def __paginate_results(self,endpoint:str,query_params:dict):
203+
def __paginate_results(self,endpoint:str,query_params:dict,max_pages: Optional[int] = None):
204+
if max_pages is None:
205+
max_pages = self.max_pages
206+
202207
self.request_method = requests.Session()
203208
current_result_count = 0
204209
page = 0
@@ -210,8 +215,14 @@ def __paginate_results(self,endpoint:str,query_params:dict):
210215
logger.info(f"Total result: {response['totalResults']}, Current result count: {current_result_count}, Page: {page}")
211216
yield response
212217

218+
if page >= max_pages:
219+
logger.info(f"Reached maximum page limit: {max_pages}, ending pagination.")
220+
return
221+
213222
if response['nextPage'] is None:
223+
logger.info("No more pages to fetch, ending pagination.")
214224
return
225+
215226
query_params['page'] = response['nextPage']
216227
time.sleep(constants.PAGINATION_SLEEP)
217228

@@ -314,6 +325,7 @@ def latest_api(
314325
max_result: Optional[int] = None,
315326
scroll: Optional[bool] = False,
316327
paginate: Optional[bool] = False,
328+
max_pages: Optional[int] = None,
317329
) -> Dict[str, Any]:
318330
"""
319331
Sending GET request to the latest api.
@@ -357,7 +369,7 @@ def latest_api(
357369
if scroll:
358370
return self.__get_feeds_all(endpoint=self.latest_url,query_params=URL_parameters,max_result=max_result)
359371
elif paginate:
360-
return self.__paginate_results(endpoint=self.latest_url,query_params=URL_parameters)
372+
return self.__paginate_results(endpoint=self.latest_url,query_params=URL_parameters,max_pages=max_pages)
361373
else:
362374
return self.__get_feeds(endpoint=self.latest_url,query_params=URL_parameters)
363375

@@ -388,10 +400,11 @@ def archive_api(
388400
url: Optional[str] = None,
389401
sort: Optional[str] = None,
390402

391-
paginate: Optional[bool] = False,
392403
raw_query: Optional[str] = None,
393404
scroll: Optional[bool] = False,
394405
max_result: Optional[int] = None,
406+
paginate: Optional[bool] = False,
407+
max_pages: Optional[int] = None,
395408
) -> Dict[str, Any]:
396409
"""
397410
Sending GET request to the archive api
@@ -431,7 +444,7 @@ def archive_api(
431444
if scroll:
432445
return self.__get_feeds_all(endpoint=self.archive_url,query_params=URL_parameters,max_result=max_result)
433446
elif paginate:
434-
return self.__paginate_results(endpoint=self.archive_url,query_params=URL_parameters)
447+
return self.__paginate_results(endpoint=self.archive_url,query_params=URL_parameters,max_pages=max_pages)
435448
else:
436449
return self.__get_feeds(endpoint=self.archive_url,query_params=URL_parameters)
437450

@@ -490,9 +503,10 @@ def crypto_api(
490503
url: Optional[str] = None,
491504
sort: Optional[str] = None,
492505

493-
paginate: Optional[bool] = False,
494506
scroll: Optional[bool] = False,
495507
max_result: Optional[int] = None,
508+
paginate: Optional[bool] = False,
509+
max_pages: Optional[int] = None,
496510
) -> Dict[str, Any]:
497511
"""
498512
Sending GET request to the crypto api
@@ -535,7 +549,7 @@ def crypto_api(
535549
if scroll:
536550
return self.__get_feeds_all(endpoint=self.crypto_url,query_params=URL_parameters,max_result=max_result)
537551
elif paginate:
538-
return self.__paginate_results(endpoint=self.crypto_url,query_params=URL_parameters)
552+
return self.__paginate_results(endpoint=self.crypto_url,query_params=URL_parameters,max_pages=max_pages)
539553
else:
540554
return self.__get_feeds(endpoint=self.crypto_url,query_params=URL_parameters)
541555

@@ -634,6 +648,7 @@ def market_api(
634648
max_result: Optional[int] = None,
635649
scroll: Optional[bool] = False,
636650
paginate: Optional[bool] = False,
651+
max_pages: Optional[int] = None,
637652
) -> Dict[str, Any]:
638653
"""
639654
Sending GET request to the market api
@@ -677,7 +692,7 @@ def market_api(
677692
if scroll:
678693
return self.__get_feeds_all(endpoint=self.market_url,query_params=URL_parameters,max_result=max_result)
679694
elif paginate:
680-
return self.__paginate_results(endpoint=self.market_url,query_params=URL_parameters)
695+
return self.__paginate_results(endpoint=self.market_url,query_params=URL_parameters,max_pages=max_pages)
681696
else:
682697
return self.__get_feeds(endpoint=self.market_url,query_params=URL_parameters)
683698

0 commit comments

Comments
 (0)