File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -21,8 +21,9 @@ def run_queue():
2121
@task
def run_the_queue():
    """Dispatch the queue-processing Celery task by its registered name.

    The task is sent with the positional arguments ``[100, 500]``
    (presumably ``total`` and ``batch_size`` -- confirm against the task's
    signature on the worker side).
    """
    # The name must match a registered task exactly, so the literal carries
    # no stray whitespace.
    # NOTE(review): the worker module registers ``run_queue_and_save`` as a
    # task and only imports ``get_data_from_queue`` as a plain function;
    # confirm that "worker.get_data_from_queue" is actually registered,
    # otherwise this should probably be "worker.run_queue_and_save".
    app.send_task("worker.get_data_from_queue", args=[100, 500])


run_the_queue()
2728
2829
Original file line number Diff line number Diff line change 77import polars as pl
88
99
10- today = datetime .now ().strftime ("%Y-%m-%d" )
10+ def save_to_parquet (the_data ):
11+ today = datetime .now ().strftime ("%Y-%m-%d" )
1112
12- print ("Waiting for Celery task to complete" )
13-
14- try :
15- print ("Getting the result" )
16- response = build_repo_chord (total = 5000 , batch_size = 500 )
17- the_data = response .get (timeout = 3600 ) # 1 hour timeout
18- print (f"Result: { the_data } " )
19-
20- except Exception as e :
21- print (f"Error: { e } " )
22-
23- else :
2413 if not Path (f"data/{ today } /" ).exists ():
2514 Path (f"data/{ today } " ).mkdir (parents = True , exist_ok = True )
2615
3019 df = pl .DataFrame (the_data )
3120 df .write_parquet (f"data/{ today } /github_data.parquet" , compression = "zstd" )
3221 print ("Valid Parquet data" )
22+
23+
def get_data_from_queue(total: int = 5000, batch_size: int = 500):
    """Run the repo chord, wait for its result, and save it as Parquet.

    Args:
        total: Forwarded to ``build_repo_chord`` as ``total``.
        batch_size: Forwarded to ``build_repo_chord`` as ``batch_size``.

    Returns:
        Whatever ``save_to_parquet`` returns on success, or ``None`` when
        building the chord or fetching its result failed (the error is
        printed and nothing is saved).
    """
    try:
        print("Getting the result")
        response = build_repo_chord(total=total, batch_size=batch_size)
        the_data = response.get(timeout=3600)  # 1 hour timeout
        print(f"Result: {the_data}")
    except Exception as e:
        print(f"Error: {e}")
        # Without a result there is nothing to persist; returning here avoids
        # touching the unbound ``the_data`` below.
        return None

    return save_to_parquet(the_data)


if __name__ == "__main__":
    get_data_from_queue()
Original file line number Diff line number Diff line change 33from pathlib import Path
44import json
55import boto3
6- import time
76from celery import Celery , group , chord
87from celery .utils .log import get_task_logger
98from datetime import datetime
109from github import Auth , Github , GithubException
1110from dotenv import load_dotenv
11+ from client import get_data_from_queue
1212from pydantic_models .github import RabbitMQ_Data_Validation
1313from rb_queue .rabbitmq import get_connection , QUEUE_NAME
1414load_dotenv ()
@@ -183,6 +183,13 @@ def build_repo_chord(total: int = 5000, batch_size: int = 500):
183183 return chord (header )(aggregate_results .s ())
184184
185185
@app.task
def run_queue_and_save(total: int = 5000, batch_size: int = 500):
    """Celery task wrapper around ``client.get_data_from_queue``.

    Forwards ``total`` and ``batch_size`` as keyword arguments and returns
    the callee's result.
    """
    # NOTE(review): this call passes ``total``/``batch_size`` keywords, so
    # ``get_data_from_queue`` must accept them -- confirm its signature.
    # NOTE(review): the callee blocks on ``response.get(...)``; Celery
    # discourages waiting on a result from inside a task -- confirm this is
    # intended and permitted by the app configuration.
    return get_data_from_queue(total=total, batch_size=batch_size)
189+
190+
191+
192+
186193# old code that did not work
187194# @app.task
188195# def distribute_tasks():
You can’t perform that action at this time.
0 commit comments