2020from psycopg2 import Error
2121from sqlalchemy import create_engine
2222from docker .errors import DockerException
23+ from docker .models .containers import Container
24+
25+ from forms .core .forms import from_db
2326
2427def wait_for_postgres (host , port , user , password , dbname , timeout = 15 ):
2528 start_time = time .time ()
@@ -32,15 +35,11 @@ def wait_for_postgres(host, port, user, password, dbname, timeout=15):
3235 time .sleep (1 )
3336 raise Exception ("PostgreSQL did not start within the given timeout" )
3437
35-
36- def setup_postgres ():
38+ def start_postgres_container (postgres_user : str ,
39+ dbname : str ,
40+ password : str ,
41+ port : int ) -> Container :
3742 docker_client = docker .from_env ()
38- host = "localhost"
39- port = "5432"
40- postgres_user = "dt"
41- password = "1234"
42- dbname = "forms_db"
43- test_table = "test_table"
4443
4544 # Start the PostgreSQL container
4645 try :
@@ -56,30 +55,102 @@ def setup_postgres():
5655 )
5756 except DockerException as e :
5857 raise Exception (f"Failed to start PostgreSQL container: { e } " )
58+
59+ return container
60+
def load_table(postgres_user: str,
               password: str, dbname: str,
               host: str, port: str,
               dataset_path: str,
               schema_path: str,
               test_table: str):
    """Create the table schema and bulk-load a CSV dataset into PostgreSQL.

    Blocks until ``wait_for_postgres`` reports the server as ready, runs the
    SQL found in ``schema_path``, then appends the rows of the CSV at
    ``dataset_path`` to ``test_table``.

    Args:
        postgres_user: Database user name.
        password: Password for ``postgres_user``.
        dbname: Name of the database to connect to.
        host: Host name of the PostgreSQL server.
        port: Port of the PostgreSQL server.
        dataset_path: Path to the CSV file holding the rows to load.
        schema_path: Path to a file containing the SQL that creates the table.
        test_table: Name of the table that receives the CSV rows.
    """
    # Wait for the PostgreSQL service to be ready
    wait_for_postgres(host=host, port=port, user=postgres_user,
                      password=password, dbname=dbname)

    engine = create_engine(
        f"postgresql://{postgres_user}:{password}@{host}:{port}/{dbname}"
    )
    try:
        with open(schema_path, 'r') as schema_file:
            schema_sql = schema_file.read()

        # engine.begin() commits on success, so the DDL is persisted without
        # relying on implicit autocommit; exec_driver_sql() hands the raw
        # string straight to the driver, which Connection.execute() no longer
        # accepts in SQLAlchemy 2.0.
        with engine.begin() as connection:
            connection.exec_driver_sql(schema_sql)

        # Load the dataset into the freshly created table
        df = pd.read_csv(dataset_path)
        df.to_sql(test_table, engine, if_exists="append", index=False)
    finally:
        # Release pooled connections even when loading fails.
        engine.dispose()
82+
def run(dataset_path, schema_path, table_name, primary_key,
        formula_file_path, run, pipeline_optimization: bool, output_folder):
    """Run every formula in a formula file against a throwaway PostgreSQL.

    Starts a PostgreSQL container, loads the dataset into ``table_name``,
    opens a workbook on that table via ``from_db`` and calls
    ``compute_formula`` for each row of the formula file. The workbook is
    closed and the container is stopped and removed even when a step fails.

    Args:
        dataset_path: Path to the CSV dataset to load.
        schema_path: Path to the SQL file that creates the table.
        table_name: Name of the table to create and query.
        primary_key: Primary-key column; also used as the order key.
        formula_file_path: Header-less CSV with two columns: formula id and
            formula string.
        run: Test run identifier (accepted but not used in this function).
        pipeline_optimization: Passed to ``from_db`` as ``enable_pipelining``.
        output_folder: Output folder (accepted but not used in this function).
    """
    host = "localhost"
    port = "5432"
    postgres_user = "dt"
    password = "1234"
    dbname = "forms_db"
    order_key = primary_key

    container = start_postgres_container(postgres_user, dbname, password, port)

    try:
        load_table(postgres_user, password, dbname, host, port,
                   dataset_path, schema_path, table_name)

        wb = from_db(
            host=host,
            port=int(port),
            username=postgres_user,
            password=password,
            db_name=dbname,
            table_name=table_name,
            primary_key=primary_key,
            order_key=order_key,
            enable_rewriting=True,
            enable_pipelining=pipeline_optimization,
        )

        try:
            # Parse the formula file
            formulas = pd.read_csv(
                formula_file_path,
                header=None,
                names=["formula_id", "formula_string"],
            )

            for formula in formulas.itertuples(index=False):
                # Execute the formula string
                print(f"Running formula {formula.formula_id}: {formula.formula_string}")
                wb.compute_formula(formula.formula_string)
        finally:
            # Close the workbook even if parsing or a formula fails, so its
            # resources are released before the container goes away.
            wb.close()

    finally:
        # Tear down the PostgreSQL container
        container.stop()
        container.remove()
130+
131+
def str_to_bool(value):
    """Convert a command-line token such as ``"True"`` or ``"0"`` to a bool.

    Used as the argparse ``type=`` for boolean options: a plain string would
    make ``"False"`` truthy because every non-empty string is truthy.

    Args:
        value: The raw token; a bool is returned unchanged.

    Returns:
        The parsed boolean.

    Raises:
        ValueError: If the token is not a recognised boolean spelling.
            argparse turns this into a usage error.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ("true", "t", "yes", "y", "1"):
        return True
    if token in ("false", "f", "no", "n", "0"):
        return False
    raise ValueError(f"expected a boolean value, got {value!r}")


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Benchmarking script for FormS")
    parser.add_argument("--dataset_path", required=True, help="Path to the dataset folder")
    parser.add_argument("--schema_path", required=True, help="Path to the sql query that creates the table")
    parser.add_argument("--table_name", required=True, help="Name of the table")
    parser.add_argument("--primary_key", required=True, help="Primary key of the table")
    parser.add_argument("--formula_file_path", required=True, help="Path of the formula file")
    parser.add_argument("--run", required=True, help="Test run identifier")
    # Parse into a real bool so that "--pipeline_optimization False" actually
    # disables pipelining instead of passing the truthy string "False".
    parser.add_argument("--pipeline_optimization", required=True, type=str_to_bool, help="False: function-level transalation; True: subtree-level transalation)")
    parser.add_argument("--output_folder", required=True, help="Path to the output folder")

    args = parser.parse_args()

    run(
        dataset_path=args.dataset_path,
        schema_path=args.schema_path,
        table_name=args.table_name,
        primary_key=args.primary_key,
        formula_file_path=args.formula_file_path,
        run=args.run,
        pipeline_optimization=args.pipeline_optimization,
        output_folder=args.output_folder
    )
0 commit comments