Skip to content

Commit 648dafc

Browse files
committed
working on benchmark
1 parent 553c192 commit 648dafc

1 file changed

Lines changed: 96 additions & 25 deletions

File tree

systests/benchmark.py

Lines changed: 96 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@
2020
from psycopg2 import Error
2121
from sqlalchemy import create_engine
2222
from docker.errors import DockerException
23+
from docker.models.containers import Container
24+
25+
from forms.core.forms import from_db
2326

2427
def wait_for_postgres(host, port, user, password, dbname, timeout=15):
2528
start_time = time.time()
@@ -32,15 +35,11 @@ def wait_for_postgres(host, port, user, password, dbname, timeout=15):
3235
time.sleep(1)
3336
raise Exception("PostgreSQL did not start within the given timeout")
3437

35-
36-
def setup_postgres():
38+
def start_postgres_container(postgres_user: str,
39+
dbname: str,
40+
password: str,
41+
port: int) -> Container:
3742
docker_client = docker.from_env()
38-
host = "localhost"
39-
port = "5432"
40-
postgres_user = "dt"
41-
password = "1234"
42-
dbname = "forms_db"
43-
test_table = "test_table"
4443

4544
# Start the PostgreSQL container
4645
try:
@@ -56,30 +55,102 @@ def setup_postgres():
5655
)
5756
except DockerException as e:
5857
raise Exception(f"Failed to start PostgreSQL container: {e}")
58+
59+
return container
60+
61+
def load_table(postgres_user: str,
               password: str, dbname: str,
               host: str, port: str,
               dataset_path: str,
               schema_path: str,
               test_table: str):
    """Create the benchmark table from a schema file and fill it from a CSV.

    Waits for PostgreSQL to accept connections, executes the SQL found in
    ``schema_path``, then appends the rows of the CSV at ``dataset_path``
    to ``test_table``.

    Args:
        postgres_user: Database user name.
        password: Password for ``postgres_user``.
        dbname: Name of the database to connect to.
        host: Host name of the PostgreSQL server.
        port: Port of the PostgreSQL server.
        dataset_path: Path to the CSV file read with ``pandas.read_csv``.
        schema_path: Path to the SQL file that is executed before loading.
        test_table: Name of the table the CSV rows are appended to.
    """
    # Imported locally so this function does not depend on the file-level
    # import list, which only brings in ``create_engine``.
    from sqlalchemy import text

    # Wait for the PostgreSQL service to be ready
    wait_for_postgres(host=host, port=port, user=postgres_user,
                      password=password, dbname=dbname)

    engine = create_engine(f"postgresql://{postgres_user}:{password}@{host}:{port}/{dbname}")
    try:
        with open(schema_path, 'r') as schema_file:
            schema_sql = schema_file.read()

        # ``engine.begin()`` commits on success and rolls back on error, and
        # ``text()`` is required for string SQL on SQLAlchemy 2.x; passing a
        # bare ``str`` to ``Connection.execute`` only works on 1.x.
        with engine.begin() as connection:
            connection.execute(text(schema_sql))

        # Load the dataset into the freshly created table
        df = pd.read_csv(dataset_path)
        df.to_sql(test_table, engine, if_exists="append", index=False)
    finally:
        # Release pooled connections so nothing lingers once loading is done.
        engine.dispose()
81+
82+
83+
def run(dataset_path, schema_path, table_name, primary_key,
        formula_file_path, run, pipeline_optimization: bool, output_folder):
    """Run every formula of a formula file against a throwaway PostgreSQL.

    Starts a PostgreSQL container, loads the dataset into ``table_name``,
    opens a workbook on that table with ``from_db`` and calls
    ``compute_formula`` for each row of the formula file. The workbook is
    closed and the container is stopped and removed even if a step fails.

    Args:
        dataset_path: Path to the CSV dataset passed to ``load_table``.
        schema_path: Path to the SQL schema file passed to ``load_table``.
        table_name: Name of the table to load and to open the workbook on.
        primary_key: Primary key column; also used as the order key.
        formula_file_path: Header-less CSV whose two columns are read as
            ``formula_id`` and ``formula_string``.
        run: Test run identifier (currently not used by this function).
        pipeline_optimization: Forwarded to ``from_db`` as
            ``enable_pipelining``.
        output_folder: Output folder (currently not used by this function).
    """
    host = "localhost"
    port = "5432"
    postgres_user = "dt"
    password = "1234"
    dbname = "forms_db"
    order_key = primary_key

    # NOTE(review): start_postgres_container annotates ``port`` as int but a
    # str is passed here, as before -- confirm which one the helper expects.
    container = start_postgres_container(postgres_user, dbname, password, port)

    try:
        load_table(postgres_user, password, dbname, host, port, dataset_path, schema_path, table_name)

        wb = from_db(
            host=host,
            port=int(port),
            username=postgres_user,
            password=password,
            db_name=dbname,
            table_name=table_name,
            primary_key=primary_key,
            order_key=order_key,
            enable_rewriting=True,
            enable_pipelining=pipeline_optimization,
        )
        try:
            # Parse the formula file
            formulas = pd.read_csv(formula_file_path, header=None,
                                   names=['formula_id', 'formula_string'])

            for formula in formulas.itertuples(index=False):
                # Execute the formula string
                print(f"Running formula {formula.formula_id}: {formula.formula_string}")
                wb.compute_formula(formula.formula_string)
        finally:
            # Close the DBWorkbook even when a formula fails, so the
            # workbook is not left open while the container is torn down.
            wb.close()

    finally:
        # Tear down the PostgreSQL container
        container.stop()
        container.remove()
130+
131+
132+
if __name__ == "__main__":
133+
import argparse
134+
135+
parser = argparse.ArgumentParser(description="Benchmarking script for FormS")
136+
parser.add_argument("--dataset_path", required=True, help="Path to the dataset folder")
137+
parser.add_argument("--schema_path", required=True, help="Path to the sql query that creates the table")
138+
parser.add_argument("--table_name", required=True, help="Name of the table")
139+
parser.add_argument("--primary_key", required=True, help="Primary key of the table")
140+
parser.add_argument("--formula_file_path", required=True, help="Path of the formula file")
141+
parser.add_argument("--run", required=True, help="Test run identifier")
142+
parser.add_argument("--pipeline_optimization", required=True, help="False: function-level transalation; True: subtree-level transalation)")
143+
parser.add_argument("--output_folder", required=True, help="Path to the output folder")
144+
145+
args = parser.parse_args()
146+
147+
run(
148+
dataset_path=args.dataset_path,
149+
schema_path=args.schema_path,
150+
table_name=args.table_name,
151+
primary_key=args.primary_key,
152+
formula_file_path=args.formula_file_path,
153+
run=args.run,
154+
pipeline_optimization=args.pipeline_optimization,
155+
output_folder=args.output_folder
156+
)

0 commit comments

Comments
 (0)