Skip to content

Commit ffe1c86

Browse files
committed
First batch of results
1 parent cbc80db commit ffe1c86

15 files changed

Lines changed: 1586 additions & 60 deletions

File tree

forms/executor/dbexecutor/dbexecutor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ def execute_formula_plan(self, formula_plan: PlanNode) -> pd.DataFrame:
8484
exec_subtree = scheduler.next_subtree()
8585
is_root_subtree = not scheduler.has_next_subtree()
8686
intermediate_table_name = (
87-
exec_tree.intermediate_table_name if isinstance(exec_tree, DBFuncExecNode) else ""
87+
exec_subtree.intermediate_table_name if isinstance(exec_subtree, DBFuncExecNode) else ""
8888
)
8989
start_time = time.time()
9090
sql_composable = translate(

forms/executor/dbexecutor/scheduler.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
)
2626
from forms.utils.reference import RefType
2727
from forms.core.catalog import TEMP_TABLE_PREFIX
28-
28+
temp_table_number: int = 0
2929

3030
def break_down_into_subtrees(exec_tree: DBExecNode, enable_pipelining: bool) -> list:
3131
if isinstance(exec_tree, DBFuncExecNode):
@@ -80,9 +80,12 @@ class Scheduler:
8080
def __init__(self, exec_tree: DBExecNode, enable_pipelining: bool):
8181
self.exec_tree = exec_tree
8282
self.subtrees = break_down_into_subtrees(exec_tree, enable_pipelining)
83-
for subtree_index, subtree in enumerate(self.subtrees):
83+
for _, subtree in enumerate(self.subtrees):
8484
if isinstance(subtree, DBFuncExecNode):
85-
subtree.set_intermediate_table_name(TEMP_TABLE_PREFIX + str(subtree_index))
85+
global temp_table_number
86+
intermediate_table_name = TEMP_TABLE_PREFIX + str(temp_table_number)
87+
subtree.set_intermediate_table_name(intermediate_table_name)
88+
temp_table_number += 1
8689

8790
def next_subtree(self) -> DBExecNode:
8891
return self.subtrees.pop()

forms/executor/dbexecutor/translation.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -113,9 +113,18 @@ def create_temp_table(sel_query: Composable, subtree_temp_table_name: str) -> Co
113113

114114

115115
def find_or_generate_base_table(subtree: DBExecNode) -> Composable:
116-
ref_node_list = subtree.collect_ref_nodes_in_order()
117-
if all(ref_node.table.table_name == BASE_TABLE for ref_node in ref_node_list):
118-
return sql.SQL("""{}""").format(sql.Identifier(BASE_TABLE))
116+
collected_ref_node_list = subtree.collect_ref_nodes_in_order()
117+
# deduplicate
118+
seen = False
119+
ref_node_list = []
120+
for ref_node in collected_ref_node_list:
121+
if ref_node.table.table_name == BASE_TABLE and not seen:
122+
seen = True
123+
ref_node_list.append(ref_node)
124+
elif ref_node.table.table_name != BASE_TABLE:
125+
ref_node_list.append(ref_node)
126+
if len(ref_node_list) == 1:
127+
return sql.SQL("""{}""").format(sql.Identifier(ref_node_list[0].table.table_name))
119128
else:
120129
table_list = [sql.Identifier(ref_node.table.table_name) for ref_node in ref_node_list]
121130
join_condition_list = [
@@ -127,7 +136,7 @@ def find_or_generate_base_table(subtree: DBExecNode) -> Composable:
127136
for i in range(1, len(ref_node_list))
128137
]
129138
column_list = []
130-
for ref_node in ref_node_list:
139+
for ref_node in collected_ref_node_list:
131140
for column_str in ref_node.cols:
132141
if column_str != ROW_ID:
133142
column_list.append(sql.Identifier(column_str))
@@ -440,7 +449,10 @@ def translate_reference(
440449
ref_node: DBRefExecNode, exec_context: DBExecContext, base_table: Composable
441450
) -> Composable:
442451
ref_col = ref_node.cols[0]
443-
if ref_node.out_ref_type == RefType.RR:
452+
ref_col_type = ref_node.table.get_column_type(0)
453+
if ref_node.out_ref_type == RefType.RR and ref_col_type == "boolean":
454+
return sql.SQL("""{ref_col}""").format(ref_col=sql.Identifier(ref_col))
455+
elif ref_node.out_ref_type == RefType.RR:
444456
agg_sql = sql.SQL("""SUM({ref_col})""").format(ref_col=sql.Identifier(ref_col))
445457
window_size_sql = compute_window_size_expression(ref_node, exec_context)
446458
return agg_sql + window_size_sql

forms/utils/metrics.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import copy
16+
1517
PARSING_TIME = "parsing_time"
1618
REWRITE_TIME = "rewrite_time"
1719
TRANSLATION_TIME = "translation_time"
@@ -29,7 +31,7 @@ def put_one_metric(self, key, value):
2931
self.metrics[key] = value
3032

3133
def get_metrics(self):
32-
return self.metrics
34+
return copy.deepcopy(self.metrics)
3335

3436
def reset_metrics(self):
3537
self.metrics = {}

systests/ICRISAT_test.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,5 @@ if [ "$#" -ne 1 ]; then
99
fi
1010

1111

12-
$TEST_HOME/run.sh ICRISAT ICRISAT.csv obvdate $1
12+
$TEST_HOME/run.sh ICRISAT ICRISAT.csv ICRISAT_formula.csv obvdate $1
13+
# $TEST_HOME/run.sh ICRISAT ICRISAT.csv test_formula.csv obvdate $1
Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,20 @@
1-
=C1+D2+E3
1+
=C1+D1
2+
=C1+D2
3+
=SUM(C1:D2)
4+
=SUM($C$1:D1)
5+
=SUMIF(C1:D2, ">15")
6+
=SUMIF($C$1:D1, ">15")
7+
=AVERAGE(C1:D2)
8+
=AVERAGE($C$1:D1)
9+
=AVERAGEIF(C1:D2, ">15")
10+
=AVERAGEIF($C$1:D1, ">15")
11+
=COUNTIF(C1:D2, ">15")
12+
=COUNTIF($C$1:D1, ">15")
13+
=MAX(C1:D2)
14+
=MAX($C$1:D1)
15+
=MAXIF(C1:D2, ">15")
16+
=MAXIF($C$1:D1, ">15")
17+
=IF(E1>F1, G1, H1)
18+
=LOOKUP(F1, E$1:E$14853, D$1:D$14853, 0)
19+
=LOOKUP(F1, E$1:E$14853, D$1:D$14853, 1)
20+
=INDEX(E$1:E$14853,F1)

systests/formulas/test_formula.csv

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
=LOOKUP(F1, E$1:E$14853, D$1:D$14853, 0)
2+
=LOOKUP(F1, E$1:E$14853, D$1:D$14853, 1)
3+
=INDEX(E$1:E$14853,F1)
4+
=IF(E1>F1, G1, H1)
Lines changed: 254 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,262 @@
11
{
2-
"=C1+D2+E3": {
3-
"formula_string": "=C1+D2+E3",
2+
"=C1+D1": {
3+
"formula_string": "=C1+D1",
44
"run": 1,
55
"optimization": "function",
66
"metrics": {
7-
"parsing_time": 216,
8-
"rewrite_time": 57,
7+
"parsing_time": 202,
8+
"rewrite_time": 49,
9+
"num_subplans": 1,
10+
"translation_time": 114,
11+
"execution_time": 23044,
12+
"total_time": 23862
13+
}
14+
},
15+
"=C1+D2": {
16+
"formula_string": "=C1+D2",
17+
"run": 1,
18+
"optimization": "function",
19+
"metrics": {
20+
"parsing_time": 65,
21+
"rewrite_time": 16,
22+
"num_subplans": 1,
23+
"translation_time": 41,
24+
"execution_time": 15236,
25+
"total_time": 15735
26+
}
27+
},
28+
"=SUM(C1:D2)": {
29+
"formula_string": "=SUM(C1:D2)",
30+
"run": 1,
31+
"optimization": "function",
32+
"metrics": {
33+
"parsing_time": 108,
34+
"rewrite_time": 6,
35+
"num_subplans": 1,
36+
"translation_time": 29,
37+
"execution_time": 14172,
38+
"total_time": 14799
39+
}
40+
},
41+
"=SUM($C$1:D1)": {
42+
"formula_string": "=SUM($C$1:D1)",
43+
"run": 1,
44+
"optimization": "function",
45+
"metrics": {
46+
"parsing_time": 53,
47+
"rewrite_time": 10,
48+
"num_subplans": 1,
49+
"translation_time": 26,
50+
"execution_time": 13819,
51+
"total_time": 14259
52+
}
53+
},
54+
"=SUMIF(C1:D2, \">15\")": {
55+
"formula_string": "=SUMIF(C1:D2, \">15\")",
56+
"run": 1,
57+
"optimization": "function",
58+
"metrics": {
59+
"parsing_time": 70,
60+
"rewrite_time": 23,
61+
"num_subplans": 5,
62+
"translation_time": 192,
63+
"execution_time": 43420,
64+
"total_time": 44663
65+
}
66+
},
67+
"=SUMIF($C$1:D1, \">15\")": {
68+
"formula_string": "=SUMIF($C$1:D1, \">15\")",
69+
"run": 1,
70+
"optimization": "function",
71+
"metrics": {
72+
"parsing_time": 81,
73+
"rewrite_time": 19,
74+
"num_subplans": 5,
75+
"translation_time": 168,
76+
"execution_time": 40102,
77+
"total_time": 41110
78+
}
79+
},
80+
"=AVERAGE(C1:D2)": {
81+
"formula_string": "=AVERAGE(C1:D2)",
82+
"run": 1,
83+
"optimization": "function",
84+
"metrics": {
85+
"parsing_time": 55,
86+
"rewrite_time": 8,
87+
"num_subplans": 1,
88+
"translation_time": 23,
89+
"execution_time": 15673,
90+
"total_time": 16156
91+
}
92+
},
93+
"=AVERAGE($C$1:D1)": {
94+
"formula_string": "=AVERAGE($C$1:D1)",
95+
"run": 1,
96+
"optimization": "function",
97+
"metrics": {
98+
"parsing_time": 45,
99+
"rewrite_time": 7,
100+
"num_subplans": 1,
101+
"translation_time": 21,
102+
"execution_time": 15365,
103+
"total_time": 15865
104+
}
105+
},
106+
"=AVERAGEIF(C1:D2, \">15\")": {
107+
"formula_string": "=AVERAGEIF(C1:D2, \">15\")",
108+
"run": 1,
109+
"optimization": "function",
110+
"metrics": {
111+
"parsing_time": 74,
112+
"rewrite_time": 27,
113+
"num_subplans": 8,
114+
"translation_time": 285,
115+
"execution_time": 72928,
116+
"total_time": 74023
117+
}
118+
},
119+
"=AVERAGEIF($C$1:D1, \">15\")": {
120+
"formula_string": "=AVERAGEIF($C$1:D1, \">15\")",
121+
"run": 1,
122+
"optimization": "function",
123+
"metrics": {
124+
"parsing_time": 65,
125+
"rewrite_time": 27,
126+
"num_subplans": 8,
127+
"translation_time": 278,
128+
"execution_time": 67299,
129+
"total_time": 68638
130+
}
131+
},
132+
"=COUNTIF(C1:D2, \">15\")": {
133+
"formula_string": "=COUNTIF(C1:D2, \">15\")",
134+
"run": 1,
135+
"optimization": "function",
136+
"metrics": {
137+
"parsing_time": 66,
138+
"rewrite_time": 10,
139+
"num_subplans": 1,
140+
"translation_time": 23,
141+
"execution_time": 13358,
142+
"total_time": 13777
143+
}
144+
},
145+
"=COUNTIF($C$1:D1, \">15\")": {
146+
"formula_string": "=COUNTIF($C$1:D1, \">15\")",
147+
"run": 1,
148+
"optimization": "function",
149+
"metrics": {
150+
"parsing_time": 57,
151+
"rewrite_time": 8,
152+
"num_subplans": 1,
153+
"translation_time": 21,
154+
"execution_time": 11900,
155+
"total_time": 12345
156+
}
157+
},
158+
"=MAX(C1:D2)": {
159+
"formula_string": "=MAX(C1:D2)",
160+
"run": 1,
161+
"optimization": "function",
162+
"metrics": {
163+
"parsing_time": 44,
164+
"rewrite_time": 8,
165+
"num_subplans": 1,
166+
"translation_time": 24,
167+
"execution_time": 13683,
168+
"total_time": 14137
169+
}
170+
},
171+
"=MAX($C$1:D1)": {
172+
"formula_string": "=MAX($C$1:D1)",
173+
"run": 1,
174+
"optimization": "function",
175+
"metrics": {
176+
"parsing_time": 53,
177+
"rewrite_time": 7,
178+
"num_subplans": 1,
179+
"translation_time": 22,
180+
"execution_time": 11914,
181+
"total_time": 12327
182+
}
183+
},
184+
"=MAXIF(C1:D2, \">15\")": {
185+
"formula_string": "=MAXIF(C1:D2, \">15\")",
186+
"run": 1,
187+
"optimization": "function",
188+
"metrics": {
189+
"parsing_time": 55,
190+
"rewrite_time": 6,
191+
"num_subplans": 1,
192+
"translation_time": 29,
193+
"execution_time": 13365,
194+
"total_time": 13823
195+
}
196+
},
197+
"=MAXIF($C$1:D1, \">15\")": {
198+
"formula_string": "=MAXIF($C$1:D1, \">15\")",
199+
"run": 1,
200+
"optimization": "function",
201+
"metrics": {
202+
"parsing_time": 61,
203+
"rewrite_time": 10,
204+
"num_subplans": 1,
205+
"translation_time": 30,
206+
"execution_time": 12177,
207+
"total_time": 12667
208+
}
209+
},
210+
"=IF(E1>F1, G1, H1)": {
211+
"formula_string": "=IF(E1>F1, G1, H1)",
212+
"run": 1,
213+
"optimization": "function",
214+
"metrics": {
215+
"parsing_time": 111,
216+
"rewrite_time": 8,
9217
"num_subplans": 2,
10-
"translation_time": 402,
11-
"execution_time": 54562,
12-
"total_time": 55815
218+
"translation_time": 85,
219+
"execution_time": 22774,
220+
"total_time": 23905
221+
}
222+
},
223+
"=LOOKUP(F1, E$1:E$14853, D$1:D$14853, 0)": {
224+
"formula_string": "=LOOKUP(F1, E$1:E$14853, D$1:D$14853, 0)",
225+
"run": 1,
226+
"optimization": "function",
227+
"metrics": {
228+
"parsing_time": 113,
229+
"rewrite_time": 9,
230+
"num_subplans": 1,
231+
"translation_time": 24,
232+
"execution_time": 112862,
233+
"total_time": 113430
234+
}
235+
},
236+
"=LOOKUP(F1, E$1:E$14853, D$1:D$14853, 1)": {
237+
"formula_string": "=LOOKUP(F1, E$1:E$14853, D$1:D$14853, 1)",
238+
"run": 1,
239+
"optimization": "function",
240+
"metrics": {
241+
"parsing_time": 93,
242+
"rewrite_time": 8,
243+
"num_subplans": 1,
244+
"translation_time": 24,
245+
"execution_time": 3070096,
246+
"total_time": 3070772
247+
}
248+
},
249+
"=INDEX(E$1:E$14853,F1)": {
250+
"formula_string": "=INDEX(E$1:E$14853,F1)",
251+
"run": 1,
252+
"optimization": "function",
253+
"metrics": {
254+
"parsing_time": 178,
255+
"rewrite_time": 23,
256+
"num_subplans": 1,
257+
"translation_time": 61,
258+
"execution_time": 30956,
259+
"total_time": 31687
13260
}
14261
}
15262
}

0 commit comments

Comments
 (0)