Skip to content

Commit 521c24c

Browse files
committed
Implement reverse performance optimization
This commit optimizes the `reverse` command in the Calcite planner by intelligently reversing existing sort collations instead of always using the ROW_NUMBER() approach. Key changes: - Added PlanUtils.reverseCollation() method to flip sort directions and null directions - Updated CalciteRelNodeVisitor.visitReverse() to: - Check for existing sort collations - Reverse them if present (more efficient) - Fall back to ROW_NUMBER() when no sort exists - Added comprehensive integration test expected outputs for: - Single field reverse pushdown - Multiple field reverse pushdown - Reverse fallback cases - Double reverse no-op optimizations This optimization significantly improves performance when reversing already-sorted data by leveraging database-native sort reversal. Based on PR opensearch-project#4056 by @selsong Signed-off-by: Kai Huang <ahkcs@amazon.com> # Conflicts: # core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java
1 parent 7313be5 commit 521c24c

10 files changed

Lines changed: 107 additions & 13 deletions

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import org.apache.calcite.adapter.enumerable.RexToLixTranslator;
4848
import org.apache.calcite.plan.RelOptTable;
4949
import org.apache.calcite.plan.ViewExpanders;
50+
import org.apache.calcite.rel.RelCollation;
5051
import org.apache.calcite.rel.RelNode;
5152
import org.apache.calcite.rel.core.Aggregate;
5253
import org.apache.calcite.rel.core.JoinRelType;
@@ -687,19 +688,30 @@ public RelNode visitHead(Head node, CalcitePlanContext context) {
687688
public RelNode visitReverse(
688689
org.opensearch.sql.ast.tree.Reverse node, CalcitePlanContext context) {
689690
visitChildren(node, context);
690-
// Add ROW_NUMBER() column
691-
RexNode rowNumber =
692-
context
693-
.relBuilder
694-
.aggregateCall(SqlStdOperatorTable.ROW_NUMBER)
695-
.over()
696-
.rowsTo(RexWindowBounds.CURRENT_ROW)
697-
.as(REVERSE_ROW_NUM);
698-
context.relBuilder.projectPlus(rowNumber);
699-
// Sort by row number descending
700-
context.relBuilder.sort(context.relBuilder.desc(context.relBuilder.field(REVERSE_ROW_NUM)));
701-
// Remove row number column
702-
context.relBuilder.projectExcept(context.relBuilder.field(REVERSE_ROW_NUM));
691+
692+
// Check if there's an existing sort to reverse
693+
List<RelCollation> collations =
694+
context.relBuilder.getCluster().getMetadataQuery().collations(context.relBuilder.peek());
695+
RelCollation collation = collations != null && !collations.isEmpty() ? collations.get(0) : null;
696+
697+
if (collation != null && !collation.getFieldCollations().isEmpty()) {
698+
// If there's an existing sort, reverse its direction
699+
RelCollation reversedCollation = PlanUtils.reverseCollation(collation);
700+
context.relBuilder.sort(reversedCollation);
701+
} else {
702+
// Fallback: use ROW_NUMBER approach when no existing sort
703+
RexNode rowNumber =
704+
context
705+
.relBuilder
706+
.aggregateCall(SqlStdOperatorTable.ROW_NUMBER)
707+
.over()
708+
.rowsTo(RexWindowBounds.CURRENT_ROW)
709+
.as(REVERSE_ROW_NUM);
710+
context.relBuilder.projectPlus(rowNumber);
711+
context.relBuilder.sort(context.relBuilder.desc(context.relBuilder.field(REVERSE_ROW_NUM)));
712+
context.relBuilder.projectExcept(context.relBuilder.field(REVERSE_ROW_NUM));
713+
}
714+
703715
return context.relBuilder.peek();
704716
}
705717

core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@
2727
import org.apache.calcite.plan.Convention;
2828
import org.apache.calcite.plan.RelOptRuleCall;
2929
import org.apache.calcite.plan.RelOptTable;
30+
import org.apache.calcite.rel.RelCollation;
31+
import org.apache.calcite.rel.RelCollations;
32+
import org.apache.calcite.rel.RelFieldCollation;
3033
import org.apache.calcite.plan.volcano.VolcanoPlanner;
3134
import org.apache.calcite.rel.RelHomogeneousShuttle;
3235
import org.apache.calcite.rel.RelNode;
@@ -593,6 +596,37 @@ public Void visitCorrelVariable(RexCorrelVariable correlVar) {
593596
}
594597
}
595598

599+
/**
600+
* Reverses the direction of a RelCollation.
601+
*
602+
* @param original The original collation to reverse
603+
* @return A new RelCollation with reversed directions
604+
*/
605+
public static RelCollation reverseCollation(RelCollation original) {
606+
if (original == null || original.getFieldCollations().isEmpty()) {
607+
return original;
608+
}
609+
610+
List<RelFieldCollation> reversedFields = new ArrayList<>();
611+
for (RelFieldCollation field : original.getFieldCollations()) {
612+
RelFieldCollation.Direction reversedDirection = field.direction.reverse();
613+
614+
// Handle null direction properly - reverse it as well
615+
RelFieldCollation.NullDirection reversedNullDirection =
616+
field.nullDirection == RelFieldCollation.NullDirection.FIRST
617+
? RelFieldCollation.NullDirection.LAST
618+
: field.nullDirection == RelFieldCollation.NullDirection.LAST
619+
? RelFieldCollation.NullDirection.FIRST
620+
: field.nullDirection;
621+
622+
RelFieldCollation reversedField =
623+
new RelFieldCollation(field.getFieldIndex(), reversedDirection, reversedNullDirection);
624+
reversedFields.add(reversedField);
625+
}
626+
627+
return RelCollations.of(reversedFields);
628+
}
629+
596630
/** Adds a rel node to the top of the stack while preserving the field names and aliases. */
597631
static void replaceTop(RelBuilder relBuilder, RelNode relNode) {
598632
try {
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"calcite": {
3+
"logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(cpu_usage=[$0], @timestamp=[$1])\n LogicalSort(sort0=[$8], dir0=[DESC])\n LogicalProject(cpu_usage=[$0], @timestamp=[$1], _id=[$2], _index=[$3], _score=[$4], _maxscore=[$5], _sort=[$6], _routing=[$7], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n LogicalSort(sort0=[$8], dir0=[DESC])\n LogicalProject(cpu_usage=[$0], @timestamp=[$1], _id=[$2], _index=[$3], _score=[$4], _maxscore=[$5], _sort=[$6], _routing=[$7], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n CalciteLogicalIndexScan(table=[[OpenSearch, events]])\n",
4+
"physical": "EnumerableCalc(expr#0..3=[{inputs}], proj#0..1=[{exprs}])\n EnumerableLimit(fetch=[10000])\n EnumerableSort(sort0=[$3], dir0=[DESC])\n EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n EnumerableSort(sort0=[$2], dir0=[DESC])\n EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[cpu_usage, @timestamp]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"cpu_usage\",\"@timestamp\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n"
5+
}
6+
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"calcite": {
3+
"logical": "LogicalSystemLimit(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n",
4+
"physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SORT->[{\n \"age\" : {\n \"order\" : \"desc\",\n \"missing\" : \"_last\"\n }\n}, {\n \"firstname.keyword\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_first\"\n }\n}], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"desc\",\"missing\":\"_last\"}},{\"firstname.keyword\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n"
5+
}
6+
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"calcite": {
3+
"logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$17], dir0=[DESC], fetch=[5])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n",
4+
"physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..11=[{inputs}], proj#0..10=[{exprs}])\n EnumerableLimit(fetch=[5])\n EnumerableSort(sort0=[$11], dir0=[DESC])\n EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n"
5+
}
6+
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"calcite": {
3+
"logical": "LogicalSystemLimit(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last])\n LogicalSort(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n",
4+
"physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SORT->[{\n \"age\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_first\"\n }\n}, {\n \"firstname.keyword\" : {\n \"order\" : \"desc\",\n \"missing\" : \"_last\"\n }\n}], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"asc\",\"missing\":\"_first\"}},{\"firstname.keyword\":{\"order\":\"desc\",\"missing\":\"_last\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n"
5+
}
6+
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"calcite": {
3+
"logical": "LogicalSystemLimit(sort0=[$8], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$8], dir0=[ASC-nulls-first])\n LogicalSort(sort0=[$8], dir0=[DESC-nulls-last])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n",
4+
"physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SORT->[{\n \"age\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_first\"\n }\n}], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n"
5+
}
6+
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"calcite": {
3+
"logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$17], dir0=[DESC], fetch=[5])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n",
4+
"physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..17=[{inputs}], proj#0..10=[{exprs}])\n EnumerableLimit(fetch=[5])\n EnumerableSort(sort0=[$17], dir0=[DESC])\n EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n"
5+
}
6+
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"calcite": {
3+
"logical": "LogicalSystemLimit(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last])\n LogicalSort(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n",
4+
"physical": "EnumerableLimit(fetch=[10000])\n EnumerableSort(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last])\n EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n"
5+
}
6+
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"calcite": {
3+
"logical": "LogicalSystemLimit(sort0=[$8], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$8], dir0=[ASC-nulls-first])\n LogicalSort(sort0=[$8], dir0=[DESC-nulls-last])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n",
4+
"physical": "EnumerableLimit(fetch=[10000])\n EnumerableSort(sort0=[$8], dir0=[ASC-nulls-first])\n EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n"
5+
}
6+
}

0 commit comments

Comments
 (0)