Skip to content

Commit bfd31e0

Browse files
Sync with main branch (#5137)
* [BugFix] Fix the bug when boolean comparison condition is simplifed to field (#5071) * Fix the bug when boolean comparison condition is simplifed to field Signed-off-by: Songkan Tang <songkant@amazon.com> * Update tests and cover more cases Signed-off-by: Songkan Tang <songkant@amazon.com> * Correct the logic of not boolean comparison Signed-off-by: Songkan Tang <songkant@amazon.com> * Add missing IS_FALSE RexNode translation Signed-off-by: Songkan Tang <songkant@amazon.com> * Remove unnecessary boolean expression conversion Signed-off-by: Songkan Tang <songkant@amazon.com> * Fix spotless check Signed-off-by: Songkan Tang <songkant@amazon.com> * Refactor PredicateAnalyzer logic a bit Signed-off-by: Songkan Tang <songkant@amazon.com> * Add more strict not expression match for field Signed-off-by: Songkan Tang <songkant@amazon.com> * Fix spotless check and flaky test Signed-off-by: Songkan Tang <songkant@amazon.com> * Cover more cases for IS_FALSE, IS_NOT_TRUE, IS_NOT_FALSE Signed-off-by: Songkan Tang <songkant@amazon.com> * Complement the truth tests for expressions Signed-off-by: Songkan Tang <songkant@amazon.com> * Fix logic Signed-off-by: Songkan Tang <songkant@amazon.com> * Fix spotless check Signed-off-by: Songkan Tang <songkant@amazon.com> * Add additional boolean filter only pushdown explain test cases Signed-off-by: Songkan Tang <songkant@amazon.com> --------- Signed-off-by: Songkan Tang <songkant@amazon.com> * Fix the flaky yamlRestTest caused by order of sample_logs (#5119) Signed-off-by: Lantao Jin <ltjin@amazon.com> * LAST/FIRST/TAKE aggregation should support TEXT type and Scripts (#5091) * Use fetchSource API instead of fetchFields for LAST/FIRST/TAKE aggregation Signed-off-by: Lantao Jin <ltjin@amazon.com> * Fix IT Signed-off-by: Lantao Jin <ltjin@amazon.com> * Merge remote-tracking branch 'upstream/main' into issues/5086 * LAST/FIRST/TAKE aggregation should support TEXT type and Scripts Signed-off-by: Lantao Jin <ltjin@amazon.com> * Add more tests Signed-off-by: Lantao Jin <ltjin@amazon.com> * Merge remote-tracking branch 'upstream/main' into issues/5086 * Fix IT Signed-off-by: Lantao Jin <ltjin@amazon.com> * Fix IT Signed-off-by: Lantao Jin <ltjin@amazon.com> * fix explain text Signed-off-by: Lantao Jin <ltjin@amazon.com> --------- Signed-off-by: Songkan Tang <songkant@amazon.com> Signed-off-by: Lantao Jin <ltjin@amazon.com> Co-authored-by: Songkan Tang <songkant@amazon.com>
1 parent a6cd0b8 commit bfd31e0

65 files changed

Lines changed: 1090 additions & 234 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java

Lines changed: 80 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,12 @@
2727
import org.apache.calcite.rel.type.RelDataTypeFactory;
2828
import org.apache.calcite.rex.RexBuilder;
2929
import org.apache.calcite.rex.RexCall;
30+
import org.apache.calcite.rex.RexInputRef;
3031
import org.apache.calcite.rex.RexLambdaRef;
32+
import org.apache.calcite.rex.RexLiteral;
3133
import org.apache.calcite.rex.RexNode;
3234
import org.apache.calcite.sql.SqlIntervalQualifier;
35+
import org.apache.calcite.sql.SqlOperator;
3336
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
3437
import org.apache.calcite.sql.type.ArraySqlType;
3538
import org.apache.calcite.sql.type.SqlTypeName;
@@ -190,8 +193,15 @@ public RexNode visitXor(Xor node, CalcitePlanContext context) {
190193

191194
@Override
192195
public RexNode visitNot(Not node, CalcitePlanContext context) {
193-
final RexNode expr = analyze(node.getExpression(), context);
194-
return context.relBuilder.not(expr);
196+
// Special handling for NOT(boolean_field = true/false) - see boolean comparison helpers below
197+
UnresolvedExpression inner = node.getExpression();
198+
if (inner instanceof Compare compare && "=".equals(compare.getOperator())) {
199+
RexNode result = tryMakeBooleanNotEquals(compare, context);
200+
if (result != null) {
201+
return result;
202+
}
203+
}
204+
return context.relBuilder.not(analyze(node.getExpression(), context));
195205
}
196206

197207
@Override
@@ -221,7 +231,15 @@ public RexNode visitIn(In node, CalcitePlanContext context) {
221231
public RexNode visitCompare(Compare node, CalcitePlanContext context) {
222232
RexNode left = analyze(node.getLeft(), context);
223233
RexNode right = analyze(node.getRight(), context);
224-
return PPLFuncImpTable.INSTANCE.resolve(context.rexBuilder, node.getOperator(), left, right);
234+
String op = node.getOperator();
235+
// Handle boolean_field != literal -> IS_NOT_TRUE/IS_NOT_FALSE
236+
if ("!=".equals(op) || "<>".equals(op)) {
237+
RexNode result = tryMakeBooleanNotEquals(left, right, context);
238+
if (result != null) {
239+
return result;
240+
}
241+
}
242+
return PPLFuncImpTable.INSTANCE.resolve(context.rexBuilder, op, left, right);
225243
}
226244

227245
@Override
@@ -251,6 +269,65 @@ public RexNode visitEqualTo(EqualTo node, CalcitePlanContext context) {
251269
return context.rexBuilder.equals(left, right);
252270
}
253271

272+
// ==================== Boolean NOT comparison helpers ====================
273+
// Calcite's RexSimplify transforms:
274+
// - "field = true" -> "field" (handled by PredicateAnalyzer detecting boolean field)
275+
// - "field = false" -> "NOT(field)" (handled by PredicateAnalyzer.prefix())
276+
// - "NOT(field = true)" -> "NOT(field)" -> would generate term{false}, have conflicted semantics
277+
// - "NOT(field = false)" -> "NOT(NOT(field))" -> "field" -> would generate term{true}, have
278+
// conflicted semantics
279+
// We intercept NOT(field = true/false) at AST level before Calcite optimization:
280+
// - "NOT(field = true)" -> IS_NOT_TRUE(field): matches false, null, missing
281+
// - "NOT(field = false)" -> IS_NOT_FALSE(field): matches true, null, missing
282+
283+
/**
284+
* Try to convert boolean_field != literal or NOT(boolean_field = literal) to
285+
* IS_NOT_TRUE/IS_NOT_FALSE. This preserves correct null-handling semantics.
286+
*/
287+
private RexNode tryMakeBooleanNotEquals(RexNode left, RexNode right, CalcitePlanContext context) {
288+
BooleanFieldComparison cmp = extractBooleanFieldComparison(left, right);
289+
if (cmp == null) {
290+
return null;
291+
}
292+
SqlOperator op =
293+
Boolean.FALSE.equals(cmp.literalValue)
294+
? SqlStdOperatorTable.IS_NOT_FALSE
295+
: SqlStdOperatorTable.IS_NOT_TRUE;
296+
return context.rexBuilder.makeCall(op, cmp.field);
297+
}
298+
299+
/** Overload for NOT(Compare) AST pattern. */
300+
private RexNode tryMakeBooleanNotEquals(Compare compare, CalcitePlanContext context) {
301+
return tryMakeBooleanNotEquals(
302+
analyze(compare.getLeft(), context), analyze(compare.getRight(), context), context);
303+
}
304+
305+
/** Represents a comparison between a boolean field and a boolean literal. */
306+
private record BooleanFieldComparison(RexNode field, Boolean literalValue) {}
307+
308+
/**
309+
* Extract boolean field and literal value from a comparison, normalizing operand order. Returns
310+
* null if the comparison is not between a boolean field and a boolean literal.
311+
*/
312+
private BooleanFieldComparison extractBooleanFieldComparison(RexNode left, RexNode right) {
313+
if (isBooleanField(left) && isBooleanLiteral(right)) {
314+
return new BooleanFieldComparison(left, ((RexLiteral) right).getValueAs(Boolean.class));
315+
}
316+
if (isBooleanField(right) && isBooleanLiteral(left)) {
317+
return new BooleanFieldComparison(right, ((RexLiteral) left).getValueAs(Boolean.class));
318+
}
319+
return null;
320+
}
321+
322+
private boolean isBooleanField(RexNode node) {
323+
// Only match actual field references, not arbitrary boolean expressions like CASE
324+
return node instanceof RexInputRef && node.getType().getSqlTypeName() == SqlTypeName.BOOLEAN;
325+
}
326+
327+
private boolean isBooleanLiteral(RexNode node) {
328+
return node instanceof RexLiteral && node.getType().getSqlTypeName() == SqlTypeName.BOOLEAN;
329+
}
330+
254331
/** Resolve qualified name. Note, the name should be case-sensitive. */
255332
@Override
256333
public RexNode visitQualifiedName(QualifiedName node, CalcitePlanContext context) {

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteAliasFieldAggregationIT.java

Lines changed: 52 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
package org.opensearch.sql.calcite.remote;
77

8+
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ALIAS;
89
import static org.opensearch.sql.util.MatcherUtils.rows;
910
import static org.opensearch.sql.util.MatcherUtils.schema;
1011
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;
@@ -25,13 +26,14 @@
2526
*/
2627
public class CalciteAliasFieldAggregationIT extends PPLIntegTestCase {
2728

28-
private static final String TEST_INDEX_ALIAS = "test_alias_bug";
29+
private static final String TEST_ALIAS_BUG = "test_alias_bug";
2930

3031
@Override
3132
public void init() throws Exception {
3233
super.init();
3334
enableCalcite();
3435
createTestIndexWithAliasFields();
36+
loadIndex(Index.DATA_TYPE_ALIAS);
3537
}
3638

3739
/**
@@ -41,14 +43,14 @@ public void init() throws Exception {
4143
private void createTestIndexWithAliasFields() throws IOException {
4244
// Delete the index if it exists (for test isolation)
4345
try {
44-
Request deleteIndex = new Request("DELETE", "/" + TEST_INDEX_ALIAS);
46+
Request deleteIndex = new Request("DELETE", "/" + TEST_ALIAS_BUG);
4547
client().performRequest(deleteIndex);
4648
} catch (ResponseException e) {
4749
// Index doesn't exist, which is fine
4850
}
4951

5052
// Create index with alias fields
51-
Request createIndex = new Request("PUT", "/" + TEST_INDEX_ALIAS);
53+
Request createIndex = new Request("PUT", "/" + TEST_ALIAS_BUG);
5254
createIndex.setJsonEntity(
5355
"{\n"
5456
+ " \"mappings\": {\n"
@@ -63,7 +65,7 @@ private void createTestIndexWithAliasFields() throws IOException {
6365
client().performRequest(createIndex);
6466

6567
// Insert test documents
66-
Request bulkRequest = new Request("POST", "/" + TEST_INDEX_ALIAS + "/_bulk?refresh=true");
68+
Request bulkRequest = new Request("POST", "/" + TEST_ALIAS_BUG + "/_bulk?refresh=true");
6769
bulkRequest.setJsonEntity(
6870
"{\"index\":{}}\n"
6971
+ "{\"created_at\": \"2024-01-01T10:00:00Z\", \"value\": 100}\n"
@@ -77,15 +79,15 @@ private void createTestIndexWithAliasFields() throws IOException {
7779
@Test
7880
public void testMinWithDateAliasField() throws IOException {
7981
JSONObject actual =
80-
executeQuery(String.format("source=%s | stats MIN(@timestamp)", TEST_INDEX_ALIAS));
82+
executeQuery(String.format("source=%s | stats MIN(@timestamp)", TEST_ALIAS_BUG));
8183
verifySchema(actual, schema("MIN(@timestamp)", "timestamp"));
8284
verifyDataRows(actual, rows("2024-01-01 10:00:00"));
8385
}
8486

8587
@Test
8688
public void testMaxWithDateAliasField() throws IOException {
8789
JSONObject actual =
88-
executeQuery(String.format("source=%s | stats MAX(@timestamp)", TEST_INDEX_ALIAS));
90+
executeQuery(String.format("source=%s | stats MAX(@timestamp)", TEST_ALIAS_BUG));
8991
verifySchema(actual, schema("MAX(@timestamp)", "timestamp"));
9092
verifyDataRows(actual, rows("2024-01-03 10:00:00"));
9193
}
@@ -94,8 +96,7 @@ public void testMaxWithDateAliasField() throws IOException {
9496
public void testMinMaxWithNumericAliasField() throws IOException {
9597
JSONObject actual =
9698
executeQuery(
97-
String.format(
98-
"source=%s | stats MIN(value_alias), MAX(value_alias)", TEST_INDEX_ALIAS));
99+
String.format("source=%s | stats MIN(value_alias), MAX(value_alias)", TEST_ALIAS_BUG));
99100
verifySchemaInOrder(
100101
actual, schema("MIN(value_alias)", "int"), schema("MAX(value_alias)", "int"));
101102
verifyDataRows(actual, rows(100, 300));
@@ -105,8 +106,7 @@ public void testMinMaxWithNumericAliasField() throws IOException {
105106
public void testFirstWithAliasField() throws IOException {
106107
JSONObject actual =
107108
executeQuery(
108-
String.format(
109-
"source=%s | sort @timestamp | stats FIRST(@timestamp)", TEST_INDEX_ALIAS));
109+
String.format("source=%s | sort @timestamp | stats FIRST(@timestamp)", TEST_ALIAS_BUG));
110110
verifySchema(actual, schema("FIRST(@timestamp)", "timestamp"));
111111
verifyDataRows(actual, rows("2024-01-01 10:00:00"));
112112
}
@@ -115,8 +115,7 @@ public void testFirstWithAliasField() throws IOException {
115115
public void testLastWithAliasField() throws IOException {
116116
JSONObject actual =
117117
executeQuery(
118-
String.format(
119-
"source=%s | sort @timestamp | stats LAST(@timestamp)", TEST_INDEX_ALIAS));
118+
String.format("source=%s | sort @timestamp | stats LAST(@timestamp)", TEST_ALIAS_BUG));
120119
verifySchema(actual, schema("LAST(@timestamp)", "timestamp"));
121120
verifyDataRows(actual, rows("2024-01-03 10:00:00"));
122121
}
@@ -126,7 +125,7 @@ public void testTakeWithAliasField() throws IOException {
126125
JSONObject actual =
127126
executeQuery(
128127
String.format(
129-
"source=%s | sort @timestamp | stats TAKE(@timestamp, 2)", TEST_INDEX_ALIAS));
128+
"source=%s | sort @timestamp | stats TAKE(@timestamp, 2)", TEST_ALIAS_BUG));
130129
verifySchema(actual, schema("TAKE(@timestamp, 2)", "array"));
131130
verifyDataRows(actual, rows(List.of("2024-01-01T10:00:00.000Z", "2024-01-02T10:00:00.000Z")));
132131
}
@@ -135,7 +134,7 @@ public void testTakeWithAliasField() throws IOException {
135134
public void testAggregationsWithOriginalFieldsStillWork() throws IOException {
136135
JSONObject actual =
137136
executeQuery(
138-
String.format("source=%s | stats MIN(created_at), MAX(value)", TEST_INDEX_ALIAS));
137+
String.format("source=%s | stats MIN(created_at), MAX(value)", TEST_ALIAS_BUG));
139138
verifySchemaInOrder(
140139
actual, schema("MIN(created_at)", "timestamp"), schema("MAX(value)", "int"));
141140
verifyDataRows(actual, rows("2024-01-01 10:00:00", 300));
@@ -147,12 +146,50 @@ public void testUnaffectedAggregationsWithAliasFields() throws IOException {
147146
executeQuery(
148147
String.format(
149148
"source=%s | stats SUM(value_alias), AVG(value_alias), COUNT(value_alias)",
150-
TEST_INDEX_ALIAS));
149+
TEST_ALIAS_BUG));
151150
verifySchemaInOrder(
152151
actual,
153152
schema("SUM(value_alias)", "bigint"),
154153
schema("AVG(value_alias)", "double"),
155154
schema("COUNT(value_alias)", "bigint"));
156155
verifyDataRows(actual, rows(600, 200.0, 3));
157156
}
157+
158+
@Test
159+
public void testAliasTypeWithLastFirstTakeLatestEarliestAggregation() throws IOException {
160+
JSONObject actual =
161+
executeQuery(
162+
String.format(
163+
"source=%s | stats take(original_text, 2), last(original_text),"
164+
+ " first(original_text), take(alias_text, 2), last(alias_text),"
165+
+ " first(alias_text), take(original_col, 2), last(original_col),"
166+
+ " first(original_col), take(alias_col, 2), last(alias_col), first(alias_col),"
167+
+ " latest(original_col), earliest(original_col), latest(alias_col),"
168+
+ " earliest(alias_col),latest(original_text), earliest(original_text),"
169+
+ " latest(alias_text), earliest(alias_text)",
170+
TEST_INDEX_ALIAS));
171+
verifyDataRows(
172+
actual,
173+
rows(
174+
List.of("a b c", "d e f"),
175+
"x y z",
176+
"a b c",
177+
List.of("a b c", "d e f"),
178+
"x y z",
179+
"a b c",
180+
List.of(1, 2),
181+
3,
182+
1,
183+
List.of(1, 2),
184+
3,
185+
1,
186+
3,
187+
1,
188+
3,
189+
1,
190+
"x y z",
191+
"a b c",
192+
"x y z",
193+
"a b c"));
194+
}
158195
}

0 commit comments

Comments
 (0)