From 56dcec87f0d48fe6740d7fc113da01d0eeda822b Mon Sep 17 00:00:00 2001 From: Fernando Velasquez Date: Wed, 16 Feb 2022 15:53:11 -0500 Subject: [PATCH] Improvements to logging --- .../bigquery/relational/BigQueryRelation.java | 192 ++++++++++++++-- .../bigquery/sqlengine/BigQuerySQLEngine.java | 2 +- .../relational/BigQueryRelationTest.java | 208 +++++++++++++++--- 3 files changed, 355 insertions(+), 47 deletions(-) diff --git a/src/main/java/io/cdap/plugin/gcp/bigquery/relational/BigQueryRelation.java b/src/main/java/io/cdap/plugin/gcp/bigquery/relational/BigQueryRelation.java index 5d611d6806..3e08723a6b 100644 --- a/src/main/java/io/cdap/plugin/gcp/bigquery/relational/BigQueryRelation.java +++ b/src/main/java/io/cdap/plugin/gcp/bigquery/relational/BigQueryRelation.java @@ -3,6 +3,7 @@ import com.google.common.annotations.VisibleForTesting; import io.cdap.cdap.etl.api.aggregation.DeduplicateAggregationDefinition; import io.cdap.cdap.etl.api.aggregation.GroupByAggregationDefinition; +import io.cdap.cdap.etl.api.engine.sql.SQLEngineException; import io.cdap.cdap.etl.api.relational.Expression; import io.cdap.cdap.etl.api.relational.InvalidRelation; import io.cdap.cdap.etl.api.relational.Relation; @@ -11,13 +12,16 @@ import io.cdap.plugin.gcp.bigquery.sqlengine.builder.BigQueryGroupBySQLBuilder; import io.cdap.plugin.gcp.bigquery.sqlengine.builder.BigQueryNestedSelectSQLBuilder; import io.cdap.plugin.gcp.bigquery.sqlengine.builder.BigQuerySelectSQLBuilder; +import org.apache.parquet.Strings; import java.util.Collection; import java.util.LinkedHashMap; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.function.Supplier; import java.util.stream.Collectors; +import java.util.stream.Stream; import javax.annotation.Nullable; /** @@ -29,8 +33,9 @@ public class BigQueryRelation implements Relation { private final String datasetName; private final Set columns; private final BigQueryRelation parent; + private final Supplier sqlStatementSupplier; + private Map sourceDatasets; - private final Supplier expressionSupplier; /** * Gets a new BigQueryRelation instance @@ -50,13 +55,22 @@ protected BigQueryRelation(String datasetName, this.datasetName = datasetName; this.columns = columns; this.parent = null; - this.expressionSupplier = () -> { + this.sqlStatementSupplier = () -> { + + // Check if Dataset exists BigQuerySQLDataset sourceDataset = sourceDatasets.get(datasetName); + if (sourceDataset == null) { + throw new SQLEngineException("Unable to find dataset with name " + datasetName); + } + + // Build selected columns for this dataset based on initial columns present in relation. Map selectedColumns = getSelectedColumns(columns); + // Build source table identifier using the Project, Dataset and Table String sourceTable = String.format("%s.%s.%s", sourceDataset.getBigQueryProject(), sourceDataset.getBigQueryDataset(), sourceDataset.getBigQueryTable()); + // Build initial select from the source table. return buildBaseSelect(selectedColumns, sourceTable, datasetName); }; } @@ -65,11 +79,11 @@ protected BigQueryRelation(String datasetName, protected BigQueryRelation(String datasetName, Set columns, BigQueryRelation parent, - Supplier expressionSupplier) { + Supplier sqlStatementSupplier) { this.datasetName = datasetName; this.columns = columns; this.parent = parent; - this.expressionSupplier = expressionSupplier; + this.sqlStatementSupplier = sqlStatementSupplier; } private Relation getInvalidRelation(String validationError) { @@ -88,6 +102,7 @@ public String getValidationError() { /** * Get parent relation + * * @return parent relation instance. This can be null for a base relation. */ @Nullable @@ -97,14 +112,16 @@ public Relation getParent() { /** * Method use to materialize the transform expression from this dataset. + * * @return transform expression used when executing SQL statements. */ - public String getTransformExpression() { - return expressionSupplier.get(); + public String getSQLStatement() { + return sqlStatementSupplier.get(); } /** * Get columns defined in this relation + * * @return Columns defined in this relation. */ public Set getColumns() { @@ -125,6 +142,7 @@ public void setInputDatasets(Map datasets) { /** * Get the dataset name for this relation. + * * @return dataset name */ public String getDatasetName() { @@ -133,6 +151,7 @@ public String getDatasetName() { /** * Get a new relation with a redefined dataset name. + * * @param newDatasetName new dataset name for this relation. * @return new Relation with the new dataset name. */ @@ -140,7 +159,7 @@ public Relation setDatasetName(String newDatasetName) { Map selectedColumns = getSelectedColumns(columns); // Build new transform expression and return new instance. Supplier supplier = - () -> buildNestedSelect(selectedColumns, getTransformExpression(), newDatasetName, null); + () -> buildNestedSelect(selectedColumns, getSQLStatement(), newDatasetName, null); return new BigQueryRelation(newDatasetName, columns, this, supplier); } @@ -148,7 +167,8 @@ public Relation setDatasetName(String newDatasetName) { public Relation setColumn(String column, Expression value) { // check if expression is supported and valid if (!supportsExpression(value)) { - return getInvalidRelation("Unsupported or invalid expression type."); + return getInvalidRelation("Unsupported or invalid expression type : " + + getInvalidExpressionCause(value)); } Map selectedColumns = getSelectedColumns(columns); @@ -156,7 +176,7 @@ public Relation setColumn(String column, Expression value) { // Build new transform expression and return new instance. Supplier supplier = - () -> buildNestedSelect(selectedColumns, getTransformExpression(), datasetName, null); + () -> buildNestedSelect(selectedColumns, getSQLStatement(), datasetName, null); return new BigQueryRelation(datasetName, selectedColumns.keySet(), this, supplier); } @@ -173,7 +193,7 @@ public Relation dropColumn(String column) { // Build new transform expression and return new instance. Supplier supplier = - () -> buildNestedSelect(selectedColumns, getTransformExpression(), datasetName, null); + () -> buildNestedSelect(selectedColumns, getSQLStatement(), datasetName, null); return new BigQueryRelation(datasetName, selectedColumns.keySet(), this, supplier); } @@ -181,12 +201,13 @@ public Relation dropColumn(String column) { public Relation select(Map columns) { // check if all expressions are supported and valid if (!supportsExpressions(columns.values())) { - return getInvalidRelation("Unsupported or invalid expression type."); + return getInvalidRelation("Unsupported or invalid expression type: " + + getInvalidExpressionCauses(columns.values())); } // Build new transform expression and return new instance. Supplier supplier = - () -> buildNestedSelect(columns, getTransformExpression(), datasetName, null); + () -> buildNestedSelect(columns, getSQLStatement(), datasetName, null); return new BigQueryRelation(datasetName, columns.keySet(), this, supplier); } @@ -194,13 +215,14 @@ public Relation select(Map columns) { public Relation filter(Expression filter) { // check if expression is supported and valid if (!supportsExpression(filter)) { - return getInvalidRelation("Unsupported or invalid expression type."); + return getInvalidRelation("Unsupported or invalid expression type: " + + getInvalidExpressionCause(filter)); } Map selectedColumns = getSelectedColumns(columns); // Build new transform expression and return new instance. Supplier supplier = - () -> buildNestedSelect(selectedColumns, getTransformExpression(), datasetName, filter); + () -> buildNestedSelect(selectedColumns, getSQLStatement(), datasetName, filter); return new BigQueryRelation(datasetName, columns, this, supplier); } @@ -208,15 +230,16 @@ public Relation filter(Expression filter) { public Relation groupBy(GroupByAggregationDefinition definition) { // Ensure all expressions supplied in this definition are supported and valid if (!supportsGroupByAggregationDefinition(definition)) { - return getInvalidRelation("DeduplicateAggregationDefinition contains " + - "unsupported or invalid expressions"); + return getInvalidRelation("DeduplicateAggregationDefinition contains " + + "unsupported or invalid expressions: " + + collectGroupByAggregationDefinitionErrors(definition)); } Set columns = definition.getSelectExpressions().keySet(); // Build new transform expression and return new instance. Supplier supplier = - () -> buildGroupBy(definition, getTransformExpression(), datasetName); + () -> buildGroupBy(definition, getSQLStatement(), datasetName); return new BigQueryRelation(datasetName, columns, this, supplier); } @@ -224,13 +247,14 @@ public Relation groupBy(GroupByAggregationDefinition definition) { public Relation deduplicate(DeduplicateAggregationDefinition definition) { // Ensure all expressions supplied in this definition are supported and valid if (!supportsDeduplicateAggregationDefinition(definition)) { - return getInvalidRelation("DeduplicateAggregationDefinition contains " + - "unsupported or invalid expressions"); + return getInvalidRelation("DeduplicateAggregationDefinition contains " + + "unsupported or invalid expressions: " + + collectDeduplicateAggregationDefinitionErrors(definition)); } Set columns = definition.getSelectExpressions().keySet(); Supplier supplier = - () -> buildDeduplicate(definition, getTransformExpression(), datasetName); + () -> buildDeduplicate(definition, getSQLStatement(), datasetName); return new BigQueryRelation(datasetName, columns, this, supplier); } @@ -335,6 +359,41 @@ protected static boolean supportsGroupByAggregationDefinition(GroupByAggregation && supportsExpressions(groupByExpressions); } + /** + * Check if all expressions contained in a {@link GroupByAggregationDefinition} are supported. + * + * @param def {@link GroupByAggregationDefinition} to verify. + * @return boolean specifying if all expressions are supported or not. + */ + @VisibleForTesting + @Nullable + protected static String collectGroupByAggregationDefinitionErrors(GroupByAggregationDefinition def) { + // If the expression is valid, this must be null. + if (supportsGroupByAggregationDefinition(def)) { + return null; + } + + // Gets all expressions defined in this definition + Collection selectExpressions = def.getSelectExpressions().values(); + Collection groupByExpressions = def.getGroupByExpressions(); + + // Get all invalid expression causes, and prepend field origin to error reasons + String selectErrors = getInvalidExpressionCauses(selectExpressions); + if (!Strings.isNullOrEmpty(selectErrors)) { + selectErrors = "Select fields: " + selectErrors; + } + + String groupByErrors = getInvalidExpressionCauses(groupByExpressions); + if (!Strings.isNullOrEmpty(groupByErrors)) { + groupByErrors = "Grouping fields: " + groupByErrors; + } + + // Build string which concatenates all non-empty error groups, separated by a hyphen. + return Stream.of(selectErrors, groupByErrors) + .filter(Objects::nonNull) + .collect(Collectors.joining(" - ")); + } + /** * Builds a new {@link GroupByAggregationDefinition} with qualified aliases for the Select Expression. * @@ -370,6 +429,50 @@ && supportsExpressions(dedupExpressions) && supportsExpressions(orderExpressions); } + /** + * Check if all expressions contained in a {@link DeduplicateAggregationDefinition} are supported. + * + * @param def {@link DeduplicateAggregationDefinition} to verify. + * @return boolean specifying if all expressions are supported or not. + */ + @VisibleForTesting + @Nullable + protected static String collectDeduplicateAggregationDefinitionErrors(DeduplicateAggregationDefinition def) { + // If the expression is valid, this must be null. + if (supportsDeduplicateAggregationDefinition(def)) { + return null; + } + + // Gets all expressions defined in this definition + Collection selectExpressions = def.getSelectExpressions().values(); + Collection dedupExpressions = def.getGroupByExpressions(); + Collection orderExpressions = def.getFilterExpressions() + .stream() + .map(DeduplicateAggregationDefinition.FilterExpression::getExpression) + .collect(Collectors.toSet()); + + // Get all invalid expression causes, and prepend origin to error reasons + String selectErrors = getInvalidExpressionCauses(selectExpressions); + if (!Strings.isNullOrEmpty(selectErrors)) { + selectErrors = "Select fields: " + selectErrors; + } + + String dedupErrors = getInvalidExpressionCauses(dedupExpressions); + if (!Strings.isNullOrEmpty(dedupErrors)) { + dedupErrors = "Deduplication fields: " + dedupErrors; + } + + String orderErrors = getInvalidExpressionCauses(orderExpressions); + if (!Strings.isNullOrEmpty(orderErrors)) { + orderErrors = "Order fields: " + orderErrors; + } + + // Build string which concatenates all non-empty error groups, separated by a hyphen. + return Stream.of(selectErrors, dedupErrors, orderErrors) + .filter(Objects::nonNull) + .collect(Collectors.joining(" - ")); + } + /** * Builds a new {@link DeduplicateAggregationDefinition} with qualified aliases for the Select Expression. * @@ -406,4 +509,53 @@ protected static boolean supportsExpression(Expression expression) { return expression instanceof SQLExpression && expression.isValid(); } + /** + * Collects validation errors for a colleciton of expressions + * + * @param expressions collection containing expressions to verify + * @return boolean specifying if all expressions are supported or not. + */ + @VisibleForTesting + @Nullable + protected static String getInvalidExpressionCauses(Collection expressions) { + // If the expressions are valid, this is null. + if (supportsExpressions(expressions)) { + return null; + } + + // Collect failure reasons + return expressions.stream() + .map(BigQueryRelation::getInvalidExpressionCause) + .filter(Objects::nonNull) + .collect(Collectors.joining(" ; ")); + } + + /** + * Gets the validation error for an expression + * + * @param expression expression to verity + * @return Validation error for this expression, or null if the expression is valid. + */ + @VisibleForTesting + @Nullable + protected static String getInvalidExpressionCause(Expression expression) { + // If the expressions are valid, this is null. + if (supportsExpression(expression)) { + return null; + } + + // Null expressions are not supported + if (expression == null) { + return "Expression is null"; + } + + // We only support SQLExpression + if (!(expression instanceof SQLExpression)) { + return "Unsupported Expression type \"" + expression.getClass().getCanonicalName() + "\""; + } + + // Return validation error for this expression. + return expression.getValidationError() != null ? expression.getValidationError() : "Unknown"; + } + } diff --git a/src/main/java/io/cdap/plugin/gcp/bigquery/sqlengine/BigQuerySQLEngine.java b/src/main/java/io/cdap/plugin/gcp/bigquery/sqlengine/BigQuerySQLEngine.java index c3a92fce18..e7054bff37 100644 --- a/src/main/java/io/cdap/plugin/gcp/bigquery/sqlengine/BigQuerySQLEngine.java +++ b/src/main/java/io/cdap/plugin/gcp/bigquery/sqlengine/BigQuerySQLEngine.java @@ -479,7 +479,7 @@ public SQLDataset transform(SQLTransformRequest context) throws SQLEngineExcepti return executeSelect(context.getOutputDatasetName(), context.getOutputSchema(), BigQueryJobType.TRANSFORM, - relation.getTransformExpression()); + relation.getSQLStatement()); } private BigQuerySelectDataset executeSelect(String datasetName, diff --git a/src/test/java/io/cdap/plugin/gcp/bigquery/relational/BigQueryRelationTest.java b/src/test/java/io/cdap/plugin/gcp/bigquery/relational/BigQueryRelationTest.java index 433bd11fe1..89653618bd 100644 --- a/src/test/java/io/cdap/plugin/gcp/bigquery/relational/BigQueryRelationTest.java +++ b/src/test/java/io/cdap/plugin/gcp/bigquery/relational/BigQueryRelationTest.java @@ -29,6 +29,8 @@ import org.mockito.junit.MockitoJUnitRunner; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.LinkedHashMap; import java.util.LinkedHashSet; @@ -77,7 +79,7 @@ public void testSetColumn() { Assert.assertTrue(columns.contains("b")); Assert.assertTrue(columns.contains("c")); Assert.assertEquals("SELECT `a` AS `a` , `b` AS `b` , a+b AS `c` FROM (select * from tbl) AS `ds`", - bqRelation.getTransformExpression()); + bqRelation.getSQLStatement()); } @Test @@ -104,7 +106,7 @@ public void testDropColumn() { Assert.assertEquals(1, columns.size()); Assert.assertTrue(columns.contains("a")); Assert.assertEquals("SELECT `a` AS `a` FROM (select * from tbl) AS `ds`", - bqRelation.getTransformExpression()); + bqRelation.getSQLStatement()); } @Test @@ -131,7 +133,7 @@ public void testSelect() { Assert.assertTrue(columns.contains("new_a")); Assert.assertTrue(columns.contains("new_b")); Assert.assertEquals("SELECT a AS `new_a` , b AS `new_b` FROM (select * from tbl) AS `ds`", - bqRelation.getTransformExpression()); + bqRelation.getSQLStatement()); } @Test @@ -161,7 +163,7 @@ public void testFilter() { Assert.assertTrue(columns.contains("a")); Assert.assertTrue(columns.contains("b")); Assert.assertEquals("SELECT `a` AS `a` , `b` AS `b` FROM (select * from tbl) AS `ds` WHERE a > 2", - bqRelation.getTransformExpression()); + bqRelation.getSQLStatement()); } @Test @@ -214,7 +216,7 @@ public void testGroupBy() { Assert.assertEquals("SELECT a AS `a` , MAX(a) AS `b` , MIN(b) AS `c` , d AS `d` " + "FROM ( select * from tbl ) AS ds " + "GROUP BY a , d", - bqRelation.getTransformExpression()); + bqRelation.getSQLStatement()); } @Test @@ -281,7 +283,7 @@ public void testDeduplicate() { Assert.assertTrue(columns.contains("b")); Assert.assertTrue(columns.contains("c")); Assert.assertTrue(columns.contains("d")); - String transformExpression = bqRelation.getTransformExpression(); + String transformExpression = bqRelation.getSQLStatement(); Assert.assertTrue(transformExpression.startsWith("SELECT * EXCEPT(`rn_")); Assert.assertTrue(transformExpression.contains("`) FROM (SELECT a AS `a` , b AS `b` , c AS `c` , d AS `d` , " + "ROW_NUMBER() OVER ( PARTITION BY a , d ORDER BY a DESC ) AS `")); @@ -331,35 +333,77 @@ public void testSupportsGroupByAggregationDefinition() { // Check valid definition selectFields.put("a", factory.compile("a")); groupByFields.add(factory.compile("a")); - Assert.assertTrue(baseRelation.supportsGroupByAggregationDefinition(def)); + Assert.assertTrue(BigQueryRelation.supportsGroupByAggregationDefinition(def)); selectFields.clear(); groupByFields.clear(); // Check invalid select field - selectFields.put("a", new InvalidSQLExpression("a")); + selectFields.put("a", new InvalidSQLExpression("a", "oops")); groupByFields.add(factory.compile("a")); - Assert.assertFalse(baseRelation.supportsGroupByAggregationDefinition(def)); + Assert.assertFalse(BigQueryRelation.supportsGroupByAggregationDefinition(def)); selectFields.clear(); groupByFields.clear(); // Check unsupported Select field selectFields.put("a", new NonSQLExpression()); groupByFields.add(factory.compile("a")); - Assert.assertFalse(baseRelation.supportsGroupByAggregationDefinition(def)); + Assert.assertFalse(BigQueryRelation.supportsGroupByAggregationDefinition(def)); selectFields.clear(); groupByFields.clear(); // Check invalid groupByField field selectFields.put("a", factory.compile("a")); groupByFields.add(new InvalidSQLExpression("a")); - Assert.assertFalse(baseRelation.supportsGroupByAggregationDefinition(def)); + Assert.assertFalse(BigQueryRelation.supportsGroupByAggregationDefinition(def)); selectFields.clear(); groupByFields.clear(); // Check unsupported groupByField field selectFields.put("a", factory.compile("a")); groupByFields.add(new NonSQLExpression()); - Assert.assertFalse(baseRelation.supportsGroupByAggregationDefinition(def)); + Assert.assertFalse(BigQueryRelation.supportsGroupByAggregationDefinition(def)); + selectFields.clear(); + groupByFields.clear(); + } + + @Test + public void testCollectGroupByAggregationDefinitionErrors() { + Map selectFields = new LinkedHashMap<>(); + List groupByFields = new ArrayList<>(1); + + // Set up mocks + GroupByAggregationDefinition def = mock(GroupByAggregationDefinition.class); + when(def.getSelectExpressions()).thenReturn(selectFields); + when(def.getGroupByExpressions()).thenReturn(groupByFields); + + // Check valid definition + selectFields.put("a", factory.compile("a")); + groupByFields.add(factory.compile("a")); + Assert.assertNull(BigQueryRelation.collectGroupByAggregationDefinitionErrors(def)); + selectFields.clear(); + groupByFields.clear(); + + // Check invalid select field + selectFields.put("a", new InvalidSQLExpression("a", "oops1")); + groupByFields.add(factory.compile("a")); + Assert.assertEquals("Select fields: oops1", + BigQueryRelation.collectGroupByAggregationDefinitionErrors(def)); + selectFields.clear(); + groupByFields.clear(); + + // Check invalid groupByField field + selectFields.put("a", factory.compile("a")); + groupByFields.add(new InvalidSQLExpression("a", "oops2")); + Assert.assertEquals("Grouping fields: oops2", + BigQueryRelation.collectGroupByAggregationDefinitionErrors(def)); + selectFields.clear(); + groupByFields.clear(); + + // Check invalid select and group by field + selectFields.put("a", new InvalidSQLExpression("a", "oops1")); + groupByFields.add(new InvalidSQLExpression("a", "oops2")); + Assert.assertEquals("Select fields: oops1 - Grouping fields: oops2", + BigQueryRelation.collectGroupByAggregationDefinitionErrors(def)); selectFields.clear(); groupByFields.clear(); } @@ -381,7 +425,7 @@ public void testSupportsDeduplicateAggregationDefinition() { dedupFields.add(factory.compile("a")); filterFields.add(new DeduplicateAggregationDefinition.FilterExpression( factory.compile("a"), DeduplicateAggregationDefinition.FilterFunction.MAX)); - Assert.assertTrue(baseRelation.supportsDeduplicateAggregationDefinition(def)); + Assert.assertTrue(BigQueryRelation.supportsDeduplicateAggregationDefinition(def)); selectFields.clear(); dedupFields.clear(); filterFields.clear(); @@ -392,7 +436,7 @@ public void testSupportsDeduplicateAggregationDefinition() { dedupFields.add(factory.compile("a")); filterFields.add(new DeduplicateAggregationDefinition.FilterExpression( factory.compile("a"), DeduplicateAggregationDefinition.FilterFunction.MAX)); - Assert.assertFalse(baseRelation.supportsDeduplicateAggregationDefinition(def)); + Assert.assertFalse(BigQueryRelation.supportsDeduplicateAggregationDefinition(def)); selectFields.clear(); dedupFields.clear(); filterFields.clear(); @@ -402,7 +446,7 @@ public void testSupportsDeduplicateAggregationDefinition() { dedupFields.add(factory.compile("a")); filterFields.add(new DeduplicateAggregationDefinition.FilterExpression( factory.compile("a"), DeduplicateAggregationDefinition.FilterFunction.MAX)); - Assert.assertFalse(baseRelation.supportsDeduplicateAggregationDefinition(def)); + Assert.assertFalse(BigQueryRelation.supportsDeduplicateAggregationDefinition(def)); selectFields.clear(); dedupFields.clear(); filterFields.clear(); @@ -412,7 +456,7 @@ public void testSupportsDeduplicateAggregationDefinition() { dedupFields.add(new InvalidSQLExpression("a")); filterFields.add(new DeduplicateAggregationDefinition.FilterExpression( factory.compile("a"), DeduplicateAggregationDefinition.FilterFunction.MAX)); - Assert.assertFalse(baseRelation.supportsDeduplicateAggregationDefinition(def)); + Assert.assertFalse(BigQueryRelation.supportsDeduplicateAggregationDefinition(def)); selectFields.clear(); dedupFields.clear(); filterFields.clear(); @@ -422,7 +466,7 @@ public void testSupportsDeduplicateAggregationDefinition() { dedupFields.add(new NonSQLExpression()); filterFields.add(new DeduplicateAggregationDefinition.FilterExpression( factory.compile("a"), DeduplicateAggregationDefinition.FilterFunction.MAX)); - Assert.assertFalse(baseRelation.supportsDeduplicateAggregationDefinition(def)); + Assert.assertFalse(BigQueryRelation.supportsDeduplicateAggregationDefinition(def)); selectFields.clear(); dedupFields.clear(); filterFields.clear(); @@ -432,7 +476,7 @@ public void testSupportsDeduplicateAggregationDefinition() { dedupFields.add(factory.compile("a")); filterFields.add(new DeduplicateAggregationDefinition.FilterExpression( new InvalidSQLExpression("a"), DeduplicateAggregationDefinition.FilterFunction.MAX)); - Assert.assertFalse(baseRelation.supportsDeduplicateAggregationDefinition(def)); + Assert.assertFalse(BigQueryRelation.supportsDeduplicateAggregationDefinition(def)); selectFields.clear(); dedupFields.clear(); filterFields.clear(); @@ -442,7 +486,81 @@ public void testSupportsDeduplicateAggregationDefinition() { dedupFields.add(factory.compile("a")); filterFields.add(new DeduplicateAggregationDefinition.FilterExpression( new NonSQLExpression(), DeduplicateAggregationDefinition.FilterFunction.MAX)); - Assert.assertFalse(baseRelation.supportsDeduplicateAggregationDefinition(def)); + Assert.assertFalse(BigQueryRelation.supportsDeduplicateAggregationDefinition(def)); + selectFields.clear(); + dedupFields.clear(); + filterFields.clear(); + } + + @Test + public void testCollectDeduplicateAggregationDefinitionErrors() { + Map selectFields = new LinkedHashMap<>(); + List dedupFields = new ArrayList<>(1); + List filterFields = new ArrayList<>(1); + + // Set up mocks + DeduplicateAggregationDefinition def = mock(DeduplicateAggregationDefinition.class); + when(def.getSelectExpressions()).thenReturn(selectFields); + when(def.getGroupByExpressions()).thenReturn(dedupFields); + when(def.getFilterExpressions()).thenReturn(filterFields); + + // Check valid definition + selectFields.put("a", factory.compile("a")); + dedupFields.add(factory.compile("a")); + filterFields.add(new DeduplicateAggregationDefinition.FilterExpression( + factory.compile("a"), DeduplicateAggregationDefinition.FilterFunction.MAX)); + Assert.assertNull(BigQueryRelation.collectDeduplicateAggregationDefinitionErrors(def)); + selectFields.clear(); + dedupFields.clear(); + filterFields.clear(); + + + // Check invalid select field + selectFields.put("a", new InvalidSQLExpression("a", "oops1")); + dedupFields.add(factory.compile("a")); + filterFields.add(new DeduplicateAggregationDefinition.FilterExpression( + factory.compile("a"), DeduplicateAggregationDefinition.FilterFunction.MAX)); + Assert.assertEquals("Select fields: oops1", + BigQueryRelation.collectDeduplicateAggregationDefinitionErrors(def)); + selectFields.clear(); + dedupFields.clear(); + filterFields.clear(); + + // Check invalid deduplication field + selectFields.put("a", factory.compile("a")); + dedupFields.add(new InvalidSQLExpression("a", "oops2")); + filterFields.add(new DeduplicateAggregationDefinition.FilterExpression( + factory.compile("a"), DeduplicateAggregationDefinition.FilterFunction.MAX)); + Assert.assertEquals("Deduplication fields: oops2", + BigQueryRelation.collectDeduplicateAggregationDefinitionErrors(def)); + selectFields.clear(); + dedupFields.clear(); + filterFields.clear(); + + // Check invalid filter field + selectFields.put("a", factory.compile("a")); + dedupFields.add(factory.compile("a")); + filterFields.add(new DeduplicateAggregationDefinition.FilterExpression( + new InvalidSQLExpression("a", "oops3"), DeduplicateAggregationDefinition.FilterFunction.MAX)); + Assert.assertEquals("Order fields: oops3", + BigQueryRelation.collectDeduplicateAggregationDefinitionErrors(def)); + selectFields.clear(); + dedupFields.clear(); + filterFields.clear(); + + // Check all invalid fields + selectFields.put("a", new InvalidSQLExpression("a", "oops1a")); + selectFields.put("b", new InvalidSQLExpression("b", "oops1b")); + dedupFields.add(new InvalidSQLExpression("a", "oops2a")); + dedupFields.add(new InvalidSQLExpression("b", "oops2b")); + filterFields.add(new DeduplicateAggregationDefinition.FilterExpression( + new InvalidSQLExpression("a", "oops3a"), DeduplicateAggregationDefinition.FilterFunction.MAX)); + filterFields.add(new DeduplicateAggregationDefinition.FilterExpression( + new InvalidSQLExpression("b", "oops3b"), DeduplicateAggregationDefinition.FilterFunction.MAX)); + Assert.assertEquals("Select fields: oops1a ; oops1b" + + " - Deduplication fields: oops2a ; oops2b" + + " - Order fields: oops3a ; oops3b", + BigQueryRelation.collectDeduplicateAggregationDefinitionErrors(def)); selectFields.clear(); dedupFields.clear(); filterFields.clear(); @@ -452,38 +570,76 @@ public void testSupportsDeduplicateAggregationDefinition() { public void testSupportsExpressions() { List expressions = new ArrayList<>(2); expressions.add(factory.compile("a")); - Assert.assertTrue(baseRelation.supportsExpressions(expressions)); + Assert.assertTrue(BigQueryRelation.supportsExpressions(expressions)); expressions.add(new InvalidSQLExpression("a")); - Assert.assertFalse(baseRelation.supportsExpressions(expressions)); + Assert.assertFalse(BigQueryRelation.supportsExpressions(expressions)); expressions.remove(1); expressions.add(new NonSQLExpression()); - Assert.assertFalse(baseRelation.supportsExpressions(expressions)); + Assert.assertFalse(BigQueryRelation.supportsExpressions(expressions)); expressions.remove(1); - Assert.assertTrue(baseRelation.supportsExpressions(expressions)); + Assert.assertTrue(BigQueryRelation.supportsExpressions(expressions)); } @Test public void testSupportsExpression() { - Assert.assertTrue(baseRelation.supportsExpression(factory.compile("a"))); - Assert.assertFalse(baseRelation.supportsExpression(new InvalidSQLExpression("a"))); - Assert.assertFalse(baseRelation.supportsExpression(new NonSQLExpression())); + Assert.assertTrue(BigQueryRelation.supportsExpression(factory.compile("a"))); + Assert.assertFalse(BigQueryRelation.supportsExpression(new InvalidSQLExpression("a"))); + Assert.assertFalse(BigQueryRelation.supportsExpression(new NonSQLExpression())); + } + + @Test + public void testGetInvalidExpressionCauses() { + Collection goodExpressions = Collections.singletonList(factory.compile("a")); + Assert.assertNull(BigQueryRelation.getInvalidExpressionCauses(goodExpressions)); + + Collection badExpressions = Arrays.asList(null, new InvalidSQLExpression("a", "this is not valid")); + Assert.assertEquals( + "Expression is null ; this is not valid", + BigQueryRelation.getInvalidExpressionCauses(badExpressions)); + } + + @Test + public void testGetInvalidExpressionCause() { + Assert.assertNull(BigQueryRelation.getInvalidExpressionCause(factory.compile("a"))); + Assert.assertEquals( + "Expression is null", + BigQueryRelation.getInvalidExpressionCause(null)); + Assert.assertEquals( + "Unsupported Expression type " + + "\"io.cdap.plugin.gcp.bigquery.relational.BigQueryRelationTest.NonSQLExpression\"", + BigQueryRelation.getInvalidExpressionCause(new NonSQLExpression())); + Assert.assertEquals( + "this is not valid", + BigQueryRelation.getInvalidExpressionCause(new InvalidSQLExpression("a", "this is not valid"))); } /** * Invalid SQL expression with the correct class */ private static class InvalidSQLExpression extends SQLExpression { + private final String validationError; + public InvalidSQLExpression(String expression) { + this(expression, "Undefined"); + } + + public InvalidSQLExpression(String expression, String validationError) { super(expression); + this.validationError = validationError; } @Override public boolean isValid() { return false; } + + @Override + public String getValidationError() { + return validationError; + } } /**