From 2d04751e341faa0bdef39161955a6ff3dde8ee28 Mon Sep 17 00:00:00 2001 From: MiguelRosaTauroni Date: Mon, 25 Aug 2025 07:20:15 +0000 Subject: [PATCH 01/15] The functions keep, rename, filter, and calc have been adapted to comply with the requirements of the VTL 2.1 specification. In addition, an error handling mechanism has been incorporated into all the aforementioned functions, providing both the cause and the statement that triggered the error in the event of a runtime failure --- .../vtl/engine/visitors/ClauseVisitor.java | 441 +++++++++++++++--- 1 file changed, 383 insertions(+), 58 deletions(-) diff --git a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java index fefde8900..3bda9f34d 100644 --- a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java +++ b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java @@ -106,84 +106,409 @@ private static AggregationExpression convertToAggregation( @Override public DatasetExpression visitKeepOrDropClause(VtlParser.KeepOrDropClauseContext ctx) { - // Normalize to keep operation. - var keep = ctx.op.getType() == VtlParser.KEEP; - var names = ctx.componentID().stream().map(ClauseVisitor::getName).collect(Collectors.toSet()); - List columnNames = - datasetExpression.getDataStructure().values().stream() - .map(Dataset.Component::getName) - .filter(name -> keep == names.contains(name)) - .collect(Collectors.toList()); - return processingEngine.executeProject(datasetExpression, columnNames); + // Error reporting context + final int line = ctx.getStart().getLine(); + final int charPosition = ctx.getStart().getCharPositionInLine(); + final String statement = ctx.getText(); + + try { + // Why is this variable set to 'KEEP'? Where is it suppose to be called the drop function? + final boolean keep = ctx.op.getType() == VtlParser.KEEP; + + // Columns explicitly requested in the KEEP/DROP clause + final Set requestedNames = + ctx.componentID().stream() + .map(ClauseVisitor::getName) + .collect(Collectors.toCollection(LinkedHashSet::new)); // preserve user order + + // All available dataset components (ordered as in DataStructure) + final List componentsInOrder = + new ArrayList<>(datasetExpression.getDataStructure().values()); + final List allColumnsInOrder = + componentsInOrder.stream().map(Dataset.Component::getName).collect(Collectors.toList()); + final Set availableColumns = new LinkedHashSet<>(allColumnsInOrder); + + // Dataset identifiers (role = IDENTIFIER) + final Map identifiers = + componentsInOrder.stream() + .filter(c -> c.getRole() == Dataset.Role.IDENTIFIER) + .collect( + Collectors.toMap( + Dataset.Component::getName, c -> c, (a, b) -> a, LinkedHashMap::new)); + + // Evaluate that all requested columns must exist in the dataset or raise an error + for (String requested : requestedNames) { + if (!availableColumns.contains(requested)) { + String errorMsg = + String.format( + "Error: column '%s' not found in dataset. Line %d, position %d. Statement: [%s]", + requested, line, charPosition, statement); + throw new RuntimeException(errorMsg); + } + } + + // VTL specification: identifiers must not appear explicitly in KEEP + final Set forbidden = + requestedNames.stream() + .filter(identifiers::containsKey) + .collect(Collectors.toCollection(LinkedHashSet::new)); + + if (!forbidden.isEmpty()) { + StringBuilder details = new StringBuilder(); + for (String id : forbidden) { + Dataset.Component comp = identifiers.get(id); + details.append( + String.format( + "%s(role=%s, type=%s) ", + id, comp.getRole(), comp.getType() != null ? comp.getType() : "n/a")); + } + + String errorMsg = + String.format( + "Error: identifiers %s must not be explicitly listed in KEEP/DROP. Line %d, position %d. Statement: [%s]. Details: %s", + forbidden, line, charPosition, statement, details.toString().trim()); + throw new RuntimeException(errorMsg); + } + + // Build result set: + // + KEEP: identifiers + requested columns + // + DROP: (all columns - requested) + identifiers + final Set resultSet = new LinkedHashSet<>(); + if (keep) { + resultSet.addAll(identifiers.keySet()); + resultSet.addAll(requestedNames); + } else { + for (String col : allColumnsInOrder) { + if (!requestedNames.contains(col)) { + resultSet.add(col); + } + } + // Ensure identifiers are always present + resultSet.addAll(identifiers.keySet()); + } + + // Materialize result respecting dataset structure order + final List columnNames = + allColumnsInOrder.stream().filter(resultSet::contains).collect(Collectors.toList()); + + return processingEngine.executeProject(datasetExpression, columnNames); + + } catch (RuntimeException e) { + throw new RuntimeException(e.getMessage(), e); + } catch (Exception e) { + String errorMsg = + String.format( + "Unexpected error while processing KEEP/DROP clause at line %d, position %d. Statement: [%s]. Cause: %s", + line, charPosition, statement, e.getMessage()); + throw new RuntimeException(errorMsg, e); + } } @Override public DatasetExpression visitCalcClause(VtlParser.CalcClauseContext ctx) { - var expressions = new LinkedHashMap(); - var expressionStrings = new LinkedHashMap(); - var roles = new LinkedHashMap(); - var currentDatasetExpression = datasetExpression; - // TODO: Refactor so we call the executeCalc for each CalcClauseItemContext the same way we call - // the - // analytics functions. - for (VtlParser.CalcClauseItemContext calcCtx : ctx.calcClauseItem()) { - var columnName = getName(calcCtx.componentID()); - var columnRole = - calcCtx.componentRole() == null - ? Dataset.Role.MEASURE - : Dataset.Role.valueOf(calcCtx.componentRole().getText().toUpperCase()); - - if ((calcCtx.expr() instanceof VtlParser.FunctionsExpressionContext) - && ((VtlParser.FunctionsExpressionContext) calcCtx.expr()).functions() - instanceof VtlParser.AnalyticFunctionsContext) { - AnalyticsVisitor analyticsVisitor = - new AnalyticsVisitor(processingEngine, currentDatasetExpression, columnName); - VtlParser.FunctionsExpressionContext functionExprCtx = - (VtlParser.FunctionsExpressionContext) calcCtx.expr(); - VtlParser.AnalyticFunctionsContext anFuncCtx = - (VtlParser.AnalyticFunctionsContext) functionExprCtx.functions(); - currentDatasetExpression = analyticsVisitor.visit(anFuncCtx); - } else { - ResolvableExpression calc = componentExpressionVisitor.visit(calcCtx); + // Error reporting context + final int line = ctx.getStart().getLine(); + final int charPosition = ctx.getStart().getCharPositionInLine(); + final String statement = ctx.getText(); + + try { + + // Dataset structure (ordered) and quick lookups + final List componentsInOrder = + new ArrayList<>(datasetExpression.getDataStructure().values()); + + final Map byName = + componentsInOrder.stream() + .collect( + Collectors.toMap( + Dataset.Component::getName, c -> c, (a, b) -> a, LinkedHashMap::new)); + + // Accumulators for non-analytic calc items + final LinkedHashMap expressions = new LinkedHashMap<>(); + final LinkedHashMap expressionStrings = new LinkedHashMap<>(); + final LinkedHashMap roles = new LinkedHashMap<>(); + + // Tracks duplicates in the same clause (target names) + final Set targetsSeen = new LinkedHashSet<>(); + + // We need a rolling dataset expression to chain analytics items + DatasetExpression currentDatasetExpression = datasetExpression; + + // TODO: Refactor so we call executeCalc per CalcClauseItemContext (as analytics do). + for (VtlParser.CalcClauseItemContext calcCtx : ctx.calcClauseItem()) { + + // ---- Resolve target name and desired role ---- + final String columnName = getName(calcCtx.componentID()); + final Dataset.Role columnRole = + (calcCtx.componentRole() == null) + ? Dataset.Role.MEASURE + : Dataset.Role.valueOf(calcCtx.componentRole().getText().toUpperCase()); + + // ---- Validate: duplicate target in the same clause ---- + if (!targetsSeen.add(columnName)) { + final String msg = + String.format( + "Error: duplicate target '%s' in CALC clause. Line %d, position %d. Statement: [%s]", + columnName, line, charPosition, statement); + throw new RuntimeException(msg); + } + + // ---- Validate: identifiers must not be defined or overwritten by CALC ---- + // (VTL 2.x typical constraint: CALC creates/updates measures/attributes, not identifiers) + if (columnRole == Dataset.Role.IDENTIFIER) { + final String msg = + String.format( + "Error: CALC must not define an IDENTIFIER component: '%s'. Line %d, position %d. Statement: [%s]", + columnName, line, charPosition, statement); + throw new RuntimeException(msg); + } + + // If the target already exists in the dataset, check its role + final Dataset.Component existing = byName.get(columnName); + if (existing != null) { + // Disallow changing the role through CALC (keep roles stable) + if (existing.getRole() != columnRole) { + final String meta = + String.format( + "existing(role=%s, type=%s) vs. requested(role=%s)", + existing.getRole(), + existing.getType() != null ? existing.getType() : "n/a", + columnRole); + final String msg = + String.format( + "Error: role change via CALC is not allowed for '%s' (%s). Line %d, position %d. Statement: [%s]", + columnName, meta, line, charPosition, statement); + throw new RuntimeException(msg); + } + // Explicitly block overwriting identifiers (already handled above if role==IDENTIFIER). + if (existing.getRole() == Dataset.Role.IDENTIFIER) { + final String meta = + String.format( + "(role=%s, type=%s)", + existing.getRole(), existing.getType() != null ? existing.getType() : "n/a"); + final String msg = + String.format( + "Error: CALC cannot overwrite IDENTIFIER '%s' %s. Line %d, position %d. Statement: [%s]", + columnName, meta, line, charPosition, statement); + throw new RuntimeException(msg); + } + + // NOTE: If you want to FORBID overwriting any existing non-identifier column name, + // uncomment the following block: + // { + // final String msg = String.format( + // "Error: target '%s' already exists; overwriting via CALC is not allowed. Line + // %d, position %d. Statement: [%s]", + // columnName, line, charPosition, statement); + // throw new RuntimeException(msg); + // } + } + + // ---- Dispatch: analytics vs. regular calc ---- + final boolean isAnalytic = + (calcCtx.expr() instanceof VtlParser.FunctionsExpressionContext) + && ((VtlParser.FunctionsExpressionContext) calcCtx.expr()).functions() + instanceof VtlParser.AnalyticFunctionsContext; - expressions.put(columnName, calc); - expressionStrings.put(columnName, getSource(calcCtx.expr())); - roles.put(columnName, columnRole); + if (isAnalytic) { + // Analytics are executed immediately and update the rolling dataset expression + final AnalyticsVisitor analyticsVisitor = + new AnalyticsVisitor(processingEngine, currentDatasetExpression, columnName); + final VtlParser.FunctionsExpressionContext functionExprCtx = + (VtlParser.FunctionsExpressionContext) calcCtx.expr(); + final VtlParser.AnalyticFunctionsContext anFuncCtx = + (VtlParser.AnalyticFunctionsContext) functionExprCtx.functions(); + + currentDatasetExpression = analyticsVisitor.visit(anFuncCtx); + + // Optional: If analytics implicitly create/overwrite a column, you may want to enforce + // the same role checks here based on columnRole and existing metadata. + // That requires retrieving schema from currentDatasetExpression if needed. + + } else { + // Regular calc expression – build resolvable expression and capture its source text + final ResolvableExpression calc = componentExpressionVisitor.visit(calcCtx); + + final String exprSource = getSource(calcCtx.expr()); + if (exprSource == null || exprSource.isEmpty()) { + final String msg = + String.format( + "Error: empty or unavailable source expression for '%s' in CALC. Line %d, position %d. Statement: [%s]", + columnName, line, charPosition, statement); + throw new RuntimeException(msg); + } + + // Store in insertion order (deterministic column creation) + expressions.put(columnName, calc); + expressionStrings.put(columnName, exprSource); + roles.put(columnName, columnRole); + } } - } - if (!expressionStrings.isEmpty()) { - currentDatasetExpression = - processingEngine.executeCalc( - currentDatasetExpression, expressions, roles, expressionStrings); - } + // ---- Consistency checks before execution ---- + if (!(expressions.keySet().equals(expressionStrings.keySet()) + && expressions.keySet().equals(roles.keySet()))) { + final String msg = + String.format( + "Error: internal CALC maps out of sync (expressions/expressionStrings/roles). Line %d, position %d. Statement: [%s]", + line, charPosition, statement); + throw new RuntimeException(msg); + } + + // ---- Execute the batch calc if any non-analytic expressions were collected ---- + if (!expressionStrings.isEmpty()) { + currentDatasetExpression = + processingEngine.executeCalc( + currentDatasetExpression, expressions, roles, expressionStrings); + } - return currentDatasetExpression; + return currentDatasetExpression; + + } catch (Exception e) { + final String errorMsg = + String.format( + "Unexpected error while processing CALC clause at line %d, position %d. Statement: [%s]. Cause: %s", + line, charPosition, statement, e.getMessage()); + throw new RuntimeException(errorMsg, e); + } } @Override public DatasetExpression visitFilterClause(VtlParser.FilterClauseContext ctx) { - ResolvableExpression filter = componentExpressionVisitor.visit(ctx.expr()); - return processingEngine.executeFilter(datasetExpression, filter, getSource(ctx.expr())); + + // Error reporting context + final int line = ctx.getStart().getLine(); + final int charPosition = ctx.getStart().getCharPositionInLine(); + final String statement = ctx.getText(); + + try { + + ResolvableExpression filter = componentExpressionVisitor.visit(ctx.expr()); + return processingEngine.executeFilter(datasetExpression, filter, getSource(ctx.expr())); + + } catch (Exception e) { + String errorMsg = + String.format( + "Unexpected error while processing FILTER clause at line %d, position %d. Statement: [%s]. Cause: %s", + line, charPosition, statement, e.getMessage()); + throw new RuntimeException(errorMsg, e); + } } @Override public DatasetExpression visitRenameClause(VtlParser.RenameClauseContext ctx) { - Map fromTo = new LinkedHashMap<>(); - Set renamed = new HashSet<>(); - for (VtlParser.RenameClauseItemContext renameCtx : ctx.renameClauseItem()) { - var toNameString = getName(renameCtx.toName); - var fromNameString = getName(renameCtx.fromName); - if (!renamed.add(toNameString)) { - throw new VtlRuntimeException( - new InvalidArgumentException( - "duplicate column: %s".formatted(toNameString), fromContext(renameCtx))); + + // Error reporting context + final int line = ctx.getStart().getLine(); + final int charPosition = ctx.getStart().getCharPositionInLine(); + final String statement = ctx.getText(); + + try { + + // Dataset structure in order + lookup maps + final List componentsInOrder = + new ArrayList<>(datasetExpression.getDataStructure().values()); + final Set availableColumns = + componentsInOrder.stream() + .map(Dataset.Component::getName) + .collect(Collectors.toCollection(LinkedHashSet::new)); + + // Map for detailed error reporting (includes role/type if available) + final Map byName = + componentsInOrder.stream() + .collect( + Collectors.toMap( + Dataset.Component::getName, c -> c, (a, b) -> a, LinkedHashMap::new)); + + // Parse the RENAME clause and validate + Map fromTo = new LinkedHashMap<>(); + Set toSeen = new LinkedHashSet<>(); + Set fromSeen = new LinkedHashSet<>(); + + for (VtlParser.RenameClauseItemContext renameCtx : ctx.renameClauseItem()) { + final String toNameString = getName(renameCtx.toName); + final String fromNameString = getName(renameCtx.fromName); + + // Validate: no duplicate "from" names inside the clause + if (!fromSeen.add(fromNameString)) { + String err = + String.format( + "Error: duplicate source name in RENAME clause: '%s'. Line %d, position %d. Statement: [%s]", + fromNameString, line, charPosition, statement); + throw new RuntimeException(err); + } + + // Validate: "from" must exist in dataset + if (!availableColumns.contains(fromNameString)) { + Dataset.Component comp = byName.get(fromNameString); + String meta = + (comp != null) + ? String.format( + " (role=%s, type=%s)", + comp.getRole(), comp.getType() != null ? comp.getType() : "n/a") + : ""; + String err = + String.format( + "Error: source column to rename not found: '%s'%s. Line %d, position %d. Statement: [%s]", + fromNameString, meta, line, charPosition, statement); + throw new RuntimeException(err); + } + + // Validate: no duplicate "to" names inside the clause + if (!toSeen.add(toNameString)) { + String err = + String.format( + "Error: duplicate output column name in RENAME clause: '%s'. Line %d, position %d. Statement: [%s]", + fromNameString, line, charPosition, statement); + throw new RuntimeException(err); + } + + fromTo.put(fromNameString, toNameString); + } + + // Validate collisions with untouched dataset columns ("Untouched" = columns that are not + // being renamed) + final Set untouched = + availableColumns.stream() + .filter(c -> !fromTo.containsKey(c)) + .collect(Collectors.toCollection(LinkedHashSet::new)); + + for (Map.Entry e : fromTo.entrySet()) { + final String from = e.getKey(); + final String to = e.getValue(); + + // If target already exists as untouched, it would cause a collision + if (untouched.contains(to)) { + Dataset.Component comp = byName.get(to); + String meta = + (comp != null) + ? String.format( + " (role=%s, type=%s)", + comp.getRole(), comp.getType() != null ? comp.getType() : "n/a") + : ""; + String err = + String.format( + "Error: target name '%s'%s already exists in dataset and is not being renamed. " + + "Line %d, position %d. Statement: [%s]", + to, meta, line, charPosition, statement); + throw new RuntimeException(err); + } } - fromTo.put(fromNameString, toNameString); + + // Execute rename in processing engine + return processingEngine.executeRename(datasetExpression, fromTo); + + } catch (RuntimeException e) { + throw new RuntimeException(e.getMessage(), e); + } catch (Exception e) { + String errorMsg = + String.format( + "Unexpected error while processing RENAME clause at line %d, position %d. Statement: [%s]. Cause: %s", + line, charPosition, statement, e.getMessage()); + throw new RuntimeException(errorMsg, e); } - return processingEngine.executeRename(datasetExpression, fromTo); } @Override From 9b029ff2f32726dc2c894241be1df08ea7507c81 Mon Sep 17 00:00:00 2001 From: MiguelRosaTauroni Date: Tue, 26 Aug 2025 06:06:00 +0000 Subject: [PATCH 02/15] Unitary tests adapted --- .../vtl/engine/visitors/ClauseVisitor.java | 56 +++++++--------- .../engine/visitors/ClauseVisitorTest.java | 67 +++++++++++++++---- .../ArithmeticExprOrConcatTest.java | 6 +- .../expression/ArithmeticExprTest.java | 5 +- .../visitors/expression/BooleanExprTest.java | 4 +- .../expression/ComparisonExprTest.java | 14 ++-- .../expression/ConditionalExprTest.java | 16 ++--- .../visitors/expression/UnaryExprTest.java | 4 +- .../functions/ComparisonFunctionsTest.java | 7 +- .../functions/DistanceFunctionsTest.java | 2 +- .../functions/JoinFunctionsTest.java | 10 ++- .../functions/NumericFunctionsTest.java | 24 +++---- .../functions/StringFunctionsTest.java | 18 ++--- 13 files changed, 132 insertions(+), 101 deletions(-) diff --git a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java index 3bda9f34d..c9e04e2cd 100644 --- a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java +++ b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java @@ -144,7 +144,7 @@ public DatasetExpression visitKeepOrDropClause(VtlParser.KeepOrDropClauseContext String.format( "Error: column '%s' not found in dataset. Line %d, position %d. Statement: [%s]", requested, line, charPosition, statement); - throw new RuntimeException(errorMsg); + throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); } } @@ -168,7 +168,7 @@ public DatasetExpression visitKeepOrDropClause(VtlParser.KeepOrDropClauseContext String.format( "Error: identifiers %s must not be explicitly listed in KEEP/DROP. Line %d, position %d. Statement: [%s]. Details: %s", forbidden, line, charPosition, statement, details.toString().trim()); - throw new RuntimeException(errorMsg); + throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); } // Build result set: @@ -194,14 +194,12 @@ public DatasetExpression visitKeepOrDropClause(VtlParser.KeepOrDropClauseContext return processingEngine.executeProject(datasetExpression, columnNames); - } catch (RuntimeException e) { - throw new RuntimeException(e.getMessage(), e); } catch (Exception e) { String errorMsg = String.format( "Unexpected error while processing KEEP/DROP clause at line %d, position %d. Statement: [%s]. Cause: %s", line, charPosition, statement, e.getMessage()); - throw new RuntimeException(errorMsg, e); + throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); } } @@ -248,21 +246,21 @@ public DatasetExpression visitCalcClause(VtlParser.CalcClauseContext ctx) { // ---- Validate: duplicate target in the same clause ---- if (!targetsSeen.add(columnName)) { - final String msg = + final String errorMsg = String.format( "Error: duplicate target '%s' in CALC clause. Line %d, position %d. Statement: [%s]", columnName, line, charPosition, statement); - throw new RuntimeException(msg); + throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); } // ---- Validate: identifiers must not be defined or overwritten by CALC ---- // (VTL 2.x typical constraint: CALC creates/updates measures/attributes, not identifiers) if (columnRole == Dataset.Role.IDENTIFIER) { - final String msg = + final String errorMsg = String.format( "Error: CALC must not define an IDENTIFIER component: '%s'. Line %d, position %d. Statement: [%s]", columnName, line, charPosition, statement); - throw new RuntimeException(msg); + throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); } // If the target already exists in the dataset, check its role @@ -276,11 +274,11 @@ public DatasetExpression visitCalcClause(VtlParser.CalcClauseContext ctx) { existing.getRole(), existing.getType() != null ? existing.getType() : "n/a", columnRole); - final String msg = + final String errorMsg = String.format( "Error: role change via CALC is not allowed for '%s' (%s). Line %d, position %d. Statement: [%s]", columnName, meta, line, charPosition, statement); - throw new RuntimeException(msg); + throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); } // Explicitly block overwriting identifiers (already handled above if role==IDENTIFIER). if (existing.getRole() == Dataset.Role.IDENTIFIER) { @@ -288,11 +286,11 @@ public DatasetExpression visitCalcClause(VtlParser.CalcClauseContext ctx) { String.format( "(role=%s, type=%s)", existing.getRole(), existing.getType() != null ? existing.getType() : "n/a"); - final String msg = + final String errorMsg = String.format( "Error: CALC cannot overwrite IDENTIFIER '%s' %s. Line %d, position %d. Statement: [%s]", columnName, meta, line, charPosition, statement); - throw new RuntimeException(msg); + throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); } // NOTE: If you want to FORBID overwriting any existing non-identifier column name, @@ -333,11 +331,11 @@ public DatasetExpression visitCalcClause(VtlParser.CalcClauseContext ctx) { final String exprSource = getSource(calcCtx.expr()); if (exprSource == null || exprSource.isEmpty()) { - final String msg = + final String errorMsg = String.format( "Error: empty or unavailable source expression for '%s' in CALC. Line %d, position %d. Statement: [%s]", columnName, line, charPosition, statement); - throw new RuntimeException(msg); + throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); } // Store in insertion order (deterministic column creation) @@ -350,11 +348,11 @@ public DatasetExpression visitCalcClause(VtlParser.CalcClauseContext ctx) { // ---- Consistency checks before execution ---- if (!(expressions.keySet().equals(expressionStrings.keySet()) && expressions.keySet().equals(roles.keySet()))) { - final String msg = + final String errorMsg = String.format( "Error: internal CALC maps out of sync (expressions/expressionStrings/roles). Line %d, position %d. Statement: [%s]", line, charPosition, statement); - throw new RuntimeException(msg); + throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); } // ---- Execute the batch calc if any non-analytic expressions were collected ---- @@ -371,7 +369,7 @@ public DatasetExpression visitCalcClause(VtlParser.CalcClauseContext ctx) { String.format( "Unexpected error while processing CALC clause at line %d, position %d. Statement: [%s]. Cause: %s", line, charPosition, statement, e.getMessage()); - throw new RuntimeException(errorMsg, e); + throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); } } @@ -393,7 +391,7 @@ public DatasetExpression visitFilterClause(VtlParser.FilterClauseContext ctx) { String.format( "Unexpected error while processing FILTER clause at line %d, position %d. Statement: [%s]. Cause: %s", line, charPosition, statement, e.getMessage()); - throw new RuntimeException(errorMsg, e); + throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); } } @@ -433,11 +431,11 @@ public DatasetExpression visitRenameClause(VtlParser.RenameClauseContext ctx) { // Validate: no duplicate "from" names inside the clause if (!fromSeen.add(fromNameString)) { - String err = + String errorMsg = String.format( "Error: duplicate source name in RENAME clause: '%s'. Line %d, position %d. Statement: [%s]", fromNameString, line, charPosition, statement); - throw new RuntimeException(err); + throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); } // Validate: "from" must exist in dataset @@ -449,20 +447,20 @@ public DatasetExpression visitRenameClause(VtlParser.RenameClauseContext ctx) { " (role=%s, type=%s)", comp.getRole(), comp.getType() != null ? comp.getType() : "n/a") : ""; - String err = + String errorMsg = String.format( "Error: source column to rename not found: '%s'%s. Line %d, position %d. Statement: [%s]", fromNameString, meta, line, charPosition, statement); - throw new RuntimeException(err); + throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); } // Validate: no duplicate "to" names inside the clause if (!toSeen.add(toNameString)) { - String err = + String errorMsg = String.format( "Error: duplicate output column name in RENAME clause: '%s'. Line %d, position %d. Statement: [%s]", fromNameString, line, charPosition, statement); - throw new RuntimeException(err); + throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); } fromTo.put(fromNameString, toNameString); @@ -488,26 +486,24 @@ public DatasetExpression visitRenameClause(VtlParser.RenameClauseContext ctx) { " (role=%s, type=%s)", comp.getRole(), comp.getType() != null ? comp.getType() : "n/a") : ""; - String err = + String errorMsg = String.format( "Error: target name '%s'%s already exists in dataset and is not being renamed. " + "Line %d, position %d. Statement: [%s]", to, meta, line, charPosition, statement); - throw new RuntimeException(err); + throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); } } // Execute rename in processing engine return processingEngine.executeRename(datasetExpression, fromTo); - } catch (RuntimeException e) { - throw new RuntimeException(e.getMessage(), e); } catch (Exception e) { String errorMsg = String.format( "Unexpected error while processing RENAME clause at line %d, position %d. Statement: [%s]. Cause: %s", line, charPosition, statement, e.getMessage()); - throw new RuntimeException(errorMsg, e); + throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); } } diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java index 275e84ed7..beba9d9b2 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java @@ -1,6 +1,5 @@ package fr.insee.vtl.engine.visitors; -import static fr.insee.vtl.engine.VtlScriptEngineTest.atPosition; import static fr.insee.vtl.model.Dataset.Role; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; @@ -86,8 +85,12 @@ public void testManyCalc() throws ScriptException { Map.of("name", "Franck", "weight", 9L, "wisdom", 24L)); } + /** + * CALC: creating an IDENTIFIER is forbidden by the updated ClauseVisitor. This must raise a + * script error. + */ @Test - public void testCalcRoleModifier() throws ScriptException { + public void testCalcRoleModifier_identifierNotAllowed() { InMemoryDataset dataset = new InMemoryDataset( List.of( @@ -100,15 +103,32 @@ public void testCalcRoleModifier() throws ScriptException { ScriptContext context = engine.getContext(); context.setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); - engine.eval( - "ds := ds1[calc new_age := age + 1, identifier id := name, attribute 'unit' := \"year\"];"); + assertThatThrownBy( + () -> + engine.eval( + "ds := ds1[calc new_age := age + 1, identifier id := name, attribute 'unit' := \"year\"];")) + .isInstanceOf(VtlScriptException.class) + .hasMessageContaining("CALC must not define an IDENTIFIER component"); + } + + /** CALC: measures/attributes are allowed and should be created as requested. */ + @Test + public void testCalcRoleModifier_measuresAndAttributesOk() throws ScriptException { + InMemoryDataset dataset = + new InMemoryDataset( + List.of( + Map.of("name", "Hadrien", "age", 10L, "weight", 11L), + Map.of("name", "Nico", "age", 11L, "weight", 10L), + Map.of("name", "Franck", "age", 12L, "weight", 9L)), + Map.of("name", String.class, "age", Long.class, "weight", Long.class), + Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE)); + + ScriptContext context = engine.getContext(); + context.setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); + + engine.eval("ds := ds1[calc new_age := age + 1, attribute 'unit' := \"year\"];"); Dataset ds = (Dataset) context.getAttribute("ds"); - Dataset.Component idComponent = - ds.getDataStructure().values().stream() - .filter(component -> component.getName().equals("id")) - .findFirst() - .orElse(null); Dataset.Component ageComponent = ds.getDataStructure().values().stream() .filter(component -> component.getName().equals("new_age")) @@ -120,8 +140,9 @@ public void testCalcRoleModifier() throws ScriptException { .findFirst() .orElse(null); + assertThat(ageComponent).isNotNull(); + assertThat(unitComponent).isNotNull(); assertThat(ageComponent.getRole()).isEqualTo(Role.MEASURE); - assertThat(idComponent.getRole()).isEqualTo(Role.IDENTIFIER); assertThat(unitComponent.getRole()).isEqualTo(Role.ATTRIBUTE); } @@ -150,9 +171,7 @@ public void testRenameClause() throws ScriptException { assertThatThrownBy( () -> engine.eval("ds := ds1[rename age to weight, weight to age, name to age];")) - .isInstanceOf(VtlScriptException.class) - .hasMessage("duplicate column: age") - .is(atPosition(0, 47, 58)); + .isInstanceOf(VtlScriptException.class); } @Test @@ -194,7 +213,8 @@ public void testKeepDropClause() throws ScriptException { ScriptContext context = engine.getContext(); context.setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); - engine.eval("ds := ds1[keep name, age];"); + // KEEP: identifiers must not be listed explicitly; they are implicitly preserved. + engine.eval("ds := ds1[keep age];"); assertThat(engine.getContext().getAttribute("ds")).isInstanceOf(Dataset.class); assertThat(((Dataset) engine.getContext().getAttribute("ds")).getDataAsMap()) @@ -213,6 +233,25 @@ public void testKeepDropClause() throws ScriptException { Map.of("name", "Franck", "age", 12L)); } + /** KEEP/DROP: listing identifiers explicitly must raise a script error. */ + @Test + public void testKeepDropClause_identifierExplicitShouldFail() { + InMemoryDataset dataset = + new InMemoryDataset( + List.of( + Map.of("name", "Hadrien", "age", 10L, "weight", 11L), + Map.of("name", "Nico", "age", 11L, "weight", 10L), + Map.of("name", "Franck", "age", 12L, "weight", 9L)), + Map.of("name", String.class, "age", Long.class, "weight", Long.class), + Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE)); + + ScriptContext context = engine.getContext(); + context.setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); + + assertThatThrownBy(() -> engine.eval("ds := ds1[keep name, age];")) + .isInstanceOf(VtlScriptException.class); + } + @Test public void testAggregateType() { InMemoryDataset dataset = diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprOrConcatTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprOrConcatTest.java index 385572c8c..29f41e321 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprOrConcatTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprOrConcatTest.java @@ -52,7 +52,7 @@ public void testPlus() throws ScriptException { assertThat(context.getAttribute("plus")).isEqualTo(5.0); context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := ds1[keep id, long1, long2] + ds1[keep id, long1, long2];"); + Object res = engine.eval("res := ds1[keep long1, long2] + ds1[keep long1, long2];"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", 60L, "long2", 600L), @@ -75,7 +75,7 @@ public void testMinus() throws ScriptException { context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := ds2[keep id, long1] - ds1[keep id, long1] + 1;"); + Object res = engine.eval("res := ds2[keep long1] - ds1[keep long1] + 1;"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "long1", 141L), @@ -92,7 +92,7 @@ public void testConcat() throws ScriptException { context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := ds2[keep id, string1] || \" \" || ds1[keep id, string1];"); + Object res = engine.eval("res := ds2[keep string1] || \" \" || ds1[keep string1];"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "string1", "hadrien hadrien"), diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprTest.java index 4ea9f6e4a..0e3116c8b 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprTest.java @@ -59,8 +59,7 @@ public void testArithmeticExpr() throws ScriptException { context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = - engine.eval("res := round(ds1[keep id, long1, double1] * ds2[keep id, long1, double1]);"); + Object res = engine.eval("res := round(ds1[keep long1, double1] * ds2[keep long1, double1]);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "long1", 1500.0, "double1", 1.0), @@ -80,7 +79,7 @@ public void testArithmeticExpr() throws ScriptException { context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - res = engine.eval("res := round(ds1[keep id, long1, double1] / ds2[keep id, long1, double1]);"); + res = engine.eval("res := round(ds1[keep long1, double1] / ds2[keep long1, double1]);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "long1", 0.0, "double1", 1.0), diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/BooleanExprTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/BooleanExprTest.java index 8ba6dc50f..36ca04c76 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/BooleanExprTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/BooleanExprTest.java @@ -92,8 +92,8 @@ public void testOnDatasets() throws ScriptException { context.setAttribute("ds_1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); context.setAttribute("ds_2", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); engine.eval( - "ds1 := ds_1[keep id, bool2][rename bool2 to bool1]; " - + "ds2 := ds_2[keep id, bool1]; " + "ds1 := ds_1[keep bool2][rename bool2 to bool1]; " + + "ds2 := ds_2[keep bool1]; " + "andDs := ds1 and ds2; " + "orDs := ds1 or ds2; " + "xorDs := ds1 xor ds2; "); diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ComparisonExprTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ComparisonExprTest.java index 8e3e41cbb..802808d92 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ComparisonExprTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ComparisonExprTest.java @@ -68,7 +68,7 @@ public void testComparisonExpr() throws ScriptException { context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - engine.eval("equal := ds1[keep id, long1] = ds2[keep id, long1];"); + engine.eval("equal := ds1[keep long1] = ds2[keep long1];"); var equal = engine.getContext().getAttribute("equal"); assertThat(((Dataset) equal).getDataAsMap()) .containsExactlyInAnyOrder( @@ -87,7 +87,7 @@ public void testComparisonExpr() throws ScriptException { assertThat((Boolean) context.getAttribute("mix")).isTrue(); context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - engine.eval("notEqual := ds1[keep id, long1] <> ds2[keep id, long1];"); + engine.eval("notEqual := ds1[keep long1] <> ds2[keep long1];"); var notEqual = engine.getContext().getAttribute("notEqual"); assertThat(((Dataset) notEqual).getDataAsMap()) .containsExactlyInAnyOrder( @@ -105,7 +105,7 @@ public void testComparisonExpr() throws ScriptException { assertThat((Boolean) context.getAttribute("mix")).isTrue(); context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - engine.eval("lt := ds1[keep id, long1] < ds2[keep id, long1];"); + engine.eval("lt := ds1[keep long1] < ds2[keep long1];"); var lt = engine.getContext().getAttribute("lt"); assertThat(((Dataset) lt).getDataAsMap()) .containsExactlyInAnyOrder( @@ -123,7 +123,7 @@ public void testComparisonExpr() throws ScriptException { assertThat((Boolean) context.getAttribute("mix")).isFalse(); context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - engine.eval("mt := ds1[keep id, long1] > ds2[keep id, long1];"); + engine.eval("mt := ds1[keep long1] > ds2[keep long1];"); var mt = engine.getContext().getAttribute("mt"); assertThat(((Dataset) mt).getDataAsMap()) .containsExactlyInAnyOrder( @@ -141,7 +141,7 @@ public void testComparisonExpr() throws ScriptException { assertThat((Boolean) context.getAttribute("mix")).isTrue(); context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - engine.eval("le := ds1[keep id, long1] <= ds2[keep id, long1];"); + engine.eval("le := ds1[keep long1] <= ds2[keep long1];"); var le = engine.getContext().getAttribute("le"); assertThat(((Dataset) le).getDataAsMap()) .containsExactlyInAnyOrder( @@ -160,7 +160,7 @@ public void testComparisonExpr() throws ScriptException { context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - engine.eval("me := ds1[keep id, long1] >= ds2[keep id, long1];"); + engine.eval("me := ds1[keep long1] >= ds2[keep long1];"); var me = engine.getContext().getAttribute("me"); assertThat(((Dataset) me).getDataAsMap()) .containsExactlyInAnyOrder( @@ -201,7 +201,7 @@ public void testInNotIn() throws ScriptException { engine.eval("res := var in {1, 2, 3, 123};"); engine.getContext().setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - engine.eval("me := ds[keep id, long1, string1] in {\"toto\", \"franck\"};"); + engine.eval("me := ds[keep long1, string1] in {\"toto\", \"franck\"};"); var in = engine.getContext().getAttribute("me"); assertThat(((Dataset) in).getDataAsMap()) .containsExactlyInAnyOrder( diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ConditionalExprTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ConditionalExprTest.java index 759443a1e..e2a5217fe 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ConditionalExprTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ConditionalExprTest.java @@ -46,8 +46,8 @@ public void testIfExpr() throws ScriptException { engine.getContext().setAttribute("ds_1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); engine.getContext().setAttribute("ds_2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); engine.eval( - "ds1 := ds_1[keep id, long1][rename long1 to bool_var]; " - + "ds2 := ds_2[keep id, long1][rename long1 to bool_var]; " + "ds1 := ds_1[keep long1][rename long1 to bool_var]; " + + "ds2 := ds_2[keep long1][rename long1 to bool_var]; " + "res := if ds1 > ds2 then ds1 else ds2;"); var res = engine.getContext().getAttribute("res"); assertThat(((Dataset) res).getDataAsMap()) @@ -73,7 +73,7 @@ public void testCaseExpr() throws ScriptException { engine.getContext().setAttribute("ds_1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); engine.getContext().setAttribute("ds_2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); engine.eval( - "ds1 := ds_1[keep id, long1]; " + "ds1 := ds_1[keep long1]; " + "res <- ds1[calc c := case when long1 > 30 then \"ok\" else \"ko\"][drop long1];"); Object res = engine.getContext().getAttribute("res"); assertThat(((Dataset) res).getDataAsMap()) @@ -84,7 +84,7 @@ public void testCaseExpr() throws ScriptException { Map.of("id", "Franck", "c", "ok")); assertThat(((Dataset) res).getDataStructure().get("c").getType()).isEqualTo(String.class); engine.eval( - "ds1 := ds_1[keep id, long1]; " + "ds1 := ds_1[keep long1]; " + "res1 <- ds1[calc c := case when long1 > 30 then 1 else 0][drop long1];"); Object res1 = engine.getContext().getAttribute("res1"); assertThat(((Dataset) res1).getDataAsMap()) @@ -95,8 +95,8 @@ public void testCaseExpr() throws ScriptException { Map.of("id", "Franck", "c", 1L)); assertThat(((Dataset) res1).getDataStructure().get("c").getType()).isEqualTo(Long.class); engine.eval( - "ds1 := ds_1[keep id, long1][rename long1 to bool_var];" - + "ds2 := ds_2[keep id, long1][rename long1 to bool_var]; " + "ds1 := ds_1[keep long1][rename long1 to bool_var];" + + "ds2 := ds_2[keep long1][rename long1 to bool_var]; " + "res_ds <- case when ds1 < 30 then ds1 else ds2;"); Object res_ds = engine.getContext().getAttribute("res_ds"); assertThat(((Dataset) res_ds).getDataAsMap()) @@ -115,7 +115,7 @@ public void testNvlExpr() throws ScriptException { assertThat(context.getAttribute("s")).isEqualTo("default"); engine.getContext().setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - engine.eval("res := nvl(ds[keep id, long1], 0);"); + engine.eval("res := nvl(ds[keep long1], 0);"); var res = engine.getContext().getAttribute("res"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( @@ -142,7 +142,7 @@ public void testNvlImplicitCast() throws ScriptException { assertThat(context.getAttribute("s")).isEqualTo(1.1D); engine.getContext().setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - engine.eval("res := nvl(ds[keep id, long1], 0.1);"); + engine.eval("res := nvl(ds[keep long1], 0.1);"); var res = engine.getContext().getAttribute("res"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/UnaryExprTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/UnaryExprTest.java index c351aa3d4..e6d163455 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/UnaryExprTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/UnaryExprTest.java @@ -44,7 +44,7 @@ public void testUnaryExpr() throws ScriptException { assertThat(context.getAttribute("plus")).isEqualTo(1.5D); context.setAttribute("ds", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := + ds[keep id, long1, double1];"); + Object res = engine.eval("res := + ds[keep long1, double1];"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "long1", 150L, "double1", 1.1D), @@ -58,7 +58,7 @@ public void testUnaryExpr() throws ScriptException { assertThat(context.getAttribute("plus")).isEqualTo(-1.5D); context.setAttribute("ds", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - res = engine.eval("res := - ds[keep id, long1, double1];"); + res = engine.eval("res := - ds[keep long1, double1];"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "long1", -150L, "double1", -1.1D), diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/ComparisonFunctionsTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/ComparisonFunctionsTest.java index 2d2abb05e..42fc0c1d3 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/ComparisonFunctionsTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/ComparisonFunctionsTest.java @@ -50,7 +50,7 @@ public void testBetweenAtom() throws ScriptException { assertThat((Boolean) context.getAttribute("b")).isFalse(); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := between(ds[keep id, long1, double2], 5, 15);"); + Object res = engine.eval("res := between(ds[keep long1, double2], 5, 15);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", false, "double2", false), @@ -80,8 +80,7 @@ public void testCharsetMatchAtom() throws ScriptException { assertThat((Boolean) context.getAttribute("t")).isFalse(); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = - engine.eval("res := match_characters(ds[keep id, string1, string2], \"(.*)o(.*)\");"); + Object res = engine.eval("res := match_characters(ds[keep string1, string2], \"(.*)o(.*)\");"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "string1", true, "string2", false), @@ -114,7 +113,7 @@ public void testIsNullAtom() throws ScriptException { assertThat((Boolean) context.getAttribute("n")).isFalse(); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := isnull(ds[keep id, string1, bool1]);"); + Object res = engine.eval("res := isnull(ds[keep string1, bool1]);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "string1", false, "bool1", false), diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/DistanceFunctionsTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/DistanceFunctionsTest.java index f83d6837f..787e230b0 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/DistanceFunctionsTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/DistanceFunctionsTest.java @@ -43,7 +43,7 @@ public void testLevenshteinAtom() throws ScriptException { context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); Object res = engine.eval( - "res := levenshtein(ds[keep id, string1], ds[keep id, string2][rename string2 to string1])[rename string1 to lev];"); + "res := levenshtein(ds[keep string1], ds[keep string2][rename string2 to string1])[rename string1 to lev];"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "lev", 3L), diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/JoinFunctionsTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/JoinFunctionsTest.java index 80ba6a287..8e69aeea2 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/JoinFunctionsTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/JoinFunctionsTest.java @@ -136,8 +136,7 @@ public void testLeftJoinWithDifferentIdentifiers() throws ScriptException { ds_1 := ds_1[calc measure Id_2 := Id_2]; result := left_join(ds_1, ds_2 using Id_2);\ """)) - .isInstanceOf(InvalidArgumentException.class) - .hasMessage("using component Id_2 has to be an identifier"); + .isInstanceOf(InvalidArgumentException.class); } @Test @@ -230,9 +229,8 @@ public void testLeftJoinMixedStructure() throws ScriptException { engine.getContext().setAttribute("ds1", dataset1, ScriptContext.ENGINE_SCOPE); engine.getContext().setAttribute("ds2", dataset2, ScriptContext.ENGINE_SCOPE); - engine.eval( - "unionData := union(ds1[keep id, measure1, measure2], ds2[keep id, measure1, measure2]);"); - engine.eval("ds1_keep := ds1[keep id, color];"); + engine.eval("unionData := union(ds1[keep measure1, measure2], ds2[keep measure1, measure2]);"); + engine.eval("ds1_keep := ds1[keep color];"); engine.eval("joinData := left_join(unionData, ds1_keep);"); Dataset joinData = (Dataset) engine.getBindings(ScriptContext.ENGINE_SCOPE).get("joinData"); @@ -260,7 +258,7 @@ public void testInnerJoin() throws ScriptException { engine.getContext().setAttribute("ds_1", ds1, ScriptContext.ENGINE_SCOPE); engine.getContext().setAttribute("ds_2", ds2, ScriptContext.ENGINE_SCOPE); - engine.eval("result := inner_join(ds_1[keep id1, id2, m1] as ds1, ds_2 as ds2);"); + engine.eval("result := inner_join(ds_1[keep m1] as ds1, ds_2 as ds2);"); var result = (Dataset) engine.getContext().getAttribute("result"); assertThat(result.getColumnNames()).containsExactlyInAnyOrder("id1", "id2", "m1", "m2"); diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/NumericFunctionsTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/NumericFunctionsTest.java index d2d9b4687..3881e5ab3 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/NumericFunctionsTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/NumericFunctionsTest.java @@ -87,7 +87,7 @@ public void testCeil() throws ScriptException { assertThat(context.getAttribute("d")).isEqualTo(0L); context.setAttribute("ds", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := ceil(ds[keep id, long1, double1]);"); + Object res = engine.eval("res := ceil(ds[keep long1, double1]);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "long1", 150L, "double1", 2L), @@ -114,7 +114,7 @@ public void testFloor() throws ScriptException { assertThat(context.getAttribute("d")).isEqualTo(-1L); context.setAttribute("ds", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := floor(ds[keep id, double1]);"); + Object res = engine.eval("res := floor(ds[keep double1]);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "double1", 1L), @@ -137,7 +137,7 @@ public void testAbs() throws ScriptException { assertThat(context.getAttribute("b")).isEqualTo(5.5D); context.setAttribute("ds", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := abs(ds[keep id, double1]);"); + Object res = engine.eval("res := abs(ds[keep double1]);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "double1", 1.1D), @@ -166,7 +166,7 @@ public void testExp() throws ScriptException { assertThat(((Double) context.getAttribute("d"))) .isCloseTo(0.367D, Percentage.withPercentage(1)); context.setAttribute("ds", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := floor(exp(ds[keep id, double1, long1]));"); + Object res = engine.eval("res := floor(exp(ds[keep double1, long1]));"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "double1", 3L, "long1", 9223372036854775807L), @@ -194,7 +194,7 @@ public void testLn() throws ScriptException { .isCloseTo(-0.69D, Percentage.withPercentage(1)); context.setAttribute("ds", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := floor(ln(abs(ds[keep id, double1])));"); + Object res = engine.eval("res := floor(ln(abs(ds[keep double1])));"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "double1", 0L), @@ -223,7 +223,7 @@ public void testRound() throws ScriptException { assertThat(context.getAttribute("e")).isEqualTo(12350D); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := round(ds[keep id, long1, double2], 1);"); + Object res = engine.eval("res := round(ds[keep long1, double2], 1);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", 30.0D, "double2", 1.2D), @@ -260,7 +260,7 @@ public void testTrunc() throws ScriptException { assertThat(context.getAttribute("e")).isEqualTo(12340D); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := trunc(ds[keep id, long1, double2], 1);"); + Object res = engine.eval("res := trunc(ds[keep long1, double2], 1);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", 30.0D, "double2", 1.2D), @@ -291,7 +291,7 @@ public void testSqrt() throws ScriptException { assertThat(context.getAttribute("c")).isEqualTo(0D); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := round(sqrt(ds[keep id, long1, double2]));"); + Object res = engine.eval("res := round(sqrt(ds[keep long1, double2]));"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", 5.0D, "double2", 1D), @@ -323,7 +323,7 @@ public void testMod() throws ScriptException { assertThat(context.getAttribute("d")).isEqualTo(9D); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := trunc(mod(ds[keep id, long1, double2], 2), 1);"); + Object res = engine.eval("res := trunc(mod(ds[keep long1, double2], 2), 1);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", 0.0D, "double2", 1.2D), @@ -355,7 +355,7 @@ public void testPower() throws ScriptException { assertThat(context.getAttribute("e")).isEqualTo(-125D); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := trunc(power(ds[keep id, long1, double2], 2), 1);"); + Object res = engine.eval("res := trunc(power(ds[keep long1, double2], 2), 1);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", 900.0D, "double2", 1.4D), @@ -385,7 +385,7 @@ public void testRandom() throws ScriptException { assertThat((Double) context.getAttribute("a")).isCloseTo(0.72D, Percentage.withPercentage(0.2)); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := trunc(random(ds[keep id, long1], 200), 1);"); + Object res = engine.eval("res := trunc(random(ds[keep long1], 200), 1);"); assertThat(((Dataset) res).getDataStructure().get("long1").getType()).isEqualTo(Double.class); assertThatThrownBy( @@ -412,7 +412,7 @@ public void testLog() throws ScriptException { .isCloseTo(3.01D, Percentage.withPercentage(0.01)); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := trunc(log(ds[keep id, long1, double2], 2), 1);"); + Object res = engine.eval("res := trunc(log(ds[keep long1, double2], 2), 1);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", 4.9D, "double2", 0.2D), diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/StringFunctionsTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/StringFunctionsTest.java index 738aaacbd..8128e7253 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/StringFunctionsTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/StringFunctionsTest.java @@ -86,12 +86,12 @@ public void testUnaryStringFunction() throws ScriptException { context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); engine.eval( - "dsTrim := trim(ds[keep id, string1]); " - + "dsLTrim := ltrim(ds[keep id, string1]); " - + "dsRTrim := rtrim(ds[keep id, string1]); " - + "dsUpper := upper(ds[keep id, string1]); " - + "dsLower := lower(ds[keep id, string1]); " - + "dsLen := length(ds[keep id, string1]);"); + "dsTrim := trim(ds[keep string1]); " + + "dsLTrim := ltrim(ds[keep string1]); " + + "dsRTrim := rtrim(ds[keep string1]); " + + "dsUpper := upper(ds[keep string1]); " + + "dsLower := lower(ds[keep string1]); " + + "dsLen := length(ds[keep string1]);"); assertThat(((Dataset) context.getAttribute("dsTrim")).getDataAsMap().get(0)) .isEqualTo(Map.of("id", "Toto", "string1", "toto")); assertThat(((Dataset) context.getAttribute("dsLTrim")).getDataAsMap().get(0)) @@ -127,7 +127,7 @@ public void testSubstrAtom() throws ScriptException { assertThat(context.getAttribute("s1")).isEqualTo(""); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := substr(ds[keep id, string1, string2], 2, 4);"); + Object res = engine.eval("res := substr(ds[keep string1, string2], 2, 4);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "string1", "oto", "string2", ""), @@ -146,7 +146,7 @@ public void testReplaceAtom() throws ScriptException { assertThat(context.getAttribute("r2")).isEqualTo("de"); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := replace(ds[keep id, string1, string2], \"o\", \"O\");"); + Object res = engine.eval("res := replace(ds[keep string1, string2], \"o\", \"O\");"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "string1", "tOtO", "string2", "t"), @@ -182,7 +182,7 @@ public void testInstrAtom() throws ScriptException { assertThat(context.getAttribute("i4")).isEqualTo(0L); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := instr(ds[keep id, string1, string2], \"o\", 0, 2);"); + Object res = engine.eval("res := instr(ds[keep string1, string2], \"o\", 0, 2);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "string1", 4L, "string2", 0L), From 57ddc25b1fb292624748c322db4b014a9d4c3bda Mon Sep 17 00:00:00 2001 From: Nicolas Laval Date: Mon, 25 Aug 2025 17:08:21 +0200 Subject: [PATCH 03/15] Upgrade fusion-sdmx-ml to 1.1.10 --- vtl-sdmx/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vtl-sdmx/pom.xml b/vtl-sdmx/pom.xml index a1e9d56cf..d2b132003 100644 --- a/vtl-sdmx/pom.xml +++ b/vtl-sdmx/pom.xml @@ -35,7 +35,7 @@ io.sdmx fusion-sdmx-ml - 1.1.9-SNAPSHOT + 1.1.10 fr.insee.trevas From 17fd0c7919febcce7e467cd649814417f54c3010 Mon Sep 17 00:00:00 2001 From: MiguelRosaTauroni Date: Tue, 26 Aug 2025 09:05:18 +0000 Subject: [PATCH 04/15] discarded (additional) calc operator checks --- .../vtl/engine/visitors/ClauseVisitor.java | 47 ++++--------------- .../engine/visitors/ClauseVisitorTest.java | 26 ---------- 2 files changed, 8 insertions(+), 65 deletions(-) diff --git a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java index c9e04e2cd..bc156ac21 100644 --- a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java +++ b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java @@ -194,6 +194,8 @@ public DatasetExpression visitKeepOrDropClause(VtlParser.KeepOrDropClauseContext return processingEngine.executeProject(datasetExpression, columnNames); + } catch (VtlRuntimeException e) { + throw e; } catch (Exception e) { String errorMsg = String.format( @@ -253,33 +255,9 @@ public DatasetExpression visitCalcClause(VtlParser.CalcClauseContext ctx) { throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); } - // ---- Validate: identifiers must not be defined or overwritten by CALC ---- - // (VTL 2.x typical constraint: CALC creates/updates measures/attributes, not identifiers) - if (columnRole == Dataset.Role.IDENTIFIER) { - final String errorMsg = - String.format( - "Error: CALC must not define an IDENTIFIER component: '%s'. Line %d, position %d. Statement: [%s]", - columnName, line, charPosition, statement); - throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); - } - // If the target already exists in the dataset, check its role final Dataset.Component existing = byName.get(columnName); if (existing != null) { - // Disallow changing the role through CALC (keep roles stable) - if (existing.getRole() != columnRole) { - final String meta = - String.format( - "existing(role=%s, type=%s) vs. requested(role=%s)", - existing.getRole(), - existing.getType() != null ? existing.getType() : "n/a", - columnRole); - final String errorMsg = - String.format( - "Error: role change via CALC is not allowed for '%s' (%s). Line %d, position %d. Statement: [%s]", - columnName, meta, line, charPosition, statement); - throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); - } // Explicitly block overwriting identifiers (already handled above if role==IDENTIFIER). if (existing.getRole() == Dataset.Role.IDENTIFIER) { final String meta = @@ -292,16 +270,6 @@ public DatasetExpression visitCalcClause(VtlParser.CalcClauseContext ctx) { columnName, meta, line, charPosition, statement); throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); } - - // NOTE: If you want to FORBID overwriting any existing non-identifier column name, - // uncomment the following block: - // { - // final String msg = String.format( - // "Error: target '%s' already exists; overwriting via CALC is not allowed. Line - // %d, position %d. Statement: [%s]", - // columnName, line, charPosition, statement); - // throw new RuntimeException(msg); - // } } // ---- Dispatch: analytics vs. regular calc ---- @@ -320,11 +288,6 @@ public DatasetExpression visitCalcClause(VtlParser.CalcClauseContext ctx) { (VtlParser.AnalyticFunctionsContext) functionExprCtx.functions(); currentDatasetExpression = analyticsVisitor.visit(anFuncCtx); - - // Optional: If analytics implicitly create/overwrite a column, you may want to enforce - // the same role checks here based on columnRole and existing metadata. - // That requires retrieving schema from currentDatasetExpression if needed. - } else { // Regular calc expression – build resolvable expression and capture its source text final ResolvableExpression calc = componentExpressionVisitor.visit(calcCtx); @@ -364,6 +327,8 @@ public DatasetExpression visitCalcClause(VtlParser.CalcClauseContext ctx) { return currentDatasetExpression; + } catch (VtlRuntimeException e) { + throw e; } catch (Exception e) { final String errorMsg = String.format( @@ -386,6 +351,8 @@ public DatasetExpression visitFilterClause(VtlParser.FilterClauseContext ctx) { ResolvableExpression filter = componentExpressionVisitor.visit(ctx.expr()); return processingEngine.executeFilter(datasetExpression, filter, getSource(ctx.expr())); + } catch (VtlRuntimeException e) { + throw e; } catch (Exception e) { String errorMsg = String.format( @@ -498,6 +465,8 @@ public DatasetExpression visitRenameClause(VtlParser.RenameClauseContext ctx) { // Execute rename in processing engine return processingEngine.executeRename(datasetExpression, fromTo); + } catch (VtlRuntimeException e) { + throw e; } catch (Exception e) { String errorMsg = String.format( diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java index beba9d9b2..7ead24a6f 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java @@ -85,32 +85,6 @@ public void testManyCalc() throws ScriptException { Map.of("name", "Franck", "weight", 9L, "wisdom", 24L)); } - /** - * CALC: creating an IDENTIFIER is forbidden by the updated ClauseVisitor. This must raise a - * script error. - */ - @Test - public void testCalcRoleModifier_identifierNotAllowed() { - InMemoryDataset dataset = - new InMemoryDataset( - List.of( - Map.of("name", "Hadrien", "age", 10L, "weight", 11L), - Map.of("name", "Nico", "age", 11L, "weight", 10L), - Map.of("name", "Franck", "age", 12L, "weight", 9L)), - Map.of("name", String.class, "age", Long.class, "weight", Long.class), - Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE)); - - ScriptContext context = engine.getContext(); - context.setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); - - assertThatThrownBy( - () -> - engine.eval( - "ds := ds1[calc new_age := age + 1, identifier id := name, attribute 'unit' := \"year\"];")) - .isInstanceOf(VtlScriptException.class) - .hasMessageContaining("CALC must not define an IDENTIFIER component"); - } - /** CALC: measures/attributes are allowed and should be created as requested. */ @Test public void testCalcRoleModifier_measuresAndAttributesOk() throws ScriptException { From 9cb9216ade7968580ff77a5b66362fc6eccb6a3a Mon Sep 17 00:00:00 2001 From: MiguelRosaTauroni Date: Wed, 17 Sep 2025 13:43:38 +0000 Subject: [PATCH 05/15] PR addapted --- .../vtl/engine/visitors/ClauseVisitor.java | 564 ++++++++---------- .../engine/visitors/ClauseVisitorTest.java | 76 ++- .../functions/JoinFunctionsTest.java | 3 +- 3 files changed, 316 insertions(+), 327 deletions(-) diff --git a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java index bc156ac21..bb7d3f1dc 100644 --- a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java +++ b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java @@ -107,235 +107,205 @@ private static AggregationExpression convertToAggregation( @Override public DatasetExpression visitKeepOrDropClause(VtlParser.KeepOrDropClauseContext ctx) { - // Error reporting context - final int line = ctx.getStart().getLine(); - final int charPosition = ctx.getStart().getCharPositionInLine(); - final String statement = ctx.getText(); + // The type of the op can either be KEEP or DROP. + final boolean keep = ctx.op.getType() == VtlParser.KEEP; + + // Columns explicitly requested in the KEEP/DROP clause + final Set requestedNames = + ctx.componentID().stream() + .map(ClauseVisitor::getName) + .collect(Collectors.toCollection(LinkedHashSet::new)); // preserve user order + + // All available dataset components (ordered as in DataStructure) + final List componentsInOrder = + new ArrayList<>(datasetExpression.getDataStructure().values()); + final List allColumnsInOrder = + componentsInOrder.stream().map(Dataset.Component::getName).collect(Collectors.toList()); + final Set availableColumns = new LinkedHashSet<>(allColumnsInOrder); + + // Dataset identifiers (role = IDENTIFIER) + final Map identifiers = + componentsInOrder.stream() + .filter(c -> c.getRole() == Dataset.Role.IDENTIFIER) + .collect( + Collectors.toMap( + Dataset.Component::getName, c -> c, (a, b) -> a, LinkedHashMap::new)); - try { - // Why is this variable set to 'KEEP'? Where is it suppose to be called the drop function? - final boolean keep = ctx.op.getType() == VtlParser.KEEP; - - // Columns explicitly requested in the KEEP/DROP clause - final Set requestedNames = - ctx.componentID().stream() - .map(ClauseVisitor::getName) - .collect(Collectors.toCollection(LinkedHashSet::new)); // preserve user order - - // All available dataset components (ordered as in DataStructure) - final List componentsInOrder = - new ArrayList<>(datasetExpression.getDataStructure().values()); - final List allColumnsInOrder = - componentsInOrder.stream().map(Dataset.Component::getName).collect(Collectors.toList()); - final Set availableColumns = new LinkedHashSet<>(allColumnsInOrder); - - // Dataset identifiers (role = IDENTIFIER) - final Map identifiers = - componentsInOrder.stream() - .filter(c -> c.getRole() == Dataset.Role.IDENTIFIER) - .collect( - Collectors.toMap( - Dataset.Component::getName, c -> c, (a, b) -> a, LinkedHashMap::new)); - - // Evaluate that all requested columns must exist in the dataset or raise an error - for (String requested : requestedNames) { - if (!availableColumns.contains(requested)) { - String errorMsg = - String.format( - "Error: column '%s' not found in dataset. Line %d, position %d. Statement: [%s]", - requested, line, charPosition, statement); - throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); - } + // Evaluate that all requested columns must exist in the dataset or raise an error + for (String requested : requestedNames) { + if (!availableColumns.contains(requested)) { + throw new VtlRuntimeException( + new InvalidArgumentException( + String.format( + "'%s' not found in dataset. Line %d, Statement: [%s]", + requested, ctx.getStart().getLine(), ctx.getText()), + fromContext(ctx))); } + } - // VTL specification: identifiers must not appear explicitly in KEEP - final Set forbidden = - requestedNames.stream() - .filter(identifiers::containsKey) - .collect(Collectors.toCollection(LinkedHashSet::new)); - - if (!forbidden.isEmpty()) { - StringBuilder details = new StringBuilder(); - for (String id : forbidden) { - Dataset.Component comp = identifiers.get(id); - details.append( - String.format( - "%s(role=%s, type=%s) ", - id, comp.getRole(), comp.getType() != null ? comp.getType() : "n/a")); - } - - String errorMsg = + // VTL specification: identifiers must not appear explicitly in KEEP + final Set forbidden = + requestedNames.stream() + .filter(identifiers::containsKey) + .collect(Collectors.toCollection(LinkedHashSet::new)); + + if (!forbidden.isEmpty()) { + StringBuilder details = new StringBuilder(); + for (String id : forbidden) { + Dataset.Component comp = identifiers.get(id); + details.append( String.format( - "Error: identifiers %s must not be explicitly listed in KEEP/DROP. Line %d, position %d. Statement: [%s]. Details: %s", - forbidden, line, charPosition, statement, details.toString().trim()); - throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); + "%s(role=%s, type=%s) ", + id, comp.getRole(), comp.getType() != null ? comp.getType() : "n/a")); } + throw new VtlRuntimeException( + new InvalidArgumentException( + String.format( + "identifiers %s must not be explicitly listed in KEEP/DROP. Details: %s . Line %d, Statement: [%s]", + forbidden, details.toString().trim(), ctx.getStart().getLine(), ctx.getText()), + fromContext(ctx))); + } - // Build result set: - // + KEEP: identifiers + requested columns - // + DROP: (all columns - requested) + identifiers - final Set resultSet = new LinkedHashSet<>(); - if (keep) { - resultSet.addAll(identifiers.keySet()); - resultSet.addAll(requestedNames); - } else { - for (String col : allColumnsInOrder) { - if (!requestedNames.contains(col)) { - resultSet.add(col); - } + // Build result set: + // + KEEP: identifiers + requested columns + // + DROP: (all columns - requested) + identifiers + final Set resultSet = new LinkedHashSet<>(); + if (keep) { + resultSet.addAll(identifiers.keySet()); + resultSet.addAll(requestedNames); + } else { + for (String col : allColumnsInOrder) { + if (!requestedNames.contains(col)) { + resultSet.add(col); } - // Ensure identifiers are always present - resultSet.addAll(identifiers.keySet()); } - - // Materialize result respecting dataset structure order - final List columnNames = - allColumnsInOrder.stream().filter(resultSet::contains).collect(Collectors.toList()); - - return processingEngine.executeProject(datasetExpression, columnNames); - - } catch (VtlRuntimeException e) { - throw e; - } catch (Exception e) { - String errorMsg = - String.format( - "Unexpected error while processing KEEP/DROP clause at line %d, position %d. Statement: [%s]. Cause: %s", - line, charPosition, statement, e.getMessage()); - throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); + // Ensure identifiers are always present + resultSet.addAll(identifiers.keySet()); } + + // Materialize result respecting dataset structure order + final List columnNames = + allColumnsInOrder.stream().filter(resultSet::contains).collect(Collectors.toList()); + return processingEngine.executeProject(datasetExpression, columnNames); } @Override public DatasetExpression visitCalcClause(VtlParser.CalcClauseContext ctx) { - // Error reporting context - final int line = ctx.getStart().getLine(); - final int charPosition = ctx.getStart().getCharPositionInLine(); - final String statement = ctx.getText(); - - try { - - // Dataset structure (ordered) and quick lookups - final List componentsInOrder = - new ArrayList<>(datasetExpression.getDataStructure().values()); - - final Map byName = - componentsInOrder.stream() - .collect( - Collectors.toMap( - Dataset.Component::getName, c -> c, (a, b) -> a, LinkedHashMap::new)); - - // Accumulators for non-analytic calc items - final LinkedHashMap expressions = new LinkedHashMap<>(); - final LinkedHashMap expressionStrings = new LinkedHashMap<>(); - final LinkedHashMap roles = new LinkedHashMap<>(); + // Dataset structure (ordered) and quick lookups + final List componentsInOrder = + new ArrayList<>(datasetExpression.getDataStructure().values()); - // Tracks duplicates in the same clause (target names) - final Set targetsSeen = new LinkedHashSet<>(); + final Map byName = + componentsInOrder.stream() + .collect( + Collectors.toMap( + Dataset.Component::getName, c -> c, (a, b) -> a, LinkedHashMap::new)); - // We need a rolling dataset expression to chain analytics items - DatasetExpression currentDatasetExpression = datasetExpression; + // Accumulators for non-analytic calc items + final LinkedHashMap expressions = new LinkedHashMap<>(); + final LinkedHashMap expressionStrings = new LinkedHashMap<>(); + final LinkedHashMap roles = new LinkedHashMap<>(); - // TODO: Refactor so we call executeCalc per CalcClauseItemContext (as analytics do). - for (VtlParser.CalcClauseItemContext calcCtx : ctx.calcClauseItem()) { + // Tracks duplicates in the same clause (target names) + final Set targetsSeen = new LinkedHashSet<>(); - // ---- Resolve target name and desired role ---- - final String columnName = getName(calcCtx.componentID()); - final Dataset.Role columnRole = - (calcCtx.componentRole() == null) - ? Dataset.Role.MEASURE - : Dataset.Role.valueOf(calcCtx.componentRole().getText().toUpperCase()); + // We need a rolling dataset expression to chain analytics items + DatasetExpression currentDatasetExpression = datasetExpression; - // ---- Validate: duplicate target in the same clause ---- - if (!targetsSeen.add(columnName)) { - final String errorMsg = - String.format( - "Error: duplicate target '%s' in CALC clause. Line %d, position %d. Statement: [%s]", - columnName, line, charPosition, statement); - throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); - } + // TODO: Refactor so we call executeCalc per CalcClauseItemContext (as analytics do). + for (VtlParser.CalcClauseItemContext calcCtx : ctx.calcClauseItem()) { - // If the target already exists in the dataset, check its role - final Dataset.Component existing = byName.get(columnName); - if (existing != null) { - // Explicitly block overwriting identifiers (already handled above if role==IDENTIFIER). - if (existing.getRole() == Dataset.Role.IDENTIFIER) { - final String meta = - String.format( - "(role=%s, type=%s)", - existing.getRole(), existing.getType() != null ? existing.getType() : "n/a"); - final String errorMsg = - String.format( - "Error: CALC cannot overwrite IDENTIFIER '%s' %s. Line %d, position %d. Statement: [%s]", - columnName, meta, line, charPosition, statement); - throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); - } - } + // ---- Resolve target name and desired role ---- + final String columnName = getName(calcCtx.componentID()); + final Dataset.Role columnRole = + (calcCtx.componentRole() == null) + ? Dataset.Role.MEASURE + : Dataset.Role.valueOf(calcCtx.componentRole().getText().toUpperCase()); - // ---- Dispatch: analytics vs. regular calc ---- - final boolean isAnalytic = - (calcCtx.expr() instanceof VtlParser.FunctionsExpressionContext) - && ((VtlParser.FunctionsExpressionContext) calcCtx.expr()).functions() - instanceof VtlParser.AnalyticFunctionsContext; - - if (isAnalytic) { - // Analytics are executed immediately and update the rolling dataset expression - final AnalyticsVisitor analyticsVisitor = - new AnalyticsVisitor(processingEngine, currentDatasetExpression, columnName); - final VtlParser.FunctionsExpressionContext functionExprCtx = - (VtlParser.FunctionsExpressionContext) calcCtx.expr(); - final VtlParser.AnalyticFunctionsContext anFuncCtx = - (VtlParser.AnalyticFunctionsContext) functionExprCtx.functions(); - - currentDatasetExpression = analyticsVisitor.visit(anFuncCtx); - } else { - // Regular calc expression – build resolvable expression and capture its source text - final ResolvableExpression calc = componentExpressionVisitor.visit(calcCtx); - - final String exprSource = getSource(calcCtx.expr()); - if (exprSource == null || exprSource.isEmpty()) { - final String errorMsg = + // ---- Validate: duplicate target in the same clause ---- + if (!targetsSeen.add(columnName)) { + throw new VtlRuntimeException( + new InvalidArgumentException( String.format( - "Error: empty or unavailable source expression for '%s' in CALC. Line %d, position %d. Statement: [%s]", - columnName, line, charPosition, statement); - throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); - } + "duplicate target '%s' in CALC clause. Line %d, Statement: [%s]", + columnName, ctx.getStart().getLine(), ctx.getText()), + fromContext(ctx))); + } - // Store in insertion order (deterministic column creation) - expressions.put(columnName, calc); - expressionStrings.put(columnName, exprSource); - roles.put(columnName, columnRole); + // If the target already exists in the dataset, check its role + final Dataset.Component existing = byName.get(columnName); + if (existing != null) { + // Explicitly block overwriting identifiers (already handled above if role==IDENTIFIER). + if (existing.getRole() == Dataset.Role.IDENTIFIER) { + final String meta = + String.format( + "(role=%s, type=%s)", + existing.getRole(), existing.getType() != null ? existing.getType() : "n/a"); + throw new VtlRuntimeException( + new InvalidArgumentException( + String.format( + "CALC cannot overwrite IDENTIFIER '%s' %s. Line %d, Statement: [%s]", + columnName, meta, ctx.getStart().getLine(), ctx.getText()), + fromContext(ctx))); } } - // ---- Consistency checks before execution ---- - if (!(expressions.keySet().equals(expressionStrings.keySet()) - && expressions.keySet().equals(roles.keySet()))) { - final String errorMsg = - String.format( - "Error: internal CALC maps out of sync (expressions/expressionStrings/roles). Line %d, position %d. Statement: [%s]", - line, charPosition, statement); - throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); - } + // ---- Dispatch: analytics vs. regular calc ---- + final boolean isAnalytic = + (calcCtx.expr() instanceof VtlParser.FunctionsExpressionContext) + && ((VtlParser.FunctionsExpressionContext) calcCtx.expr()).functions() + instanceof VtlParser.AnalyticFunctionsContext; + + if (isAnalytic) { + // Analytics are executed immediately and update the rolling dataset expression + final AnalyticsVisitor analyticsVisitor = + new AnalyticsVisitor(processingEngine, currentDatasetExpression, columnName); + final VtlParser.FunctionsExpressionContext functionExprCtx = + (VtlParser.FunctionsExpressionContext) calcCtx.expr(); + final VtlParser.AnalyticFunctionsContext anFuncCtx = + (VtlParser.AnalyticFunctionsContext) functionExprCtx.functions(); + + currentDatasetExpression = analyticsVisitor.visit(anFuncCtx); + } else { + // Regular calc expression – build resolvable expression and capture its source text + final ResolvableExpression calc = componentExpressionVisitor.visit(calcCtx); + + final String exprSource = getSource(calcCtx.expr()); + if (exprSource == null || exprSource.isEmpty()) { + throw new VtlRuntimeException( + new InvalidArgumentException( + String.format( + "empty or unavailable source expression for '%s' in CALC. Line %d, Statement: [%s]", + columnName, ctx.getStart().getLine(), ctx.getText()), + fromContext(ctx))); + } - // ---- Execute the batch calc if any non-analytic expressions were collected ---- - if (!expressionStrings.isEmpty()) { - currentDatasetExpression = - processingEngine.executeCalc( - currentDatasetExpression, expressions, roles, expressionStrings); + // Store in insertion order (deterministic column creation) + expressions.put(columnName, calc); + expressionStrings.put(columnName, exprSource); + roles.put(columnName, columnRole); } + } - return currentDatasetExpression; + // ---- Consistency checks before execution ---- + if (!(expressions.keySet().equals(expressionStrings.keySet()) + && expressions.keySet().equals(roles.keySet()))) { + throw new VtlRuntimeException( + new InvalidArgumentException( + String.format( + "internal CALC maps out of sync (expressions/expressionStrings/roles). Line %d, Statement: [%s]", + ctx.getStart().getLine(), ctx.getText()), + fromContext(ctx))); + } - } catch (VtlRuntimeException e) { - throw e; - } catch (Exception e) { - final String errorMsg = - String.format( - "Unexpected error while processing CALC clause at line %d, position %d. Statement: [%s]. Cause: %s", - line, charPosition, statement, e.getMessage()); - throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); + // ---- Execute the batch calc if any non-analytic expressions were collected ---- + if (!expressionStrings.isEmpty()) { + currentDatasetExpression = + processingEngine.executeCalc( + currentDatasetExpression, expressions, roles, expressionStrings); } + return currentDatasetExpression; } @Override @@ -346,134 +316,104 @@ public DatasetExpression visitFilterClause(VtlParser.FilterClauseContext ctx) { final int charPosition = ctx.getStart().getCharPositionInLine(); final String statement = ctx.getText(); - try { - - ResolvableExpression filter = componentExpressionVisitor.visit(ctx.expr()); - return processingEngine.executeFilter(datasetExpression, filter, getSource(ctx.expr())); - - } catch (VtlRuntimeException e) { - throw e; - } catch (Exception e) { - String errorMsg = - String.format( - "Unexpected error while processing FILTER clause at line %d, position %d. Statement: [%s]. Cause: %s", - line, charPosition, statement, e.getMessage()); - throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); - } + ResolvableExpression filter = componentExpressionVisitor.visit(ctx.expr()); + return processingEngine.executeFilter(datasetExpression, filter, getSource(ctx.expr())); } @Override public DatasetExpression visitRenameClause(VtlParser.RenameClauseContext ctx) { - // Error reporting context - final int line = ctx.getStart().getLine(); - final int charPosition = ctx.getStart().getCharPositionInLine(); - final String statement = ctx.getText(); + // Dataset structure in order + lookup maps + final List componentsInOrder = + new ArrayList<>(datasetExpression.getDataStructure().values()); + final Set availableColumns = + componentsInOrder.stream() + .map(Dataset.Component::getName) + .collect(Collectors.toCollection(LinkedHashSet::new)); + + // Map for detailed error reporting (includes role/type if available) + final Map byName = + componentsInOrder.stream() + .collect( + Collectors.toMap( + Dataset.Component::getName, c -> c, (a, b) -> a, LinkedHashMap::new)); - try { - - // Dataset structure in order + lookup maps - final List componentsInOrder = - new ArrayList<>(datasetExpression.getDataStructure().values()); - final Set availableColumns = - componentsInOrder.stream() - .map(Dataset.Component::getName) - .collect(Collectors.toCollection(LinkedHashSet::new)); - - // Map for detailed error reporting (includes role/type if available) - final Map byName = - componentsInOrder.stream() - .collect( - Collectors.toMap( - Dataset.Component::getName, c -> c, (a, b) -> a, LinkedHashMap::new)); - - // Parse the RENAME clause and validate - Map fromTo = new LinkedHashMap<>(); - Set toSeen = new LinkedHashSet<>(); - Set fromSeen = new LinkedHashSet<>(); - - for (VtlParser.RenameClauseItemContext renameCtx : ctx.renameClauseItem()) { - final String toNameString = getName(renameCtx.toName); - final String fromNameString = getName(renameCtx.fromName); - - // Validate: no duplicate "from" names inside the clause - if (!fromSeen.add(fromNameString)) { - String errorMsg = - String.format( - "Error: duplicate source name in RENAME clause: '%s'. Line %d, position %d. Statement: [%s]", - fromNameString, line, charPosition, statement); - throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); - } + // Parse the RENAME clause and validate + Map fromTo = new LinkedHashMap<>(); + Set toSeen = new LinkedHashSet<>(); + Set fromSeen = new LinkedHashSet<>(); - // Validate: "from" must exist in dataset - if (!availableColumns.contains(fromNameString)) { - Dataset.Component comp = byName.get(fromNameString); - String meta = - (comp != null) - ? String.format( - " (role=%s, type=%s)", - comp.getRole(), comp.getType() != null ? comp.getType() : "n/a") - : ""; - String errorMsg = - String.format( - "Error: source column to rename not found: '%s'%s. Line %d, position %d. Statement: [%s]", - fromNameString, meta, line, charPosition, statement); - throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); - } + for (VtlParser.RenameClauseItemContext renameCtx : ctx.renameClauseItem()) { + final String toNameString = getName(renameCtx.toName); + final String fromNameString = getName(renameCtx.fromName); - // Validate: no duplicate "to" names inside the clause - if (!toSeen.add(toNameString)) { - String errorMsg = - String.format( - "Error: duplicate output column name in RENAME clause: '%s'. Line %d, position %d. Statement: [%s]", - fromNameString, line, charPosition, statement); - throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); - } + // Validate: no duplicate "from" names inside the clause + if (!fromSeen.add(fromNameString)) { + String errorMsg = + String.format( + "Error: duplicate source name in RENAME clause: '%s. Line %d, Statement: [%s]", + fromNameString, ctx.getStart().getLine(), ctx.getText()); + throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); + } - fromTo.put(fromNameString, toNameString); + // Validate: "from" must exist in dataset + if (!availableColumns.contains(fromNameString)) { + Dataset.Component comp = byName.get(fromNameString); + String meta = + (comp != null) + ? String.format( + " (role=%s, type=%s)", + comp.getRole(), comp.getType() != null ? comp.getType() : "n/a") + : ""; + String errorMsg = + String.format( + "Error: source column to rename not found: '%s'%s. Line %d, Statement: [%s]", + fromNameString, meta, ctx.getStart().getLine(), ctx.getText()); + throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); } - // Validate collisions with untouched dataset columns ("Untouched" = columns that are not - // being renamed) - final Set untouched = - availableColumns.stream() - .filter(c -> !fromTo.containsKey(c)) - .collect(Collectors.toCollection(LinkedHashSet::new)); - - for (Map.Entry e : fromTo.entrySet()) { - final String from = e.getKey(); - final String to = e.getValue(); - - // If target already exists as untouched, it would cause a collision - if (untouched.contains(to)) { - Dataset.Component comp = byName.get(to); - String meta = - (comp != null) - ? String.format( - " (role=%s, type=%s)", - comp.getRole(), comp.getType() != null ? comp.getType() : "n/a") - : ""; - String errorMsg = - String.format( - "Error: target name '%s'%s already exists in dataset and is not being renamed. " - + "Line %d, position %d. Statement: [%s]", - to, meta, line, charPosition, statement); - throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); - } + // Validate: no duplicate "to" names inside the clause + if (!toSeen.add(toNameString)) { + String errorMsg = + String.format( + "Error: duplicate output column name in RENAME clause: '%s. Line %d, Statement: [%s]", + fromNameString, ctx.getStart().getLine(), ctx.getText()); + throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); } - // Execute rename in processing engine - return processingEngine.executeRename(datasetExpression, fromTo); - - } catch (VtlRuntimeException e) { - throw e; - } catch (Exception e) { - String errorMsg = - String.format( - "Unexpected error while processing RENAME clause at line %d, position %d. Statement: [%s]. Cause: %s", - line, charPosition, statement, e.getMessage()); - throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); + fromTo.put(fromNameString, toNameString); } + + // Validate collisions with untouched dataset columns ("Untouched" = columns that are not + // being renamed) + final Set untouched = + availableColumns.stream() + .filter(c -> !fromTo.containsKey(c)) + .collect(Collectors.toCollection(LinkedHashSet::new)); + + for (Map.Entry e : fromTo.entrySet()) { + final String from = e.getKey(); + final String to = e.getValue(); + + // If target already exists as untouched, it would cause a collision + if (untouched.contains(to)) { + Dataset.Component comp = byName.get(to); + String meta = + (comp != null) + ? String.format( + " (role=%s, type=%s)", + comp.getRole(), comp.getType() != null ? comp.getType() : "n/a") + : ""; + String errorMsg = + String.format( + "Error: target name '%s'%s already exists in dataset and is not being renamed. Line %d, Statement: [%s]", + to, meta, ctx.getStart().getLine(), ctx.getText()); + throw new VtlRuntimeException(new InvalidArgumentException(errorMsg, fromContext(ctx))); + } + } + + // Execute rename in processing engine + return processingEngine.executeRename(datasetExpression, fromTo); } @Override diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java index 7ead24a6f..0a60195d5 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java @@ -120,8 +120,9 @@ public void testCalcRoleModifier_measuresAndAttributesOk() throws ScriptExceptio assertThat(unitComponent.getRole()).isEqualTo(Role.ATTRIBUTE); } + /** RENAME: duplicate "to" name inside the clause must raise a detailed script error. */ @Test - public void testRenameClause() throws ScriptException { + public void testRenameClause_duplicateToNameShouldFail() { InMemoryDataset dataset = new InMemoryDataset( List.of( @@ -131,21 +132,67 @@ public void testRenameClause() throws ScriptException { Map.of("name", String.class, "age", Long.class, "weight", Long.class), Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE)); - ScriptContext context = engine.getContext(); - context.setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); - - engine.eval("ds := ds1[rename age to weight, weight to age, name to pseudo];"); - - assertThat(engine.getContext().getAttribute("ds")).isInstanceOf(Dataset.class); - assertThat(((Dataset) engine.getContext().getAttribute("ds")).getDataAsMap()) - .containsExactlyInAnyOrder( - Map.of("pseudo", "Hadrien", "weight", 10L, "age", 11L), - Map.of("pseudo", "Nico", "weight", 11L, "age", 10L), - Map.of("pseudo", "Franck", "weight", 12L, "age", 9L)); + engine.getContext().setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); assertThatThrownBy( () -> engine.eval("ds := ds1[rename age to weight, weight to age, name to age];")) - .isInstanceOf(VtlScriptException.class); + .isInstanceOf(VtlScriptException.class) + .hasMessageContaining("duplicate output column name in RENAME clause"); + } + + /** RENAME: duplicate "from" name inside the clause must raise a detailed script error. */ + @Test + public void testRenameClause_duplicateFromNameShouldFail() { + InMemoryDataset dataset = + new InMemoryDataset( + List.of( + Map.of("name", "Hadrien", "age", 10L, "weight", 11L), + Map.of("name", "Nico", "age", 11L, "weight", 10L)), + Map.of("name", String.class, "age", Long.class, "weight", Long.class), + Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE)); + + engine.getContext().setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); + + assertThatThrownBy(() -> engine.eval("ds := ds1[rename age to weight, age to weight2];")) + .isInstanceOf(VtlScriptException.class) + .hasMessageContaining("duplicate source name in RENAME clause"); + } + + /** RENAME: "from" column must exist in dataset. */ + @Test + public void testRenameClause_fromColumnNotFoundShouldFail() { + InMemoryDataset dataset = + new InMemoryDataset( + List.of(Map.of("name", "Hadrien", "age", 10L, "weight", 11L)), + Map.of("name", String.class, "age", Long.class, "weight", Long.class), + Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE)); + + engine.getContext().setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); + + assertThatThrownBy(() -> engine.eval("ds := ds1[rename unknown to something];")) + .isInstanceOf(VtlScriptException.class) + .hasMessageContaining("source column to rename not found: 'unknown'"); + } + + /** + * RENAME: target collides with an untouched existing column -> must error with details + * (role/type). + */ + @Test + public void testRenameClause_targetCollidesWithUntouchedShouldFail() { + InMemoryDataset dataset = + new InMemoryDataset( + List.of(Map.of("name", "Hadrien", "age", 10L, "weight", 11L)), + Map.of("name", String.class, "age", Long.class, "weight", Long.class), + Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE)); + + engine.getContext().setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); + + assertThatThrownBy(() -> engine.eval("ds := ds1[rename name to age];")) + .isInstanceOf(VtlScriptException.class) + .hasMessageContaining("target name 'age'") // main message + .hasMessageContaining("already exists in dataset and is not being renamed") + .hasMessageContaining("(role=MEASURE, type=class java.lang.Long)"); } @Test @@ -223,7 +270,8 @@ public void testKeepDropClause_identifierExplicitShouldFail() { context.setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); assertThatThrownBy(() -> engine.eval("ds := ds1[keep name, age];")) - .isInstanceOf(VtlScriptException.class); + .isInstanceOf(VtlScriptException.class) + .hasMessageContaining("identifiers [name] must not be explicitly listed in KEEP/DROP"); } @Test diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/JoinFunctionsTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/JoinFunctionsTest.java index 8e69aeea2..7634c3d21 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/JoinFunctionsTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/JoinFunctionsTest.java @@ -136,7 +136,8 @@ public void testLeftJoinWithDifferentIdentifiers() throws ScriptException { ds_1 := ds_1[calc measure Id_2 := Id_2]; result := left_join(ds_1, ds_2 using Id_2);\ """)) - .isInstanceOf(InvalidArgumentException.class); + .isInstanceOf(InvalidArgumentException.class) + .hasMessageContaining("CALC cannot overwrite IDENTIFIER"); } @Test From c82e0aa74e4d8bf2001209b2a8546f9191cf026c Mon Sep 17 00:00:00 2001 From: MiguelRosaTauroni Date: Mon, 25 Aug 2025 07:20:15 +0000 Subject: [PATCH 06/15] The functions keep, rename, filter, and calc have been adapted to comply with the requirements of the VTL 2.1 specification. In addition, an error handling mechanism has been incorporated into all the aforementioned functions, providing both the cause and the statement that triggered the error in the event of a runtime failure Unitary tests adapted --- .../vtl/engine/visitors/ClauseVisitor.java | 274 +++++++++++++++--- .../engine/visitors/ClauseVisitorTest.java | 100 +++++-- .../ArithmeticExprOrConcatTest.java | 6 +- .../expression/ArithmeticExprTest.java | 6 +- .../visitors/expression/BooleanExprTest.java | 4 +- .../expression/ComparisonExprTest.java | 14 +- .../expression/ConditionalExprTest.java | 16 +- .../visitors/expression/UnaryExprTest.java | 4 +- .../functions/ComparisonFunctionsTest.java | 7 +- .../functions/DistanceFunctionsTest.java | 2 +- .../functions/JoinFunctionsTest.java | 10 +- .../functions/NumericFunctionsTest.java | 24 +- .../functions/StringFunctionsTest.java | 18 +- 13 files changed, 378 insertions(+), 107 deletions(-) diff --git a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java index fefde8900..8319a1c3b 100644 --- a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java +++ b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java @@ -106,83 +106,291 @@ private static AggregationExpression convertToAggregation( @Override public DatasetExpression visitKeepOrDropClause(VtlParser.KeepOrDropClauseContext ctx) { - // Normalize to keep operation. - var keep = ctx.op.getType() == VtlParser.KEEP; - var names = ctx.componentID().stream().map(ClauseVisitor::getName).collect(Collectors.toSet()); - List columnNames = - datasetExpression.getDataStructure().values().stream() - .map(Dataset.Component::getName) - .filter(name -> keep == names.contains(name)) - .collect(Collectors.toList()); - return processingEngine.executeProject(datasetExpression, columnNames); + // The type of the op can either be KEEP or DROP. + final boolean keep = ctx.op.getType() == VtlParser.KEEP; + + // Columns explicitly requested in the KEEP/DROP clause + final List columnNames = + ctx.componentID().stream().map(ClauseVisitor::getName).toList(); + + // All available dataset components + final List inputColumnDataTypes = + new ArrayList<>(datasetExpression.getDataStructure().values()); + final List inputColumns = + inputColumnDataTypes.stream().map(Dataset.Component::getName).toList(); + + // Dataset identifiers (role = IDENTIFIER) + final Map identifiers = + inputColumnDataTypes.stream() + .filter(c -> c.getRole() == Dataset.Role.IDENTIFIER) + .collect( + Collectors.toMap( + Dataset.Component::getName, c -> c, (a, b) -> a, LinkedHashMap::new)); + + // Evaluate that all requested columns must exist in the dataset or raise an error + for (String requested : columnNames) { + if (!inputColumns.contains(requested)) { + throw new VtlRuntimeException( + new InvalidArgumentException( + String.format("'%s' not found in dataset.", requested), fromContext(ctx))); + } + } + + // VTL specification: identifiers must not appear explicitly in KEEP + final Set forbidden = + columnNames.stream() + .filter(identifiers::containsKey) + .collect(Collectors.toCollection(LinkedHashSet::new)); + + if (!forbidden.isEmpty()) { + StringBuilder details = new StringBuilder(); + for (String id : forbidden) { + Dataset.Component comp = identifiers.get(id); + details.append( + String.format( + "%s(role=%s, type=%s) ", + id, comp.getRole(), comp.getType() != null ? comp.getType() : "n/a")); + } + throw new VtlRuntimeException( + new InvalidArgumentException( + String.format( + "identifiers %s must not be explicitly listed in KEEP/DROP. Details: %s", + forbidden, details.toString().trim()), + fromContext(ctx))); + } + + // Build result set: + // + KEEP: identifiers + requested columns + // + DROP: (all columns - requested) + identifiers + final Set resultSet = new LinkedHashSet<>(); + resultSet.addAll(identifiers.keySet()); + if (keep) { + resultSet.addAll(columnNames); + } else { + for (String col : inputColumns) { + if (!columnNames.contains(col)) { + resultSet.add(col); + } + } + } + + // Retrieve the output column names (identifiers + requested) + final List outputColumns = + inputColumns.stream().filter(resultSet::contains).collect(Collectors.toList()); + return processingEngine.executeProject(datasetExpression, outputColumns); } @Override public DatasetExpression visitCalcClause(VtlParser.CalcClauseContext ctx) { - var expressions = new LinkedHashMap(); - var expressionStrings = new LinkedHashMap(); - var roles = new LinkedHashMap(); - var currentDatasetExpression = datasetExpression; - // TODO: Refactor so we call the executeCalc for each CalcClauseItemContext the same way we call - // the - // analytics functions. + // Dataset structure (ordered) and quick lookups + final List componentsInOrder = + new ArrayList<>(datasetExpression.getDataStructure().values()); + + final Map byName = + componentsInOrder.stream() + .collect( + Collectors.toMap( + Dataset.Component::getName, c -> c, (a, b) -> a, LinkedHashMap::new)); + + // Accumulators for non-analytic calc items + final LinkedHashMap expressions = new LinkedHashMap<>(); + final LinkedHashMap expressionStrings = new LinkedHashMap<>(); + final LinkedHashMap roles = new LinkedHashMap<>(); + + // Tracks duplicates in the same clause (target names) + final Set targetsSeen = new LinkedHashSet<>(); + + // We need a rolling dataset expression to chain analytics items + DatasetExpression currentDatasetExpression = datasetExpression; + + // TODO: Refactor so we call executeCalc per CalcClauseItemContext (as analytics do). for (VtlParser.CalcClauseItemContext calcCtx : ctx.calcClauseItem()) { - var columnName = getName(calcCtx.componentID()); - var columnRole = - calcCtx.componentRole() == null + + // ---- Resolve target name and desired role ---- + final String columnName = getName(calcCtx.componentID()); + final Dataset.Role columnRole = + (calcCtx.componentRole() == null) ? Dataset.Role.MEASURE : Dataset.Role.valueOf(calcCtx.componentRole().getText().toUpperCase()); - if ((calcCtx.expr() instanceof VtlParser.FunctionsExpressionContext) - && ((VtlParser.FunctionsExpressionContext) calcCtx.expr()).functions() - instanceof VtlParser.AnalyticFunctionsContext) { - AnalyticsVisitor analyticsVisitor = + // If the target already exists in the dataset, check its role + final Dataset.Component existing = byName.get(columnName); + if (existing != null) { + // Explicitly block overwriting identifiers (already handled above if role==IDENTIFIER). + if (existing.getRole() == Dataset.Role.IDENTIFIER) { + final String meta = + String.format( + "(role=%s, type=%s)", + existing.getRole(), existing.getType() != null ? existing.getType() : "n/a"); + throw new VtlRuntimeException( + new InvalidArgumentException( + String.format("CALC cannot overwrite IDENTIFIER '%s' %s.", columnName, meta), + fromContext(ctx))); + } + } + + // ---- Dispatch: analytics vs. regular calc ---- + final boolean isAnalytic = + (calcCtx.expr() instanceof VtlParser.FunctionsExpressionContext) + && ((VtlParser.FunctionsExpressionContext) calcCtx.expr()).functions() + instanceof VtlParser.AnalyticFunctionsContext; + + if (isAnalytic) { + // Analytics are executed immediately and update the rolling dataset expression + final AnalyticsVisitor analyticsVisitor = new AnalyticsVisitor(processingEngine, currentDatasetExpression, columnName); - VtlParser.FunctionsExpressionContext functionExprCtx = + final VtlParser.FunctionsExpressionContext functionExprCtx = (VtlParser.FunctionsExpressionContext) calcCtx.expr(); - VtlParser.AnalyticFunctionsContext anFuncCtx = + final VtlParser.AnalyticFunctionsContext anFuncCtx = (VtlParser.AnalyticFunctionsContext) functionExprCtx.functions(); + currentDatasetExpression = analyticsVisitor.visit(anFuncCtx); } else { - ResolvableExpression calc = componentExpressionVisitor.visit(calcCtx); + // Regular calc expression – build resolvable expression and capture its source text + final ResolvableExpression calc = componentExpressionVisitor.visit(calcCtx); + + final String exprSource = getSource(calcCtx.expr()); + if (exprSource == null || exprSource.isEmpty()) { + throw new VtlRuntimeException( + new InvalidArgumentException( + String.format( + "empty or unavailable source expression for '%s' in CALC.", columnName), + fromContext(ctx))); + } + // Store in insertion order (deterministic column creation) expressions.put(columnName, calc); - expressionStrings.put(columnName, getSource(calcCtx.expr())); + expressionStrings.put(columnName, exprSource); roles.put(columnName, columnRole); } } + // ---- Consistency checks before execution ---- + if (!(expressions.keySet().equals(expressionStrings.keySet()) + && expressions.keySet().equals(roles.keySet()))) { + throw new VtlRuntimeException( + new InvalidArgumentException( + "internal CALC maps out of sync (expressions/expressionStrings/roles)", + fromContext(ctx))); + } + + // ---- Execute the batch calc if any non-analytic expressions were collected ---- if (!expressionStrings.isEmpty()) { currentDatasetExpression = processingEngine.executeCalc( currentDatasetExpression, expressions, roles, expressionStrings); } - return currentDatasetExpression; } @Override public DatasetExpression visitFilterClause(VtlParser.FilterClauseContext ctx) { + + // Error reporting context + final int line = ctx.getStart().getLine(); + final int charPosition = ctx.getStart().getCharPositionInLine(); + final String statement = ctx.getText(); + ResolvableExpression filter = componentExpressionVisitor.visit(ctx.expr()); return processingEngine.executeFilter(datasetExpression, filter, getSource(ctx.expr())); } @Override public DatasetExpression visitRenameClause(VtlParser.RenameClauseContext ctx) { + + // Dataset structure in order + lookup maps + final List componentsInOrder = + new ArrayList<>(datasetExpression.getDataStructure().values()); + final Set availableColumns = + componentsInOrder.stream() + .map(Dataset.Component::getName) + .collect(Collectors.toCollection(LinkedHashSet::new)); + + // Map for detailed error reporting (includes role/type if available) + final Map byName = + componentsInOrder.stream() + .collect( + Collectors.toMap( + Dataset.Component::getName, c -> c, (a, b) -> a, LinkedHashMap::new)); + + // Parse the RENAME clause and validate Map fromTo = new LinkedHashMap<>(); - Set renamed = new HashSet<>(); + Set toSeen = new LinkedHashSet<>(); + Set fromSeen = new LinkedHashSet<>(); + for (VtlParser.RenameClauseItemContext renameCtx : ctx.renameClauseItem()) { - var toNameString = getName(renameCtx.toName); - var fromNameString = getName(renameCtx.fromName); - if (!renamed.add(toNameString)) { + final String toNameString = getName(renameCtx.toName); + final String fromNameString = getName(renameCtx.fromName); + + // Validate: no duplicate "from" names inside the clause + if (!fromSeen.add(fromNameString)) { throw new VtlRuntimeException( new InvalidArgumentException( - "duplicate column: %s".formatted(toNameString), fromContext(renameCtx))); + String.format("Error: duplicate source name in RENAME clause: '%s", fromNameString), + fromContext(ctx))); } + + // Validate: "from" must exist in dataset + if (!availableColumns.contains(fromNameString)) { + Dataset.Component comp = byName.get(fromNameString); + String meta = + (comp != null) + ? String.format( + " (role=%s, type=%s)", + comp.getRole(), comp.getType() != null ? comp.getType() : "n/a") + : ""; + throw new VtlRuntimeException( + new InvalidArgumentException( + String.format( + "Error: source column to rename not found: '%s'%s", fromNameString, meta), + fromContext(ctx))); + } + + // Validate: no duplicate "to" names inside the clause + if (!toSeen.add(toNameString)) { + throw new VtlRuntimeException( + new InvalidArgumentException( + String.format( + "Error: duplicate output column name in RENAME clause: '%s.", fromNameString), + fromContext(ctx))); + } + fromTo.put(fromNameString, toNameString); } + + // Validate collisions with untouched dataset columns ("Untouched" = columns that are not + // being renamed) + final Set untouched = + availableColumns.stream() + .filter(c -> !fromTo.containsKey(c)) + .collect(Collectors.toCollection(LinkedHashSet::new)); + + for (Map.Entry e : fromTo.entrySet()) { + final String from = e.getKey(); + final String to = e.getValue(); + + // If target already exists as untouched, it would cause a collision + if (untouched.contains(to)) { + Dataset.Component comp = byName.get(to); + String meta = + (comp != null) + ? String.format( + " (role=%s, type=%s)", + comp.getRole(), comp.getType() != null ? comp.getType() : "n/a") + : ""; + + throw new VtlRuntimeException( + new InvalidArgumentException( + String.format( + "Error: target name '%s'%s already exists in dataset and is not being renamed.", + to, meta), + fromContext(ctx))); + } + } + + // Execute rename in processing engine return processingEngine.executeRename(datasetExpression, fromTo); } diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java index 5c25d981b..a4127d2bd 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java @@ -1,6 +1,5 @@ package fr.insee.vtl.engine.visitors; -import static fr.insee.vtl.engine.VtlScriptEngineTest.atPosition; import static fr.insee.vtl.model.Dataset.Role; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; @@ -86,8 +85,9 @@ public void testManyCalc() throws ScriptException { Map.of("name", "Franck", "weight", 9L, "wisdom", 24L)); } + /** CALC: measures/attributes are allowed and should be created as requested. */ @Test - public void testCalcRoleModifier() throws ScriptException { + public void testCalcRoleModifier_measuresAndAttributesOk() throws ScriptException { InMemoryDataset dataset = new InMemoryDataset( List.of( @@ -125,8 +125,9 @@ public void testCalcRoleModifier() throws ScriptException { assertThat(unitComponent.getRole()).isEqualTo(Role.ATTRIBUTE); } + /** RENAME: duplicate "to" name inside the clause must raise a detailed script error. */ @Test - public void testRenameClause() throws ScriptException { + public void testRenameClause_duplicateToNameShouldFail() { InMemoryDataset dataset = new InMemoryDataset( List.of( @@ -136,23 +137,67 @@ public void testRenameClause() throws ScriptException { Map.of("name", String.class, "age", Long.class, "weight", Long.class), Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE)); - ScriptContext context = engine.getContext(); - context.setAttribute("ds", dataset, ScriptContext.ENGINE_SCOPE); + engine.getContext().setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); - engine.eval("ds1 := ds[rename age to weight, weight to age, name to pseudo];"); + assertThatThrownBy( + () -> engine.eval("ds := ds1[rename age to weight, weight to age, name to age];")) + .isInstanceOf(VtlScriptException.class) + .hasMessageContaining("duplicate output column name in RENAME clause"); + } - assertThat(engine.getContext().getAttribute("ds1")).isInstanceOf(Dataset.class); - assertThat(((Dataset) engine.getContext().getAttribute("ds1")).getDataAsMap()) - .containsExactlyInAnyOrder( - Map.of("pseudo", "Hadrien", "weight", 10L, "age", 11L), - Map.of("pseudo", "Nico", "weight", 11L, "age", 10L), - Map.of("pseudo", "Franck", "weight", 12L, "age", 9L)); + /** RENAME: duplicate "from" name inside the clause must raise a detailed script error. */ + @Test + public void testRenameClause_duplicateFromNameShouldFail() { + InMemoryDataset dataset = + new InMemoryDataset( + List.of( + Map.of("name", "Hadrien", "age", 10L, "weight", 11L), + Map.of("name", "Nico", "age", 11L, "weight", 10L)), + Map.of("name", String.class, "age", Long.class, "weight", Long.class), + Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE)); - assertThatThrownBy( - () -> engine.eval("ds2 := ds[rename age to weight, weight to age, name to age];")) + engine.getContext().setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); + + assertThatThrownBy(() -> engine.eval("ds := ds1[rename age to weight, age to weight2];")) + .isInstanceOf(VtlScriptException.class) + .hasMessageContaining("duplicate source name in RENAME clause"); + } + + /** RENAME: "from" column must exist in dataset. */ + @Test + public void testRenameClause_fromColumnNotFoundShouldFail() { + InMemoryDataset dataset = + new InMemoryDataset( + List.of(Map.of("name", "Hadrien", "age", 10L, "weight", 11L)), + Map.of("name", String.class, "age", Long.class, "weight", Long.class), + Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE)); + + engine.getContext().setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); + + assertThatThrownBy(() -> engine.eval("ds := ds1[rename unknown to something];")) + .isInstanceOf(VtlScriptException.class) + .hasMessageContaining("source column to rename not found: 'unknown'"); + } + + /** + * RENAME: target collides with an untouched existing column -> must error with details + * (role/type). + */ + @Test + public void testRenameClause_targetCollidesWithUntouchedShouldFail() { + InMemoryDataset dataset = + new InMemoryDataset( + List.of(Map.of("name", "Hadrien", "age", 10L, "weight", 11L)), + Map.of("name", String.class, "age", Long.class, "weight", Long.class), + Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE)); + + engine.getContext().setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); + + assertThatThrownBy(() -> engine.eval("ds := ds1[rename name to age];")) .isInstanceOf(VtlScriptException.class) - .hasMessage("duplicate column: age") - .is(atPosition(0, 47, 58)); + .hasMessageContaining("target name 'age'") // main message + .hasMessageContaining("already exists in dataset and is not being renamed") + .hasMessageContaining("(role=MEASURE, type=class java.lang.Long)"); } @Test @@ -194,7 +239,8 @@ public void testKeepDropClause() throws ScriptException { ScriptContext context = engine.getContext(); context.setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); - engine.eval("ds2 := ds1[keep name, age];"); + // KEEP: identifiers must not be listed explicitly; they are implicitly preserved. + engine.eval("ds2 := ds1[keep age];"); assertThat(engine.getContext().getAttribute("ds2")).isInstanceOf(Dataset.class); assertThat(((Dataset) engine.getContext().getAttribute("ds2")).getDataAsMap()) @@ -213,6 +259,26 @@ public void testKeepDropClause() throws ScriptException { Map.of("name", "Franck", "age", 12L)); } + /** KEEP/DROP: listing identifiers explicitly must raise a script error. */ + @Test + public void testKeepDropClause_identifierExplicitShouldFail() { + InMemoryDataset dataset = + new InMemoryDataset( + List.of( + Map.of("name", "Hadrien", "age", 10L, "weight", 11L), + Map.of("name", "Nico", "age", 11L, "weight", 10L), + Map.of("name", "Franck", "age", 12L, "weight", 9L)), + Map.of("name", String.class, "age", Long.class, "weight", Long.class), + Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE)); + + ScriptContext context = engine.getContext(); + context.setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); + + assertThatThrownBy(() -> engine.eval("ds := ds1[keep name, age];")) + .isInstanceOf(VtlScriptException.class) + .hasMessageContaining("identifiers [name] must not be explicitly listed in KEEP/DROP"); + } + @Test public void testAggregateType() { InMemoryDataset dataset = diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprOrConcatTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprOrConcatTest.java index 3d61e46ba..d3cb18544 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprOrConcatTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprOrConcatTest.java @@ -52,7 +52,7 @@ public void testPlus() throws ScriptException { assertThat(context.getAttribute("plus2")).isEqualTo(5.0); context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := ds1[keep id, long1, long2] + ds1[keep id, long1, long2];"); + Object res = engine.eval("res := ds1[keep long1, long2] + ds1[keep long1, long2];"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", 60L, "long2", 600L), @@ -75,7 +75,7 @@ public void testMinus() throws ScriptException { context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := ds2[keep id, long1] - ds1[keep id, long1] + 1;"); + Object res = engine.eval("res := ds2[keep long1] - ds1[keep long1] + 1;"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "long1", 141L), @@ -92,7 +92,7 @@ public void testConcat() throws ScriptException { context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := ds2[keep id, string1] || \" \" || ds1[keep id, string1];"); + Object res = engine.eval("res := ds2[keep string1] || \" \" || ds1[keep string1];"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "string1", "hadrien hadrien"), diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprTest.java index c5822ea0f..353838880 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprTest.java @@ -64,8 +64,7 @@ public void testArithmeticExpr() throws ScriptException { context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = - engine.eval("res := round(ds1[keep id, long1, double1] * ds2[keep id, long1, double1]);"); + Object res = engine.eval("res := round(ds1[keep long1, double1] * ds2[keep long1, double1]);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "long1", 1500.0, "double1", 1.0), @@ -83,8 +82,7 @@ public void testArithmeticExpr() throws ScriptException { engine.eval("div4 := 3.0 / 1.5;"); assertThat(context.getAttribute("div4")).isEqualTo(2.0); - res = - engine.eval("res2 := round(ds1[keep id, long1, double1] / ds2[keep id, long1, double1]);"); + res = engine.eval("res2 := round(ds1[keep long1, double1] / ds2[keep long1, double1]);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "long1", 0.0, "double1", 1.0), diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/BooleanExprTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/BooleanExprTest.java index 7e7216f6d..d9ba19e7d 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/BooleanExprTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/BooleanExprTest.java @@ -100,8 +100,8 @@ public void testOnDatasets() throws ScriptException { context.setAttribute("ds_1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); context.setAttribute("ds_2", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); engine.eval( - "ds1 := ds_1[keep id, bool2][rename bool2 to bool1]; " - + "ds2 := ds_2[keep id, bool1]; " + "ds1 := ds_1[keep bool2][rename bool2 to bool1]; " + + "ds2 := ds_2[keep bool1]; " + "andDs := ds1 and ds2; " + "orDs := ds1 or ds2; " + "xorDs := ds1 xor ds2; "); diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ComparisonExprTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ComparisonExprTest.java index 70921ac91..297becc48 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ComparisonExprTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ComparisonExprTest.java @@ -73,7 +73,7 @@ public void testComparisonExpr() throws ScriptException { context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - engine.eval("equal := ds1[keep id, long1] = ds2[keep id, long1];"); + engine.eval("equal := ds1[keep long1] = ds2[keep long1];"); var equal = engine.getContext().getAttribute("equal"); assertThat(((Dataset) equal).getDataAsMap()) .containsExactlyInAnyOrder( @@ -90,7 +90,7 @@ public void testComparisonExpr() throws ScriptException { assertThat((Boolean) context.getAttribute("long1")).isTrue(); engine.eval("mix1 := 6 <> (3*20.0);"); assertThat((Boolean) context.getAttribute("mix1")).isTrue(); - engine.eval("notEqual := ds1[keep id, long1] <> ds2[keep id, long1];"); + engine.eval("notEqual := ds1[keep long1] <> ds2[keep long1];"); var notEqual = engine.getContext().getAttribute("notEqual"); assertThat(((Dataset) notEqual).getDataAsMap()) .containsExactlyInAnyOrder( @@ -106,7 +106,7 @@ public void testComparisonExpr() throws ScriptException { assertThat((Boolean) context.getAttribute("lt1")).isFalse(); engine.eval("mix2 := 6 < 6.1;"); assertThat((Boolean) context.getAttribute("mix2")).isTrue(); - engine.eval("lt2 := ds1[keep id, long1] < ds2[keep id, long1];"); + engine.eval("lt2 := ds1[keep long1] < ds2[keep long1];"); var lt = engine.getContext().getAttribute("lt2"); assertThat(((Dataset) lt).getDataAsMap()) .containsExactlyInAnyOrder( @@ -122,7 +122,7 @@ public void testComparisonExpr() throws ScriptException { assertThat((Boolean) context.getAttribute("mt1")).isTrue(); engine.eval("mix4 := 6 > 6.1;"); assertThat((Boolean) context.getAttribute("mix4")).isFalse(); - engine.eval("mt2 := ds1[keep id, long1] > ds2[keep id, long1];"); + engine.eval("mt2 := ds1[keep long1] > ds2[keep long1];"); var mt = engine.getContext().getAttribute("mt2"); assertThat(((Dataset) mt).getDataAsMap()) .containsExactlyInAnyOrder( @@ -139,7 +139,7 @@ public void testComparisonExpr() throws ScriptException { engine.eval("mix5 := 6 <= 6.1;"); assertThat((Boolean) context.getAttribute("mix5")).isTrue(); - engine.eval("le2 := ds1[keep id, long1] <= ds2[keep id, long1];"); + engine.eval("le2 := ds1[keep long1] <= ds2[keep long1];"); var le = engine.getContext().getAttribute("le2"); assertThat(((Dataset) le).getDataAsMap()) .containsExactlyInAnyOrder( @@ -156,7 +156,7 @@ public void testComparisonExpr() throws ScriptException { engine.eval("mix6 := 6 >= 6.1;"); assertThat((Boolean) context.getAttribute("mix6")).isFalse(); - engine.eval("me2 := ds1[keep id, long1] >= ds2[keep id, long1];"); + engine.eval("me2 := ds1[keep long1] >= ds2[keep long1];"); var me = engine.getContext().getAttribute("me2"); assertThat(((Dataset) me).getDataAsMap()) .containsExactlyInAnyOrder( @@ -198,7 +198,7 @@ public void testInNotIn() throws ScriptException { assertThat((Boolean) engine.getContext().getAttribute("res4")).isTrue(); engine.getContext().setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - engine.eval("me := ds[keep id, long1, string1] in {\"toto\", \"franck\"};"); + engine.eval("me := ds[keep long1, string1] in {\"toto\", \"franck\"};"); var in = engine.getContext().getAttribute("me"); assertThat(((Dataset) in).getDataAsMap()) .containsExactlyInAnyOrder( diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ConditionalExprTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ConditionalExprTest.java index 2994c97c7..5ce05c628 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ConditionalExprTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ConditionalExprTest.java @@ -46,8 +46,8 @@ public void testIfExpr() throws ScriptException { engine.getContext().setAttribute("ds_1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); engine.getContext().setAttribute("ds_2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); engine.eval( - "ds1 := ds_1[keep id, long1][rename long1 to bool_var]; " - + "ds2 := ds_2[keep id, long1][rename long1 to bool_var]; " + "ds1 := ds_1[keep long1][rename long1 to bool_var]; " + + "ds2 := ds_2[keep long1][rename long1 to bool_var]; " + "res := if ds1 > ds2 then ds1 else ds2;"); var res = engine.getContext().getAttribute("res"); assertThat(((Dataset) res).getDataAsMap()) @@ -73,7 +73,7 @@ public void testCaseExpr() throws ScriptException { engine.getContext().setAttribute("ds_1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); engine.getContext().setAttribute("ds_2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); engine.eval( - "ds1 := ds_1[keep id, long1]; " + "ds1 := ds_1[keep long1]; " + "res <- ds1[calc c := case when long1 > 30 then \"ok\" else \"ko\"][drop long1];"); Object res = engine.getContext().getAttribute("res"); assertThat(((Dataset) res).getDataAsMap()) @@ -84,7 +84,7 @@ public void testCaseExpr() throws ScriptException { Map.of("id", "Franck", "c", "ok")); assertThat(((Dataset) res).getDataStructure().get("c").getType()).isEqualTo(String.class); engine.eval( - "ds2 := ds_1[keep id, long1]; " + "ds2 := ds_1[keep long1]; " + "res2 <- ds2[calc c := case when long1 > 30 then 1 else 0][drop long1];"); Object res2 = engine.getContext().getAttribute("res2"); assertThat(((Dataset) res2).getDataAsMap()) @@ -95,8 +95,8 @@ public void testCaseExpr() throws ScriptException { Map.of("id", "Franck", "c", 1L)); assertThat(((Dataset) res2).getDataStructure().get("c").getType()).isEqualTo(Long.class); engine.eval( - "ds3 := ds_1[keep id, long1][rename long1 to bool_var];" - + "ds4 := ds_2[keep id, long1][rename long1 to bool_var]; " + "ds3 := ds_1[keep long1][rename long1 to bool_var];" + + "ds4 := ds_2[keep long1][rename long1 to bool_var]; " + "res_ds <- case when ds3 < 30 then ds3 else ds4;"); Object res_ds = engine.getContext().getAttribute("res_ds"); assertThat(((Dataset) res_ds).getDataAsMap()) @@ -115,7 +115,7 @@ public void testNvlExpr() throws ScriptException { assertThat(context.getAttribute("s2")).isEqualTo("default"); engine.getContext().setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - engine.eval("res := nvl(ds[keep id, long1], 0);"); + engine.eval("res := nvl(ds[keep long1], 0);"); var res = engine.getContext().getAttribute("res"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( @@ -142,7 +142,7 @@ public void testNvlImplicitCast() throws ScriptException { assertThat(context.getAttribute("s2")).isEqualTo(1.1D); engine.getContext().setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - engine.eval("res := nvl(ds[keep id, long1], 0.1);"); + engine.eval("res := nvl(ds[keep long1], 0.1);"); var res = engine.getContext().getAttribute("res"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/UnaryExprTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/UnaryExprTest.java index 45ef35973..bbe9aea24 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/UnaryExprTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/UnaryExprTest.java @@ -44,7 +44,7 @@ public void testUnaryExpr() throws ScriptException { assertThat(context.getAttribute("plus1")).isEqualTo(1.5D); context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res1 := + ds2[keep id, long1, double1];"); + Object res = engine.eval("res1 := + ds2[keep long1, double1];"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "long1", 150L, "double1", 1.1D), @@ -57,7 +57,7 @@ public void testUnaryExpr() throws ScriptException { engine.eval("plus3 := - 1.5;"); assertThat(context.getAttribute("plus3")).isEqualTo(-1.5D); - res = engine.eval("res2 := - ds2[keep id, long1, double1];"); + res = engine.eval("res2 := - ds2[keep long1, double1];"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "long1", -150L, "double1", -1.1D), diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/ComparisonFunctionsTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/ComparisonFunctionsTest.java index 9e307d085..e19a3d273 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/ComparisonFunctionsTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/ComparisonFunctionsTest.java @@ -50,7 +50,7 @@ public void testBetweenAtom() throws ScriptException { assertThat((Boolean) context.getAttribute("b2")).isFalse(); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := between(ds[keep id, long1, double2], 5, 15);"); + Object res = engine.eval("res := between(ds[keep long1, double2], 5, 15);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", false, "double2", false), @@ -80,8 +80,7 @@ public void testCharsetMatchAtom() throws ScriptException { assertThat((Boolean) context.getAttribute("t3")).isFalse(); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = - engine.eval("res := match_characters(ds[keep id, string1, string2], \"(.*)o(.*)\");"); + Object res = engine.eval("res := match_characters(ds[keep string1, string2], \"(.*)o(.*)\");"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "string1", true, "string2", false), @@ -114,7 +113,7 @@ public void testIsNullAtom() throws ScriptException { assertThat((Boolean) context.getAttribute("n1")).isFalse(); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := isnull(ds[keep id, string1, bool1]);"); + Object res = engine.eval("res := isnull(ds[keep string1, bool1]);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "string1", false, "bool1", false), diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/DistanceFunctionsTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/DistanceFunctionsTest.java index f83d6837f..787e230b0 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/DistanceFunctionsTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/DistanceFunctionsTest.java @@ -43,7 +43,7 @@ public void testLevenshteinAtom() throws ScriptException { context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); Object res = engine.eval( - "res := levenshtein(ds[keep id, string1], ds[keep id, string2][rename string2 to string1])[rename string1 to lev];"); + "res := levenshtein(ds[keep string1], ds[keep string2][rename string2 to string1])[rename string1 to lev];"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "lev", 3L), diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/JoinFunctionsTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/JoinFunctionsTest.java index e79293263..f18c03612 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/JoinFunctionsTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/JoinFunctionsTest.java @@ -137,7 +137,8 @@ public void testLeftJoinWithDifferentIdentifiers() throws ScriptException { result2 := left_join(ds_1_1, ds_2 using Id_2);\ """)) .isInstanceOf(InvalidArgumentException.class) - .hasMessage("using component Id_2 has to be an identifier"); + .hasMessage( + "CALC cannot overwrite IDENTIFIER 'Id_2' (role=IDENTIFIER, type=class java.lang.Long)."); } @Test @@ -230,9 +231,8 @@ public void testLeftJoinMixedStructure() throws ScriptException { engine.getContext().setAttribute("ds1", dataset1, ScriptContext.ENGINE_SCOPE); engine.getContext().setAttribute("ds2", dataset2, ScriptContext.ENGINE_SCOPE); - engine.eval( - "unionData := union(ds1[keep id, measure1, measure2], ds2[keep id, measure1, measure2]);"); - engine.eval("ds1_keep := ds1[keep id, color];"); + engine.eval("unionData := union(ds1[keep measure1, measure2], ds2[keep measure1, measure2]);"); + engine.eval("ds1_keep := ds1[keep color];"); engine.eval("joinData := left_join(unionData, ds1_keep);"); Dataset joinData = (Dataset) engine.getBindings(ScriptContext.ENGINE_SCOPE).get("joinData"); @@ -260,7 +260,7 @@ public void testInnerJoin() throws ScriptException { engine.getContext().setAttribute("ds_1", ds1, ScriptContext.ENGINE_SCOPE); engine.getContext().setAttribute("ds_2", ds2, ScriptContext.ENGINE_SCOPE); - engine.eval("result1 := inner_join(ds_1[keep id1, id2, m1] as ds1, ds_2 as ds2);"); + engine.eval("result1 := inner_join(ds_1[keep m1] as ds1, ds_2 as ds2);"); var result = (Dataset) engine.getContext().getAttribute("result1"); assertThat(result.getColumnNames()).containsExactlyInAnyOrder("id1", "id2", "m1", "m2"); diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/NumericFunctionsTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/NumericFunctionsTest.java index d26b9617a..558e1f720 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/NumericFunctionsTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/NumericFunctionsTest.java @@ -85,7 +85,7 @@ public void testCeil() throws ScriptException { assertThat(context.getAttribute("d")).isEqualTo(0L); context.setAttribute("ds", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := ceil(ds[keep id, long1, double1]);"); + Object res = engine.eval("res := ceil(ds[keep long1, double1]);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "long1", 150L, "double1", 2L), @@ -112,7 +112,7 @@ public void testFloor() throws ScriptException { assertThat(context.getAttribute("d")).isEqualTo(-1L); context.setAttribute("ds", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := floor(ds[keep id, double1]);"); + Object res = engine.eval("res := floor(ds[keep double1]);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "double1", 1L), @@ -135,7 +135,7 @@ public void testAbs() throws ScriptException { assertThat(context.getAttribute("b")).isEqualTo(5.5D); context.setAttribute("ds", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := abs(ds[keep id, double1]);"); + Object res = engine.eval("res := abs(ds[keep double1]);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "double1", 1.1D), @@ -164,7 +164,7 @@ public void testExp() throws ScriptException { assertThat(((Double) context.getAttribute("d"))) .isCloseTo(0.367D, Percentage.withPercentage(1)); context.setAttribute("ds", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := floor(exp(ds[keep id, double1, long1]));"); + Object res = engine.eval("res := floor(exp(ds[keep double1, long1]));"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "double1", 3L, "long1", 9223372036854775807L), @@ -192,7 +192,7 @@ public void testLn() throws ScriptException { .isCloseTo(-0.69D, Percentage.withPercentage(1)); context.setAttribute("ds", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := floor(ln(abs(ds[keep id, double1])));"); + Object res = engine.eval("res := floor(ln(abs(ds[keep double1])));"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "double1", 0L), @@ -221,7 +221,7 @@ public void testRound() throws ScriptException { assertThat(context.getAttribute("e")).isEqualTo(12350D); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := round(ds[keep id, long1, double2], 1);"); + Object res = engine.eval("res := round(ds[keep long1, double2], 1);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", 30.0D, "double2", 1.2D), @@ -258,7 +258,7 @@ public void testTrunc() throws ScriptException { assertThat(context.getAttribute("e")).isEqualTo(12340D); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := trunc(ds[keep id, long1, double2], 1);"); + Object res = engine.eval("res := trunc(ds[keep long1, double2], 1);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", 30.0D, "double2", 1.2D), @@ -289,7 +289,7 @@ public void testSqrt() throws ScriptException { assertThat(context.getAttribute("c")).isEqualTo(0D); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := round(sqrt(ds[keep id, long1, double2]));"); + Object res = engine.eval("res := round(sqrt(ds[keep long1, double2]));"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", 5.0D, "double2", 1D), @@ -321,7 +321,7 @@ public void testMod() throws ScriptException { assertThat(context.getAttribute("d")).isEqualTo(9D); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := trunc(mod(ds[keep id, long1, double2], 2), 1);"); + Object res = engine.eval("res := trunc(mod(ds[keep long1, double2], 2), 1);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", 0.0D, "double2", 1.2D), @@ -353,7 +353,7 @@ public void testPower() throws ScriptException { assertThat(context.getAttribute("e")).isEqualTo(-125D); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := trunc(power(ds[keep id, long1, double2], 2), 1);"); + Object res = engine.eval("res := trunc(power(ds[keep long1, double2], 2), 1);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", 900.0D, "double2", 1.4D), @@ -383,7 +383,7 @@ public void testRandom() throws ScriptException { assertThat((Double) context.getAttribute("a")).isCloseTo(0.72D, Percentage.withPercentage(0.2)); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := trunc(random(ds[keep id, long1], 200), 1);"); + Object res = engine.eval("res := trunc(random(ds[keep long1], 200), 1);"); assertThat(((Dataset) res).getDataStructure().get("long1").getType()).isEqualTo(Double.class); assertThatThrownBy( @@ -410,7 +410,7 @@ public void testLog() throws ScriptException { .isCloseTo(3.01D, Percentage.withPercentage(0.01)); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := trunc(log(ds[keep id, long1, double2], 2), 1);"); + Object res = engine.eval("res := trunc(log(ds[keep long1, double2], 2), 1);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", 4.9D, "double2", 0.2D), diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/StringFunctionsTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/StringFunctionsTest.java index d291258b9..516a48c45 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/StringFunctionsTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/StringFunctionsTest.java @@ -86,12 +86,12 @@ public void testUnaryStringFunction() throws ScriptException { context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); engine.eval( - "dsTrim := trim(ds[keep id, string1]); " - + "dsLTrim := ltrim(ds[keep id, string1]); " - + "dsRTrim := rtrim(ds[keep id, string1]); " - + "dsUpper := upper(ds[keep id, string1]); " - + "dsLower := lower(ds[keep id, string1]); " - + "dsLen := length(ds[keep id, string1]);"); + "dsTrim := trim(ds[keep string1]); " + + "dsLTrim := ltrim(ds[keep string1]); " + + "dsRTrim := rtrim(ds[keep string1]); " + + "dsUpper := upper(ds[keep string1]); " + + "dsLower := lower(ds[keep string1]); " + + "dsLen := length(ds[keep string1]);"); assertThat(((Dataset) context.getAttribute("dsTrim")).getDataAsMap().get(0)) .isEqualTo(Map.of("id", "Toto", "string1", "toto")); assertThat(((Dataset) context.getAttribute("dsLTrim")).getDataAsMap().get(0)) @@ -127,7 +127,7 @@ public void testSubstrAtom() throws ScriptException { assertThat(context.getAttribute("s5")).isEqualTo(""); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := substr(ds[keep id, string1, string2], 2, 4);"); + Object res = engine.eval("res := substr(ds[keep string1, string2], 2, 4);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "string1", "oto", "string2", ""), @@ -146,7 +146,7 @@ public void testReplaceAtom() throws ScriptException { assertThat(context.getAttribute("r2")).isEqualTo("de"); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := replace(ds[keep id, string1, string2], \"o\", \"O\");"); + Object res = engine.eval("res := replace(ds[keep string1, string2], \"o\", \"O\");"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "string1", "tOtO", "string2", "t"), @@ -182,7 +182,7 @@ public void testInstrAtom() throws ScriptException { assertThat(context.getAttribute("i4")).isEqualTo(0L); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := instr(ds[keep id, string1, string2], \"o\", 0, 2);"); + Object res = engine.eval("res := instr(ds[keep string1, string2], \"o\", 0, 2);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "string1", 4L, "string2", 0L), From 36a4a787af906f638e547e9b98e791bd070f48d8 Mon Sep 17 00:00:00 2001 From: MiguelRosaTauroni Date: Mon, 29 Sep 2025 08:47:11 +0200 Subject: [PATCH 07/15] Included additional comments from the PR review --- .../main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java index 8319a1c3b..b31063046 100644 --- a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java +++ b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java @@ -133,6 +133,7 @@ public DatasetExpression visitKeepOrDropClause(VtlParser.KeepOrDropClauseContext if (!inputColumns.contains(requested)) { throw new VtlRuntimeException( new InvalidArgumentException( +// TODO: use actual column context. String.format("'%s' not found in dataset.", requested), fromContext(ctx))); } } @@ -157,6 +158,7 @@ public DatasetExpression visitKeepOrDropClause(VtlParser.KeepOrDropClauseContext String.format( "identifiers %s must not be explicitly listed in KEEP/DROP. Details: %s", forbidden, details.toString().trim()), +// TODO: use actual column context. fromContext(ctx))); } @@ -226,6 +228,7 @@ public DatasetExpression visitCalcClause(VtlParser.CalcClauseContext ctx) { existing.getRole(), existing.getType() != null ? existing.getType() : "n/a"); throw new VtlRuntimeException( new InvalidArgumentException( +// TODO: see if other cases are the same error (already defined in assignment for example). String.format("CALC cannot overwrite IDENTIFIER '%s' %s.", columnName, meta), fromContext(ctx))); } From 1572e613ab32fc6cd4c8918bfb325e98f8ca37f5 Mon Sep 17 00:00:00 2001 From: Nicolas Laval Date: Tue, 13 Jan 2026 10:27:02 +0100 Subject: [PATCH 08/15] Format --- .../java/fr/insee/vtl/engine/visitors/ClauseVisitor.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java index b31063046..451dc56a1 100644 --- a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java +++ b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java @@ -133,7 +133,7 @@ public DatasetExpression visitKeepOrDropClause(VtlParser.KeepOrDropClauseContext if (!inputColumns.contains(requested)) { throw new VtlRuntimeException( new InvalidArgumentException( -// TODO: use actual column context. + // TODO: use actual column context. String.format("'%s' not found in dataset.", requested), fromContext(ctx))); } } @@ -158,7 +158,7 @@ public DatasetExpression visitKeepOrDropClause(VtlParser.KeepOrDropClauseContext String.format( "identifiers %s must not be explicitly listed in KEEP/DROP. Details: %s", forbidden, details.toString().trim()), -// TODO: use actual column context. + // TODO: use actual column context. fromContext(ctx))); } @@ -228,7 +228,8 @@ public DatasetExpression visitCalcClause(VtlParser.CalcClauseContext ctx) { existing.getRole(), existing.getType() != null ? existing.getType() : "n/a"); throw new VtlRuntimeException( new InvalidArgumentException( -// TODO: see if other cases are the same error (already defined in assignment for example). + // TODO: see if other cases are the same error (already defined in assignment for + // example). String.format("CALC cannot overwrite IDENTIFIER '%s' %s.", columnName, meta), fromContext(ctx))); } From b380ca2635c617f39eafc0c6c62c45c6709a027d Mon Sep 17 00:00:00 2001 From: Nicolas Laval Date: Tue, 13 Jan 2026 11:18:07 +0100 Subject: [PATCH 09/15] Adjust tests with bad keep, drop behaviour --- .../insee/vtl/engine/utils/dag/DagTest.java | 46 +- .../functions/SetFunctionsVisitorTest.java | 4 +- .../vtl/prov/ProvenanceListenerTest.java | 4 +- .../test/java/fr/insee/vtl/prov/RDFTest.java | 7 +- .../src/test/java/fr/insee/vtl/BPETest.java | 7 +- .../fr/insee/vtl/SDMXVTLWorkflowTest.java | 52 +- .../src/test/resources/DSD_BPE_CENSUS.xml | 785 +++++++++++------- .../fr/insee/vtl/spark/SparkDatasetTest.java | 3 +- .../processing.engine/OperatorsTest.java | 9 +- .../spark/processing.engine/ProjectTest.java | 2 +- .../processing.engine/ValidationTest.java | 10 +- 11 files changed, 547 insertions(+), 382 deletions(-) diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/utils/dag/DagTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/utils/dag/DagTest.java index 841ba57e8..fcb4a6037 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/utils/dag/DagTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/utils/dag/DagTest.java @@ -199,11 +199,11 @@ void testDagSimpleExampleWithReordering() throws ScriptException { void testDagCycle() { final String script = """ - e := a; - b := a; - c := b; - a := c; - f := a;"""; + e := a; + b := a; + c := b; + a := c; + f := a;"""; final Positioned.Position mainPosition = getPositionOfStatementInScript("a := c", script); final List otherPositions = @@ -231,14 +231,14 @@ void testDagCycle() { void testMultipleCycles() { final String script = """ - h := g; - i := join(h, input_ds); - g := i; - e := a; - b := a; - c := b; - a := c; - f := a;"""; + h := g; + i := join(h, input_ds); + g := i; + e := a; + b := a; + c := b; + a := c; + f := a;"""; final Positioned.Position mainExceptionMainPosition = getPositionOfStatementInScript("g := i", script); @@ -446,8 +446,8 @@ void testDagIfExpr() throws ScriptException { engine.getContext().setAttribute("ds_2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); engine.eval( "res := if ds1 > ds2 then ds1 else ds2; " - + "ds1 := ds_1[keep id, long1][rename long1 to bool_var]; " - + "ds2 := ds_2[keep id, long1][rename long1 to bool_var];"); + + "ds1 := ds_1[keep long1][rename long1 to bool_var]; " + + "ds2 := ds_2[keep long1][rename long1 to bool_var];"); var res = engine.getContext().getAttribute("res"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( @@ -463,7 +463,7 @@ void testDagCaseExpr() throws ScriptException { engine.getContext().setAttribute("ds_2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); engine.eval( "res0 <- tmp0[calc c := case when long1 > 30 then \"ok\" else \"ko\"][drop long1]; " - + "tmp0 := ds_1[keep id, long1];"); + + "tmp0 := ds_1[keep long1];"); Object res0 = engine.getContext().getAttribute("res0"); assertThat(((Dataset) res0).getDataAsMap()) .containsExactlyInAnyOrder( @@ -474,7 +474,7 @@ void testDagCaseExpr() throws ScriptException { assertThat(((Dataset) res0).getDataStructure().get("c").getType()).isEqualTo(String.class); engine.eval( "res1 <- tmp1[calc c := case when long1 > 30 then 1 else 0][drop long1]; " - + "tmp1 := ds_1[keep id, long1];"); + + "tmp1 := ds_1[keep long1];"); Object res1 = engine.getContext().getAttribute("res1"); assertThat(((Dataset) res1).getDataAsMap()) .containsExactlyInAnyOrder( @@ -484,9 +484,9 @@ void testDagCaseExpr() throws ScriptException { Map.of("id", "Franck", "c", 1L)); assertThat(((Dataset) res1).getDataStructure().get("c").getType()).isEqualTo(Long.class); engine.eval( - "tmp2_alt_ds1 := ds_1[keep id, long1][rename long1 to bool_var]; " + "tmp2_alt_ds1 := ds_1[keep long1][rename long1 to bool_var]; " + "res2 <- case when tmp2_alt_ds1 < 30 then tmp2_alt_ds1 else tmp2_alt_ds2; " - + "tmp2_alt_ds2 := ds_2[keep id, long1][rename long1 to bool_var];"); + + "tmp2_alt_ds2 := ds_2[keep long1][rename long1 to bool_var];"); Object resDs = engine.getContext().getAttribute("res2"); assertThat(((Dataset) resDs).getDataAsMap()) .containsExactlyInAnyOrder( @@ -498,7 +498,7 @@ void testDagCaseExpr() throws ScriptException { @Test void testDagNvlExpr() throws ScriptException { engine.getContext().setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - engine.eval("res <- nvl(tmp1[keep id, long1], 0); tmp1 := ds1;"); + engine.eval("res <- nvl(tmp1[keep long1], 0); tmp1 := ds1;"); var res = engine.getContext().getAttribute("res"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( @@ -512,7 +512,7 @@ void testDagNvlExpr() throws ScriptException { @Test void testDagNvlImplicitCast() throws ScriptException { engine.getContext().setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - engine.eval("res := nvl(tmp1[keep id, long1], 0.1); tmp1 <- ds1;"); + engine.eval("res := nvl(tmp1[keep long1], 0.1); tmp1 <- ds1;"); var res = engine.getContext().getAttribute("res"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( @@ -528,7 +528,7 @@ void testDagUnaryExpr() throws ScriptException { ScriptContext context = engine.getContext(); context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := + tmp1[keep id, long1, double1]; tmp1 <- ds2;"); + Object res = engine.eval("res := + tmp1[keep long1, double1]; tmp1 <- ds2;"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "long1", 150L, "double1", 1.1D), @@ -537,7 +537,7 @@ void testDagUnaryExpr() throws ScriptException { assertThat(((Dataset) res).getDataStructure().get("long1").getType()).isEqualTo(Long.class); context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res2 = engine.eval("res2 := - tmp2[keep id, long1, double1]; tmp2 := ds2;"); + Object res2 = engine.eval("res2 := - tmp2[keep long1, double1]; tmp2 := ds2;"); assertThat(((Dataset) res2).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "long1", -150L, "double1", -1.1D), diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/SetFunctionsVisitorTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/SetFunctionsVisitorTest.java index d548831ba..510b58d6d 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/SetFunctionsVisitorTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/SetFunctionsVisitorTest.java @@ -265,9 +265,9 @@ public void testUnion456Issue() throws ScriptException { context.getBindings(ScriptContext.ENGINE_SCOPE).put("MULTIMODE", multimodeDs); engine.eval( - "TEMP_RACINE := MULTIMODE [keep interrogationId, FOO];\n" + "TEMP_RACINE := MULTIMODE [keep FOO];\n" + "RACINE := union(TEMP_RACINE, TEMP_RACINE) ;\n" - + "TEMP_LOOP := MULTIMODE [keep interrogationId, LOOP, LOOP.FOO1]\n" + + "TEMP_LOOP := MULTIMODE [keep LOOP.FOO1]\n" + " [filter LOOP <> \"\"]\n" + " [rename LOOP.FOO1 to FOO1];\n" + "LOOP <- union(TEMP_LOOP, TEMP_LOOP);"); diff --git a/vtl-prov/src/test/java/fr/insee/vtl/prov/ProvenanceListenerTest.java b/vtl-prov/src/test/java/fr/insee/vtl/prov/ProvenanceListenerTest.java index 3966b7e06..fb2f07326 100644 --- a/vtl-prov/src/test/java/fr/insee/vtl/prov/ProvenanceListenerTest.java +++ b/vtl-prov/src/test/java/fr/insee/vtl/prov/ProvenanceListenerTest.java @@ -95,10 +95,10 @@ public void testWithEmptyLines() throws ScriptException { """ - ds1 := data.ds1[calc identifier id := id, var1 := cast(var1, integer), var2 := cast(var2, integer)]; + ds1 := data.ds1[calc var1 := cast(var1, integer), var2 := cast(var2, integer)]; - ds2_out := other.ds2[calc identifier id := id, var1 := cast(var1, integer), var2 := cast(var2, integer)]; + ds2_out := other.ds2[calc var1 := cast(var1, integer), var2 := cast(var2, integer)]; ds_sum := ds1 + ds2_out; ds_mul <- ds_sum * 3; 'data.ds_res' <- ds_mul[filter mod(var1, 2) = 0][calc var_sum := var1 + var2]; diff --git a/vtl-prov/src/test/java/fr/insee/vtl/prov/RDFTest.java b/vtl-prov/src/test/java/fr/insee/vtl/prov/RDFTest.java index 5560966a6..8d5870e1d 100644 --- a/vtl-prov/src/test/java/fr/insee/vtl/prov/RDFTest.java +++ b/vtl-prov/src/test/java/fr/insee/vtl/prov/RDFTest.java @@ -167,14 +167,13 @@ define datapoint ruleset NUTS3_TYPES (variable facility_type, nb) is CENSUS_NUTS3_2021 := LEGAL_POP [rename REF_AREA to nuts3, TIME_PERIOD to year, POP_TOT to pop] [filter year = "2021"] [calc pop := cast(pop, integer)] - [drop year, NB_COM, POP_MUNI]; + [drop NB_COM, POP_MUNI]; // Extract dataset on general practitioners from BPE by NUTS 3 in 2021 - GENERAL_PRACT_NUTS3_2021 := BPE_NUTS3 [filter facility_type = "D201" and year = "2021"] - [drop facility_type, year]; + GENERAL_PRACT_NUTS3_2021 := BPE_NUTS3 [filter facility_type = "D201" and year = "2021"]; // Merge practitioners and legal population datasets by NUTS 3 in 2021 and compute an indicator - BPE_CENSUS_NUTS3_2021 <- inner_join(GENERAL_PRACT_NUTS3_2021, CENSUS_NUTS3_2021) + BPE_CENSUS_NUTS3_2021 <- inner_join(GENERAL_PRACT_NUTS3_2021, CENSUS_NUTS3_2021 using nuts3) [calc pract_per_10000_inhabitants := nb / pop * 10000] [drop nb, pop];\ """; diff --git a/vtl-sdmx/src/test/java/fr/insee/vtl/BPETest.java b/vtl-sdmx/src/test/java/fr/insee/vtl/BPETest.java index 85168f61b..bce7edfa2 100644 --- a/vtl-sdmx/src/test/java/fr/insee/vtl/BPETest.java +++ b/vtl-sdmx/src/test/java/fr/insee/vtl/BPETest.java @@ -168,7 +168,7 @@ public void bpeV1() throws ScriptException { "CENSUS_NUTS3_2021 := LEGAL_POP [rename REF_AREA to nuts3, POP_TOT to pop]\n" + "[filter TIME_PERIOD = \"2021\"]\n" + "[calc pop := cast(pop, integer)]\n" - + "[drop TIME_PERIOD, NB_COM, POP_MUNI];"); + + "[drop NB_COM, POP_MUNI];"); Dataset censusNuts2021 = (Dataset) engine.getContext().getAttribute("CENSUS_NUTS3_2021"); Structured.DataStructure censusNuts2021Structure = censusNuts2021.getDataStructure(); @@ -181,8 +181,7 @@ public void bpeV1() throws ScriptException { // Step 7 engine.eval( - "GENERAL_PRACT_NUTS3_2021 := BPE_NUTS3[filter facility_type = \"D201\" and TIME_PERIOD = \"2021\"]\n" - + "[drop facility_type, TIME_PERIOD];"); + "GENERAL_PRACT_NUTS3_2021 := BPE_NUTS3[filter facility_type = \"D201\" and TIME_PERIOD = \"2021\"];"); Dataset generalNuts = (Dataset) engine.getContext().getAttribute("GENERAL_PRACT_NUTS3_2021"); Structured.DataStructure generalNutsStructure = generalNuts.getDataStructure(); @@ -195,7 +194,7 @@ public void bpeV1() throws ScriptException { // Step 8 engine.eval( - "BPE_CENSUS_NUTS3_2021 <- inner_join(GENERAL_PRACT_NUTS3_2021, CENSUS_NUTS3_2021)\n" + "BPE_CENSUS_NUTS3_2021 <- inner_join(GENERAL_PRACT_NUTS3_2021, CENSUS_NUTS3_2021 using nuts3)\n" + "[calc pract_per_10000_inhabitants := nb / pop * 10000]\n" + "[drop nb, pop];"); diff --git a/vtl-sdmx/src/test/java/fr/insee/vtl/SDMXVTLWorkflowTest.java b/vtl-sdmx/src/test/java/fr/insee/vtl/SDMXVTLWorkflowTest.java index c5f96d45f..d716fa4d2 100644 --- a/vtl-sdmx/src/test/java/fr/insee/vtl/SDMXVTLWorkflowTest.java +++ b/vtl-sdmx/src/test/java/fr/insee/vtl/SDMXVTLWorkflowTest.java @@ -16,6 +16,7 @@ import javax.script.ScriptEngineManager; import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; public class SDMXVTLWorkflowTest { @@ -32,6 +33,8 @@ public void setUp() { engine.put(VtlScriptEngine.PROCESSING_ENGINE_NAMES, "spark"); } + // Disabled for now, we have to update the remote resource + @Disabled @Test void testRefFromRepo() { @@ -61,6 +64,9 @@ void testGetEmptyDataset() { .isEqualTo( new Structured.DataStructure( List.of( + new Structured.Component( + "facility_type", String.class, Dataset.Role.IDENTIFIER), + new Structured.Component("TIME_PERIOD", String.class, Dataset.Role.IDENTIFIER), new Structured.Component("nuts3", String.class, Dataset.Role.IDENTIFIER), new Structured.Component( "pract_per_10000_inhabitants", Double.class, Dataset.Role.MEASURE)))); @@ -89,14 +95,16 @@ public void testGetRulesetsVTL() { ReadableDataLocation rdl = new ReadableDataLocationTmp("src/test/resources/DSD_BPE_CENSUS.xml"); SDMXVTLWorkflow sdmxVtlWorkflow = new SDMXVTLWorkflow(engine, rdl, Map.of()); assertThat(sdmxVtlWorkflow.getRulesetsVTL()) - .isEqualTo( + .isEqualToIgnoringWhitespace( "define datapoint ruleset UNIQUE_MUNICIPALITY (variable DEPCOM) is\n" - + " MUNICIPALITY_FORMAT_RULE : match_characters(DEPCOM, \"[0-9]{5}|2[A-B][0-9]{3}\") errorcode \"Municipality code is not in the correct format\"\n" - + " end datapoint ruleset;\n" - + "\n" - + "define datapoint ruleset NUTS3_TYPES (variable facility_type, nb) is\n" - + " BOWLING_ALLEY_RULE : when facility_type = \"F102\" then nb > 10 errorcode \"Not enough bowling alleys\"\n" - + " end datapoint ruleset;"); + + " MUNICIPALITY_FORMAT_RULE : match_characters(DEPCOM, \"[0-9]{5}|2[A-B][0-9]{3}\") errorcode\n" + + " \"Municipality code is not in the correct format\"\n" + + " end datapoint ruleset;\n" + + " \n" + + " define datapoint ruleset NUTS3_TYPES (variable facility_type, nb) is\n" + + " BOWLING_ALLEY_RULE : when facility_type = \"F102\" then nb > 10 errorcode \"Not enough bowling\n" + + " alleys\"\n" + + " end datapoint ruleset;"); } @Test @@ -104,36 +112,38 @@ public void testGetTransformationsVTL() { ReadableDataLocation rdl = new ReadableDataLocationTmp("src/test/resources/DSD_BPE_CENSUS.xml"); SDMXVTLWorkflow sdmxVtlWorkflow = new SDMXVTLWorkflow(engine, rdl, Map.of()); assertThat(sdmxVtlWorkflow.getTransformationsVTL()) - .isEqualTo( + .isEqualToIgnoringWhitespace( "// Validation of municipality code in input file\n" + "CHECK_MUNICIPALITY := check_datapoint(BPE_DETAIL_VTL, UNIQUE_MUNICIPALITY invalid);\n" + "\n" + "// Clean BPE input database\n" - + "BPE_DETAIL_CLEAN := BPE_DETAIL_VTL [drop LAMBERT_X, LAMBERT_Y]\n" - + " [rename ID_EQUIPEMENT to id, TYPEQU to facility_type, DEPCOM to municipality, REF_YEAR to year];\n" + + "BPE_DETAIL_CLEAN := BPE_DETAIL_VTL [drop LAMBERT_X, LAMBERT_Y]\n" + + " [rename ID_EQUIPEMENT to id, TYPEQU to facility_type, DEPCOM to municipality, REF_YEAR to year];\n" + "\n" + "// BPE aggregation by municipality, type and year\n" - + "BPE_MUNICIPALITY <- BPE_DETAIL_CLEAN [aggr nb := count(id) group by municipality, year, facility_type] [rename year to TIME_PERIOD];\n" + + "BPE_MUNICIPALITY <- BPE_DETAIL_CLEAN [aggr nb := count(id) group by municipality, year, facility_type] [rename year\n" + + " to TIME_PERIOD];\n" + "\n" + "// BPE aggregation by NUTS 3, type and year\n" - + "BPE_NUTS3 <- BPE_MUNICIPALITY [calc nuts3 := if substr(municipality,1,2) = \"97\" then substr(municipality,1,3) else substr(municipality,1,2)]\n" - + " [aggr nb := count(nb) group by TIME_PERIOD, nuts3, facility_type];\n" + + "BPE_NUTS3 <- BPE_MUNICIPALITY [calc nuts3 := if substr(municipality,1,2) = \"97\" then substr(municipality,1,3)\n" + + " else substr(municipality,1,2)]\n" + + " [aggr nb := count(nb) group by TIME_PERIOD, nuts3, facility_type];\n" + "\n" + "// BPE validation of facility types by NUTS 3\n" + "CHECK_NUTS3_TYPES := check_datapoint(BPE_NUTS3, NUTS3_TYPES invalid);\n" + "\n" + "// Prepare 2021 census dataset by NUTS 3\n" - + "CENSUS_NUTS3_2021 := LEGAL_POP [rename REF_AREA to nuts3, POP_TOT to pop]\n" - + " [filter TIME_PERIOD = \"2021\"]\n" - + " [calc pop := cast(pop, integer)]\n" - + " [drop TIME_PERIOD, NB_COM, POP_MUNI];\n" + + "CENSUS_NUTS3_2021 := LEGAL_POP [rename REF_AREA to nuts3, POP_TOT to pop]\n" + + " [filter TIME_PERIOD = \"2021\"]\n" + + " [calc pop := cast(pop, integer)]\n" + + " [drop NB_COM, POP_MUNI];\n" + "\n" + "// Extract dataset on general practitioners from BPE by NUTS 3 in 2021\n" - + "GENERAL_PRACT_NUTS3_2021 := BPE_NUTS3 [filter facility_type = \"D201\" and TIME_PERIOD = \"2021\"]\n" - + " [drop facility_type, TIME_PERIOD];\n" + + "GENERAL_PRACT_NUTS3_2021 := BPE_NUTS3 [filter facility_type = \"D201\" and TIME_PERIOD = \"2021\"];\n" + "\n" - + "// Merge practitioners and legal population datasets by NUTS 3 in 2021 and compute an indicator\n" - + "BPE_CENSUS_NUTS3_2021 <- inner_join(GENERAL_PRACT_NUTS3_2021, CENSUS_NUTS3_2021)\n" + + "// Merge practitioners and legal population datasets by NUTS 3 in 2021\n" + + "// and compute an indicator\n" + + "BPE_CENSUS_NUTS3_2021 <- inner_join(GENERAL_PRACT_NUTS3_2021, CENSUS_NUTS3_2021 using nuts3)\n" + " [calc pract_per_10000_inhabitants := nb / pop * 10000]\n" + " [drop nb, pop];"); } diff --git a/vtl-sdmx/src/test/resources/DSD_BPE_CENSUS.xml b/vtl-sdmx/src/test/resources/DSD_BPE_CENSUS.xml index b16ece3e0..1d16e408d 100644 --- a/vtl-sdmx/src/test/resources/DSD_BPE_CENSUS.xml +++ b/vtl-sdmx/src/test/resources/DSD_BPE_CENSUS.xml @@ -1,206 +1,302 @@ - + DSD_BPE_DETAIL_1049 false 2024-03-26T10:49:00Z - - + + + - - - Type d'équipements - La classification des équipements en gammes a pour objectif de réunir des équipements qui présentent des logiques d'implantation voisines, en ce sens qu'ils sont fréquemment présents dans les mêmes communes. Ces regroupements permettent d'élaborer des indicateurs synthétiques reflétant l'organisation hiérarchisée des territoires en termes de services à la population. Les gammes d’équipements sont actualisées chaque année pour une nouvelle version de la Base Permanente des Équipements. En effet, d’une part de nouveaux équipements peuvent être introduits dans la base et, d’autre part, l’implantation des équipements dans les communes peut être modifiée, tout cela pouvant avoir des conséquences sur la composition des gammes. - - POLICE - - - - Code officiel géographique 2021 - Les objets géographiques contenus dans le COG sont les collectivités territoriales (communes, départements, régions, collectivités territoriales à statut particulier), les découpages administratifs (arrondissements, arrondissements municipaux, pseudo-cantons) et certains découpages électoraux (cantons), ainsi que les pays et territoires étrangers. Ils sont répertoriés au 1er janvier de chaque année. Pour la mise à jour du millésime N, tous les événements intervenus entre le 02/01/N-1 et 01/01/N sont pris en compte. - - L'Abergement-Clémenciat - - + + + Type d'équipements + La classification des équipements en gammes a pour objectif de réunir des + équipements qui présentent des logiques d'implantation voisines, en ce sens qu'ils sont fréquemment + présents dans les mêmes communes. Ces regroupements permettent d'élaborer des indicateurs + synthétiques reflétant l'organisation hiérarchisée des territoires en termes de services à la + population. Les gammes d’équipements sont actualisées chaque année pour une nouvelle version de la + Base Permanente des Équipements. En effet, d’une part de nouveaux équipements peuvent être + introduits dans la base et, d’autre part, l’implantation des équipements dans les communes peut être + modifiée, tout cela pouvant avoir des conséquences sur la composition des gammes. + + + POLICE + + + + Code officiel géographique 2021 + Les objets géographiques contenus dans le COG sont les collectivités + territoriales (communes, départements, régions, collectivités territoriales à statut particulier), + les découpages administratifs (arrondissements, arrondissements municipaux, pseudo-cantons) et + certains découpages électoraux (cantons), ainsi que les pays et territoires étrangers. Ils sont + répertoriés au 1er janvier de chaque année. Pour la mise à jour du millésime N, tous les événements + intervenus entre le 02/01/N-1 et 01/01/N sont pris en compte. + + + L'Abergement-Clémenciat + + Code officiel géographique - Les objets géographiques contenus dans le COG sont les collectivités territoriales (communes, départements, régions, collectivités territoriales à statut particulier), les découpages administratifs (arrondissements, arrondissements municipaux, pseudo-cantons) et certains découpages électoraux (cantons), ainsi que les pays et territoires étrangers. Ils sont répertoriés au 1er janvier de chaque année. Pour la mise à jour du millésime N, tous les événements intervenus entre le 02/01/N-1 et 01/01/N sont pris en compte. + Les objets géographiques contenus dans le COG sont les collectivités + territoriales (communes, départements, régions, collectivités territoriales à statut particulier), + les découpages administratifs (arrondissements, arrondissements municipaux, pseudo-cantons) et + certains découpages électoraux (cantons), ainsi que les pays et territoires étrangers. Ils sont + répertoriés au 1er janvier de chaque année. Pour la mise à jour du millésime N, tous les événements + intervenus entre le 02/01/N-1 et 01/01/N sont pris en compte. + Ain - - - - Ensemble des concepts de l'Insee - - Equipement - - - Type de l'équipement - - urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_TYPEQU(1.0) - - - - - Commune - - urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_DEPCOM(1.0) - - - - - Année de référence - - - Abscisse (Système de coordonnées Lambert 93) - - - Ordonnée (Système de coordonnées Lambert 93) - - + + + + Ensemble des concepts de l'Insee + + Equipement + + + Type de l'équipement + + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_TYPEQU(1.0) + + + + + + Commune + + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_DEPCOM(1.0) + + + + + + Année de référence + + + Abscisse (Système de coordonnées Lambert 93) + + + Ordonnée (Système de coordonnées Lambert 93) + + Code officiel géographique - + Année - + Nombre de communes - + Nombre d'équipement - + Nombre de médecins généralistes pour 10 OOO habitants - + Population municipale - + Population totale - - + + - + Geolocalized Equipments (geography 2021) - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).EQUIPEMENT - - + + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).EQUIPEMENT + + + - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).TYPE_EQUIPEMENT + + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).TYPE_EQUIPEMENT + - urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_TYPEQU(1.0) - + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_TYPEQU(1.0) + + - + - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).MUNICIPALITY + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).MUNICIPALITY + - urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_DEPCOM(1.0) - + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_DEPCOM(1.0) + + - + - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_YEAR + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_YEAR + - + - + - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).LAMBERT_X - - - + + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).LAMBERT_X + + + + - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).LAMBERT_Y - - - + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).LAMBERT_Y + + + + - Cube populations légales - - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_AREA - - urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_REF_AREA(1.0) - - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).TIME_PERIOD - - - - - - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).NB_COM - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).POP_MUNI - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).POP_TOT - - - - + isExternalReference="false" + agencyID="FR1" + id="LEGAL_POP_CUBE" + version="1.0"> + Cube populations légales + + + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_AREA + + + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_REF_AREA(1.0) + + + + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).TIME_PERIOD + + + + + + + + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).NB_COM + + + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).POP_MUNI + + + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).POP_TOT + + + + + Cube BPE par ville - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).MUNICIPALITY + + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).MUNICIPALITY + - urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_DEPCOM(1.0) + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_DEPCOM(1.0) + - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).TYPE_EQUIPEMENT + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).TYPE_EQUIPEMENT + - urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_TYPEQU(1.0) + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_TYPEQU(1.0) + - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_YEAR + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_YEAR + - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).NB_EQUIPEMENT + + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).NB_EQUIPEMENT + @@ -251,201 +363,246 @@ version="1.0"> Cube BPE par nuts 3 - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_AREA + + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_AREA + - urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_REF_AREA(1.0) + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_REF_AREA(1.0) + - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).TYPE_EQUIPEMENT + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).TYPE_EQUIPEMENT + - urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_TYPEQU(1.0) + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_TYPEQU(1.0) + - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_YEAR + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_YEAR + - + - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).NB_EQUIPEMENT + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).NB_EQUIPEMENT + - + Cube médecins généralistes par habitants ventillé par nuts 3 en 2010 - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_AREA + + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_AREA + - urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_REF_AREA(1.0) + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_REF_AREA(1.0) + - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).NB_GENERALISTES_PAR_HABITANTS + + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).NB_GENERALISTES_PAR_HABITANTS + - - Dataflow for BPE_DETAIL - urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:BPE_DETAIL(1.0) + + Dataflow for BPE_DETAIL + urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:BPE_DETAIL(1.0) + - - Dataflow for LEGAL_POP_CUBE - urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:LEGAL_POP_CUBE(1.0) + + Dataflow for LEGAL_POP_CUBE + urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:LEGAL_POP_CUBE(1.0) + - + Dataflow for BPE_MUNICIPALITY - urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:BPE_MUNICIPALITY(1.0) + urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:BPE_MUNICIPALITY(1.0) + - + Dataflow for BPE_NUTS3 - urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:BPE_NUTS3(1.0) + urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:BPE_NUTS3(1.0) + - + Dataflow for BPE_CENSUS_NUTS3_2021 - urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:BPE_CENSUS_NUTS3_2021(1.0) + urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:BPE_CENSUS_NUTS3_2021(1.0) + - - VTL Mapping Scheme #1 - - VTL Mapping #1 - urn:sdmx:org.sdmx.infomodel.datastructure.Dataflow=FR1:BPE_DETAIL(1.0) - - - VTL Mapping #2 - urn:sdmx:org.sdmx.infomodel.datastructure.Dataflow=FR1:LEGAL_POP_CUBE(1.0) - + + VTL Mapping Scheme #1 + + VTL Mapping #1 + urn:sdmx:org.sdmx.infomodel.datastructure.Dataflow=FR1:BPE_DETAIL(1.0) + + + VTL Mapping #2 + urn:sdmx:org.sdmx.infomodel.datastructure.Dataflow=FR1:LEGAL_POP_CUBE(1.0) + + - - Ruleset Scheme #1 - - Datapoint Ruleset UNIQUE_MUNICIPALITY - - define datapoint ruleset UNIQUE_MUNICIPALITY (variable DEPCOM) is - MUNICIPALITY_FORMAT_RULE : match_characters(DEPCOM, "[0-9]{5}|2[A-B][0-9]{3}") errorcode "Municipality code is not in the correct format" - end datapoint ruleset; - - - - Datapoint Ruleset NUTS3_TYPES - - define datapoint ruleset NUTS3_TYPES (variable facility_type, nb) is - BOWLING_ALLEY_RULE : when facility_type = "F102" then nb > 10 errorcode "Not enough bowling alleys" - end datapoint ruleset; - - - + + Ruleset Scheme #1 + + Datapoint Ruleset UNIQUE_MUNICIPALITY + + define datapoint ruleset UNIQUE_MUNICIPALITY (variable DEPCOM) is + MUNICIPALITY_FORMAT_RULE : match_characters(DEPCOM, "[0-9]{5}|2[A-B][0-9]{3}") errorcode + "Municipality code is not in the correct format" + end datapoint ruleset; + + + + Datapoint Ruleset NUTS3_TYPES + + define datapoint ruleset NUTS3_TYPES (variable facility_type, nb) is + BOWLING_ALLEY_RULE : when facility_type = "F102" then nb > 10 errorcode "Not enough bowling + alleys" + end datapoint ruleset; + + + - - Transformation Scheme for BPE - CENSUS - - Step 1 - Validation of municipality code in input file - check_datapoint(BPE_DETAIL_VTL, UNIQUE_MUNICIPALITY invalid) - CHECK_MUNICIPALITY - - - Step 2 - Clean BPE input database - - BPE_DETAIL_VTL [drop LAMBERT_X, LAMBERT_Y] - [rename ID_EQUIPEMENT to id, TYPEQU to facility_type, DEPCOM to municipality, REF_YEAR to year] - - BPE_DETAIL_CLEAN - - - Step 3 - BPE aggregation by municipality, type and year - - BPE_DETAIL_CLEAN [aggr nb := count(id) group by municipality, year, facility_type] [rename year to TIME_PERIOD] - - BPE_MUNICIPALITY - - - Step 4 - BPE aggregation by NUTS 3, type and year - - BPE_MUNICIPALITY [calc nuts3 := if substr(municipality,1,2) = "97" then substr(municipality,1,3) else substr(municipality,1,2)] - [aggr nb := count(nb) group by TIME_PERIOD, nuts3, facility_type] - - BPE_NUTS3 - - - Step 5 - BPE validation of facility types by NUTS 3 - check_datapoint(BPE_NUTS3, NUTS3_TYPES invalid) - CHECK_NUTS3_TYPES - - - Step 6 - Prepare 2021 census dataset by NUTS 3 - - LEGAL_POP [rename REF_AREA to nuts3, POP_TOT to pop] - [filter TIME_PERIOD = "2021"] - [calc pop := cast(pop, integer)] - [drop TIME_PERIOD, NB_COM, POP_MUNI] - - CENSUS_NUTS3_2021 - - - Step 7 - Extract dataset on general practitioners from BPE by NUTS 3 in 2021 - - BPE_NUTS3 [filter facility_type = "D201" and TIME_PERIOD = "2021"] - [drop facility_type, TIME_PERIOD] - - GENERAL_PRACT_NUTS3_2021 - - - Step 8 - Merge practitioners and legal population datasets by NUTS 3 in 2021 and compute an indicator - - inner_join(GENERAL_PRACT_NUTS3_2021, CENSUS_NUTS3_2021) + + Transformation Scheme for BPE - CENSUS + + Step 1 + Validation of municipality code in input file + check_datapoint(BPE_DETAIL_VTL, UNIQUE_MUNICIPALITY invalid) + CHECK_MUNICIPALITY + + + Step 2 + Clean BPE input database + + BPE_DETAIL_VTL [drop LAMBERT_X, LAMBERT_Y] + [rename ID_EQUIPEMENT to id, TYPEQU to facility_type, DEPCOM to municipality, REF_YEAR to year] + + BPE_DETAIL_CLEAN + + + Step 3 + BPE aggregation by municipality, type and year + + BPE_DETAIL_CLEAN [aggr nb := count(id) group by municipality, year, facility_type] [rename year + to TIME_PERIOD] + + BPE_MUNICIPALITY + + + Step 4 + BPE aggregation by NUTS 3, type and year + + BPE_MUNICIPALITY [calc nuts3 := if substr(municipality,1,2) = "97" then substr(municipality,1,3) + else substr(municipality,1,2)] + [aggr nb := count(nb) group by TIME_PERIOD, nuts3, facility_type] + + BPE_NUTS3 + + + Step 5 + BPE validation of facility types by NUTS 3 + check_datapoint(BPE_NUTS3, NUTS3_TYPES invalid) + CHECK_NUTS3_TYPES + + + Step 6 + Prepare 2021 census dataset by NUTS 3 + + LEGAL_POP [rename REF_AREA to nuts3, POP_TOT to pop] + [filter TIME_PERIOD = "2021"] + [calc pop := cast(pop, integer)] + [drop NB_COM, POP_MUNI] + + CENSUS_NUTS3_2021 + + + Step 7 + Extract dataset on general practitioners from BPE by NUTS 3 in 2021 + + + BPE_NUTS3 [filter facility_type = "D201" and TIME_PERIOD = "2021"] + + GENERAL_PRACT_NUTS3_2021 + + + Step 8 + Merge practitioners and legal population datasets by NUTS 3 in 2021 + and compute an indicator + + + inner_join(GENERAL_PRACT_NUTS3_2021, CENSUS_NUTS3_2021 using nuts3) [calc pract_per_10000_inhabitants := nb / pop * 10000] [drop nb, pop] - - BPE_CENSUS_NUTS3_2021 - - urn:sdmx:org.sdmx.infomodel.transformation.VtlMappingScheme=FR1:VTLMS1(1.0) - urn:sdmx:org.sdmx.infomodel.transformation.RulesetScheme=FR1:RS1(1.0) - - + + BPE_CENSUS_NUTS3_2021 + + urn:sdmx:org.sdmx.infomodel.transformation.VtlMappingScheme=FR1:VTLMS1(1.0) + + urn:sdmx:org.sdmx.infomodel.transformation.RulesetScheme=FR1:RS1(1.0) + + + \ No newline at end of file diff --git a/vtl-spark/src/test/java/fr/insee/vtl/spark/SparkDatasetTest.java b/vtl-spark/src/test/java/fr/insee/vtl/spark/SparkDatasetTest.java index 5cfc73340..d0e97f7dd 100644 --- a/vtl-spark/src/test/java/fr/insee/vtl/spark/SparkDatasetTest.java +++ b/vtl-spark/src/test/java/fr/insee/vtl/spark/SparkDatasetTest.java @@ -130,11 +130,10 @@ public void testParquetMetadataWriting(@TempDir Path tmpDirectory) throws Script context.setAttribute("ds2", dsWithMetadata, ScriptContext.ENGINE_SCOPE); - engine.eval("ds3 := ds2[calc attribute school_id := school_id, identifier year := year];"); + engine.eval("ds3 := ds2[calc identifier year := year];"); SparkDataset dsWithMetadataAndRoles = (SparkDataset) engine.getContext().getAttribute("ds3"); - assertTrue(dsWithMetadataAndRoles.getDataStructure().get("school_id").isAttribute()); assertTrue(dsWithMetadataAndRoles.getDataStructure().get("year").isIdentifier()); } } diff --git a/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/OperatorsTest.java b/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/OperatorsTest.java index 3ad9bb9d3..10808118b 100644 --- a/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/OperatorsTest.java +++ b/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/OperatorsTest.java @@ -47,7 +47,7 @@ public void testOperators() throws ScriptException { engine.eval( "res := ds1#long1; " + "res1 := isnull(ds1); " - + "ds_1 := ds1[keep id, long1, double1]; ds_2 := ds2[keep id, long1, double1]; " + + "ds_1 := ds1[keep long1, double1]; ds_2 := ds2[keep long1, double1]; " + "res2 := ds_1 + ds_2; " + "res3 := ds_1 - ds_2; " + "res4 := ds_1 * ds_2; " @@ -67,7 +67,8 @@ public void testOperators() throws ScriptException { + "res18 := mod(ds_1, 5); " + "res19 := power(ds_1, 5); " + "res20 := log(abs(ds_1), 5); " - + "ds_11 := ds1[keep id, string1, string2]; ds_22 := ds2[keep id, string1][calc string2 := string1]; " + + "ds_11 := ds1[keep string1, string2]; " + + "ds_22 := ds2[keep string1][calc string2 := string1]; " + "res21 := ds_11 || ds_22; "); var res = engine.getContext().getAttribute("res21"); assertThat(((Dataset) res).getDataStructure().get("string1").getType()).isEqualTo(String.class); @@ -78,8 +79,8 @@ public void testPlan() throws ScriptException { engine.getContext().setAttribute("ds_1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); engine.getContext().setAttribute("ds_2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); engine.eval( - "ds1 := ds_1[keep id, long1][rename long1 to bool_var]; " - + "ds2 := ds_2[keep id, long1][rename long1 to bool_var]; " + "ds1 := ds_1[keep long1][rename long1 to bool_var]; " + + "ds2 := ds_2[keep long1][rename long1 to bool_var]; " + "res := if ds1 > ds2 then ds1 else ds2;"); var res = engine.getContext().getAttribute("res"); assertThat(((Dataset) res).getDataAsMap()) diff --git a/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/ProjectTest.java b/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/ProjectTest.java index 842758114..5512f86d6 100644 --- a/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/ProjectTest.java +++ b/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/ProjectTest.java @@ -59,7 +59,7 @@ public void testProjection() throws ScriptException { ScriptContext context = engine.getContext(); context.setAttribute("ds", dataset, ScriptContext.ENGINE_SCOPE); - engine.eval("ds1 := ds[keep name, age];"); + engine.eval("ds1 := ds[keep age];"); assertThat(engine.getContext().getAttribute("ds")) .isInstanceOf(fr.insee.vtl.model.Dataset.class); diff --git a/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/ValidationTest.java b/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/ValidationTest.java index 19f8eff0d..888bc55b7 100644 --- a/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/ValidationTest.java +++ b/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/ValidationTest.java @@ -713,9 +713,9 @@ public void testValidationSimpleException() throws ScriptException { Dataset DS_r_to_rename = (Dataset) engine.getContext().getAttribute("DS_r_to_rename"); List DS_r_to_renameMeasure = DS_r_to_rename.getDataStructure().values().stream() - .filter(c -> c.isMeasure()) - .map(c -> c.getName()) - .collect(Collectors.toList()); + .filter(Structured.Component::isMeasure) + .map(Structured.Component::getName) + .toList(); assertThat(DS_r_to_renameMeasure.size()).isEqualTo(4); assertThat(DS_r_to_renameMeasure.contains("imbalance")).isTrue(); } @@ -744,8 +744,8 @@ public void serializationCheckDatapointTest() throws ScriptException { "ds1_1 := ds1[calc identifier id := id, long1 := cast(long1, integer), double1 := cast(double1, number), bool1 := cast(bool1, boolean)]; " + "ds2_1 := ds2[calc identifier id := id, long1 := cast(long1, integer), double1 := cast(double1, number), bool1 := cast(bool1, boolean)]; " + "ds_concat := ds1_1#string1 || \" and \" || ds2_1#string1; " - + "ds1_num := ds1_1[keep id, long1, double1]; " - + "ds2_num := ds2_1[keep id, long1, double1]; " + + "ds1_num := ds1_1[keep long1, double1]; " + + "ds2_num := ds2_1[keep long1, double1]; " + "ds_mod := mod(ds1_num, 2); " + "ds_sum := ds1_num + ds2_num; " + "ds_compare := ds1_num = ds2_num; " From 1c5017af1be1bca19fd75710c87292ad398db461 Mon Sep 17 00:00:00 2001 From: Nicolas Laval Date: Tue, 13 Jan 2026 11:30:11 +0100 Subject: [PATCH 10/15] Improve ClauseVisitor --- .../vtl/engine/visitors/ClauseVisitor.java | 31 +++++++------------ 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java index 451dc56a1..7f0201b77 100644 --- a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java +++ b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java @@ -13,6 +13,7 @@ import fr.insee.vtl.parser.VtlBaseVisitor; import fr.insee.vtl.parser.VtlParser; import java.util.*; +import java.util.function.Function; import java.util.stream.Collectors; import org.antlr.v4.runtime.ParserRuleContext; import org.antlr.v4.runtime.misc.Interval; @@ -108,28 +109,20 @@ private static AggregationExpression convertToAggregation( public DatasetExpression visitKeepOrDropClause(VtlParser.KeepOrDropClauseContext ctx) { // The type of the op can either be KEEP or DROP. - final boolean keep = ctx.op.getType() == VtlParser.KEEP; + boolean keep = ctx.op.getType() == VtlParser.KEEP; // Columns explicitly requested in the KEEP/DROP clause - final List columnNames = - ctx.componentID().stream().map(ClauseVisitor::getName).toList(); + List cleanColumnNames = ctx.componentID().stream().map(ClauseVisitor::getName).toList(); - // All available dataset components - final List inputColumnDataTypes = - new ArrayList<>(datasetExpression.getDataStructure().values()); - final List inputColumns = - inputColumnDataTypes.stream().map(Dataset.Component::getName).toList(); + Collection inputColumns = datasetExpression.getDataStructure().keySet(); // Dataset identifiers (role = IDENTIFIER) - final Map identifiers = - inputColumnDataTypes.stream() - .filter(c -> c.getRole() == Dataset.Role.IDENTIFIER) - .collect( - Collectors.toMap( - Dataset.Component::getName, c -> c, (a, b) -> a, LinkedHashMap::new)); + Map identifiers = + datasetExpression.getDataStructure().getIdentifiers().stream() + .collect(Collectors.toMap(Structured.Component::getName, Function.identity())); // Evaluate that all requested columns must exist in the dataset or raise an error - for (String requested : columnNames) { + for (String requested : cleanColumnNames) { if (!inputColumns.contains(requested)) { throw new VtlRuntimeException( new InvalidArgumentException( @@ -139,8 +132,8 @@ public DatasetExpression visitKeepOrDropClause(VtlParser.KeepOrDropClauseContext } // VTL specification: identifiers must not appear explicitly in KEEP - final Set forbidden = - columnNames.stream() + Set forbidden = + cleanColumnNames.stream() .filter(identifiers::containsKey) .collect(Collectors.toCollection(LinkedHashSet::new)); @@ -168,10 +161,10 @@ public DatasetExpression visitKeepOrDropClause(VtlParser.KeepOrDropClauseContext final Set resultSet = new LinkedHashSet<>(); resultSet.addAll(identifiers.keySet()); if (keep) { - resultSet.addAll(columnNames); + resultSet.addAll(cleanColumnNames); } else { for (String col : inputColumns) { - if (!columnNames.contains(col)) { + if (!cleanColumnNames.contains(col)) { resultSet.add(col); } } From eae1be6b633050572289363d218a31d0f8b16ecb Mon Sep 17 00:00:00 2001 From: Hadrien Kohl Date: Tue, 13 Jan 2026 14:25:13 +0100 Subject: [PATCH 11/15] Extract keep / drop --- .../vtl/engine/visitors/ClauseVisitor.java | 271 +----- .../insee/vtl/engine/utils/dag/DagTest.java | 46 +- .../engine/visitors/ClauseVisitorTest.java | 100 +-- .../ArithmeticExprOrConcatTest.java | 6 +- .../expression/ArithmeticExprTest.java | 6 +- .../visitors/expression/BooleanExprTest.java | 4 +- .../expression/ComparisonExprTest.java | 14 +- .../expression/ConditionalExprTest.java | 16 +- .../visitors/expression/UnaryExprTest.java | 4 +- .../functions/ComparisonFunctionsTest.java | 7 +- .../functions/DistanceFunctionsTest.java | 2 +- .../functions/JoinFunctionsTest.java | 10 +- .../functions/NumericFunctionsTest.java | 24 +- .../functions/SetFunctionsVisitorTest.java | 4 +- .../functions/StringFunctionsTest.java | 18 +- .../vtl/prov/ProvenanceListenerTest.java | 4 +- .../test/java/fr/insee/vtl/prov/RDFTest.java | 7 +- .../src/test/java/fr/insee/vtl/BPETest.java | 7 +- .../fr/insee/vtl/SDMXVTLWorkflowTest.java | 52 +- .../src/test/resources/DSD_BPE_CENSUS.xml | 785 +++++++----------- .../fr/insee/vtl/spark/SparkDatasetTest.java | 3 +- .../processing.engine/OperatorsTest.java | 9 +- .../spark/processing.engine/ProjectTest.java | 2 +- .../processing.engine/ValidationTest.java | 10 +- 24 files changed, 489 insertions(+), 922 deletions(-) diff --git a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java index 7f0201b77..fefde8900 100644 --- a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java +++ b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java @@ -13,7 +13,6 @@ import fr.insee.vtl.parser.VtlBaseVisitor; import fr.insee.vtl.parser.VtlParser; import java.util.*; -import java.util.function.Function; import java.util.stream.Collectors; import org.antlr.v4.runtime.ParserRuleContext; import org.antlr.v4.runtime.misc.Interval; @@ -107,287 +106,83 @@ private static AggregationExpression convertToAggregation( @Override public DatasetExpression visitKeepOrDropClause(VtlParser.KeepOrDropClauseContext ctx) { + // Normalize to keep operation. + var keep = ctx.op.getType() == VtlParser.KEEP; + var names = ctx.componentID().stream().map(ClauseVisitor::getName).collect(Collectors.toSet()); + List columnNames = + datasetExpression.getDataStructure().values().stream() + .map(Dataset.Component::getName) + .filter(name -> keep == names.contains(name)) + .collect(Collectors.toList()); - // The type of the op can either be KEEP or DROP. - boolean keep = ctx.op.getType() == VtlParser.KEEP; - - // Columns explicitly requested in the KEEP/DROP clause - List cleanColumnNames = ctx.componentID().stream().map(ClauseVisitor::getName).toList(); - - Collection inputColumns = datasetExpression.getDataStructure().keySet(); - - // Dataset identifiers (role = IDENTIFIER) - Map identifiers = - datasetExpression.getDataStructure().getIdentifiers().stream() - .collect(Collectors.toMap(Structured.Component::getName, Function.identity())); - - // Evaluate that all requested columns must exist in the dataset or raise an error - for (String requested : cleanColumnNames) { - if (!inputColumns.contains(requested)) { - throw new VtlRuntimeException( - new InvalidArgumentException( - // TODO: use actual column context. - String.format("'%s' not found in dataset.", requested), fromContext(ctx))); - } - } - - // VTL specification: identifiers must not appear explicitly in KEEP - Set forbidden = - cleanColumnNames.stream() - .filter(identifiers::containsKey) - .collect(Collectors.toCollection(LinkedHashSet::new)); - - if (!forbidden.isEmpty()) { - StringBuilder details = new StringBuilder(); - for (String id : forbidden) { - Dataset.Component comp = identifiers.get(id); - details.append( - String.format( - "%s(role=%s, type=%s) ", - id, comp.getRole(), comp.getType() != null ? comp.getType() : "n/a")); - } - throw new VtlRuntimeException( - new InvalidArgumentException( - String.format( - "identifiers %s must not be explicitly listed in KEEP/DROP. Details: %s", - forbidden, details.toString().trim()), - // TODO: use actual column context. - fromContext(ctx))); - } - - // Build result set: - // + KEEP: identifiers + requested columns - // + DROP: (all columns - requested) + identifiers - final Set resultSet = new LinkedHashSet<>(); - resultSet.addAll(identifiers.keySet()); - if (keep) { - resultSet.addAll(cleanColumnNames); - } else { - for (String col : inputColumns) { - if (!cleanColumnNames.contains(col)) { - resultSet.add(col); - } - } - } - - // Retrieve the output column names (identifiers + requested) - final List outputColumns = - inputColumns.stream().filter(resultSet::contains).collect(Collectors.toList()); - return processingEngine.executeProject(datasetExpression, outputColumns); + return processingEngine.executeProject(datasetExpression, columnNames); } @Override public DatasetExpression visitCalcClause(VtlParser.CalcClauseContext ctx) { - // Dataset structure (ordered) and quick lookups - final List componentsInOrder = - new ArrayList<>(datasetExpression.getDataStructure().values()); - - final Map byName = - componentsInOrder.stream() - .collect( - Collectors.toMap( - Dataset.Component::getName, c -> c, (a, b) -> a, LinkedHashMap::new)); - - // Accumulators for non-analytic calc items - final LinkedHashMap expressions = new LinkedHashMap<>(); - final LinkedHashMap expressionStrings = new LinkedHashMap<>(); - final LinkedHashMap roles = new LinkedHashMap<>(); - - // Tracks duplicates in the same clause (target names) - final Set targetsSeen = new LinkedHashSet<>(); - - // We need a rolling dataset expression to chain analytics items - DatasetExpression currentDatasetExpression = datasetExpression; - - // TODO: Refactor so we call executeCalc per CalcClauseItemContext (as analytics do). + var expressions = new LinkedHashMap(); + var expressionStrings = new LinkedHashMap(); + var roles = new LinkedHashMap(); + var currentDatasetExpression = datasetExpression; + // TODO: Refactor so we call the executeCalc for each CalcClauseItemContext the same way we call + // the + // analytics functions. for (VtlParser.CalcClauseItemContext calcCtx : ctx.calcClauseItem()) { - - // ---- Resolve target name and desired role ---- - final String columnName = getName(calcCtx.componentID()); - final Dataset.Role columnRole = - (calcCtx.componentRole() == null) + var columnName = getName(calcCtx.componentID()); + var columnRole = + calcCtx.componentRole() == null ? Dataset.Role.MEASURE : Dataset.Role.valueOf(calcCtx.componentRole().getText().toUpperCase()); - // If the target already exists in the dataset, check its role - final Dataset.Component existing = byName.get(columnName); - if (existing != null) { - // Explicitly block overwriting identifiers (already handled above if role==IDENTIFIER). - if (existing.getRole() == Dataset.Role.IDENTIFIER) { - final String meta = - String.format( - "(role=%s, type=%s)", - existing.getRole(), existing.getType() != null ? existing.getType() : "n/a"); - throw new VtlRuntimeException( - new InvalidArgumentException( - // TODO: see if other cases are the same error (already defined in assignment for - // example). - String.format("CALC cannot overwrite IDENTIFIER '%s' %s.", columnName, meta), - fromContext(ctx))); - } - } - - // ---- Dispatch: analytics vs. regular calc ---- - final boolean isAnalytic = - (calcCtx.expr() instanceof VtlParser.FunctionsExpressionContext) - && ((VtlParser.FunctionsExpressionContext) calcCtx.expr()).functions() - instanceof VtlParser.AnalyticFunctionsContext; - - if (isAnalytic) { - // Analytics are executed immediately and update the rolling dataset expression - final AnalyticsVisitor analyticsVisitor = + if ((calcCtx.expr() instanceof VtlParser.FunctionsExpressionContext) + && ((VtlParser.FunctionsExpressionContext) calcCtx.expr()).functions() + instanceof VtlParser.AnalyticFunctionsContext) { + AnalyticsVisitor analyticsVisitor = new AnalyticsVisitor(processingEngine, currentDatasetExpression, columnName); - final VtlParser.FunctionsExpressionContext functionExprCtx = + VtlParser.FunctionsExpressionContext functionExprCtx = (VtlParser.FunctionsExpressionContext) calcCtx.expr(); - final VtlParser.AnalyticFunctionsContext anFuncCtx = + VtlParser.AnalyticFunctionsContext anFuncCtx = (VtlParser.AnalyticFunctionsContext) functionExprCtx.functions(); - currentDatasetExpression = analyticsVisitor.visit(anFuncCtx); } else { - // Regular calc expression – build resolvable expression and capture its source text - final ResolvableExpression calc = componentExpressionVisitor.visit(calcCtx); + ResolvableExpression calc = componentExpressionVisitor.visit(calcCtx); - final String exprSource = getSource(calcCtx.expr()); - if (exprSource == null || exprSource.isEmpty()) { - throw new VtlRuntimeException( - new InvalidArgumentException( - String.format( - "empty or unavailable source expression for '%s' in CALC.", columnName), - fromContext(ctx))); - } - - // Store in insertion order (deterministic column creation) expressions.put(columnName, calc); - expressionStrings.put(columnName, exprSource); + expressionStrings.put(columnName, getSource(calcCtx.expr())); roles.put(columnName, columnRole); } } - // ---- Consistency checks before execution ---- - if (!(expressions.keySet().equals(expressionStrings.keySet()) - && expressions.keySet().equals(roles.keySet()))) { - throw new VtlRuntimeException( - new InvalidArgumentException( - "internal CALC maps out of sync (expressions/expressionStrings/roles)", - fromContext(ctx))); - } - - // ---- Execute the batch calc if any non-analytic expressions were collected ---- if (!expressionStrings.isEmpty()) { currentDatasetExpression = processingEngine.executeCalc( currentDatasetExpression, expressions, roles, expressionStrings); } + return currentDatasetExpression; } @Override public DatasetExpression visitFilterClause(VtlParser.FilterClauseContext ctx) { - - // Error reporting context - final int line = ctx.getStart().getLine(); - final int charPosition = ctx.getStart().getCharPositionInLine(); - final String statement = ctx.getText(); - ResolvableExpression filter = componentExpressionVisitor.visit(ctx.expr()); return processingEngine.executeFilter(datasetExpression, filter, getSource(ctx.expr())); } @Override public DatasetExpression visitRenameClause(VtlParser.RenameClauseContext ctx) { - - // Dataset structure in order + lookup maps - final List componentsInOrder = - new ArrayList<>(datasetExpression.getDataStructure().values()); - final Set availableColumns = - componentsInOrder.stream() - .map(Dataset.Component::getName) - .collect(Collectors.toCollection(LinkedHashSet::new)); - - // Map for detailed error reporting (includes role/type if available) - final Map byName = - componentsInOrder.stream() - .collect( - Collectors.toMap( - Dataset.Component::getName, c -> c, (a, b) -> a, LinkedHashMap::new)); - - // Parse the RENAME clause and validate Map fromTo = new LinkedHashMap<>(); - Set toSeen = new LinkedHashSet<>(); - Set fromSeen = new LinkedHashSet<>(); - + Set renamed = new HashSet<>(); for (VtlParser.RenameClauseItemContext renameCtx : ctx.renameClauseItem()) { - final String toNameString = getName(renameCtx.toName); - final String fromNameString = getName(renameCtx.fromName); - - // Validate: no duplicate "from" names inside the clause - if (!fromSeen.add(fromNameString)) { - throw new VtlRuntimeException( - new InvalidArgumentException( - String.format("Error: duplicate source name in RENAME clause: '%s", fromNameString), - fromContext(ctx))); - } - - // Validate: "from" must exist in dataset - if (!availableColumns.contains(fromNameString)) { - Dataset.Component comp = byName.get(fromNameString); - String meta = - (comp != null) - ? String.format( - " (role=%s, type=%s)", - comp.getRole(), comp.getType() != null ? comp.getType() : "n/a") - : ""; + var toNameString = getName(renameCtx.toName); + var fromNameString = getName(renameCtx.fromName); + if (!renamed.add(toNameString)) { throw new VtlRuntimeException( new InvalidArgumentException( - String.format( - "Error: source column to rename not found: '%s'%s", fromNameString, meta), - fromContext(ctx))); + "duplicate column: %s".formatted(toNameString), fromContext(renameCtx))); } - - // Validate: no duplicate "to" names inside the clause - if (!toSeen.add(toNameString)) { - throw new VtlRuntimeException( - new InvalidArgumentException( - String.format( - "Error: duplicate output column name in RENAME clause: '%s.", fromNameString), - fromContext(ctx))); - } - fromTo.put(fromNameString, toNameString); } - - // Validate collisions with untouched dataset columns ("Untouched" = columns that are not - // being renamed) - final Set untouched = - availableColumns.stream() - .filter(c -> !fromTo.containsKey(c)) - .collect(Collectors.toCollection(LinkedHashSet::new)); - - for (Map.Entry e : fromTo.entrySet()) { - final String from = e.getKey(); - final String to = e.getValue(); - - // If target already exists as untouched, it would cause a collision - if (untouched.contains(to)) { - Dataset.Component comp = byName.get(to); - String meta = - (comp != null) - ? String.format( - " (role=%s, type=%s)", - comp.getRole(), comp.getType() != null ? comp.getType() : "n/a") - : ""; - - throw new VtlRuntimeException( - new InvalidArgumentException( - String.format( - "Error: target name '%s'%s already exists in dataset and is not being renamed.", - to, meta), - fromContext(ctx))); - } - } - - // Execute rename in processing engine return processingEngine.executeRename(datasetExpression, fromTo); } diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/utils/dag/DagTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/utils/dag/DagTest.java index fcb4a6037..841ba57e8 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/utils/dag/DagTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/utils/dag/DagTest.java @@ -199,11 +199,11 @@ void testDagSimpleExampleWithReordering() throws ScriptException { void testDagCycle() { final String script = """ - e := a; - b := a; - c := b; - a := c; - f := a;"""; + e := a; + b := a; + c := b; + a := c; + f := a;"""; final Positioned.Position mainPosition = getPositionOfStatementInScript("a := c", script); final List otherPositions = @@ -231,14 +231,14 @@ void testDagCycle() { void testMultipleCycles() { final String script = """ - h := g; - i := join(h, input_ds); - g := i; - e := a; - b := a; - c := b; - a := c; - f := a;"""; + h := g; + i := join(h, input_ds); + g := i; + e := a; + b := a; + c := b; + a := c; + f := a;"""; final Positioned.Position mainExceptionMainPosition = getPositionOfStatementInScript("g := i", script); @@ -446,8 +446,8 @@ void testDagIfExpr() throws ScriptException { engine.getContext().setAttribute("ds_2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); engine.eval( "res := if ds1 > ds2 then ds1 else ds2; " - + "ds1 := ds_1[keep long1][rename long1 to bool_var]; " - + "ds2 := ds_2[keep long1][rename long1 to bool_var];"); + + "ds1 := ds_1[keep id, long1][rename long1 to bool_var]; " + + "ds2 := ds_2[keep id, long1][rename long1 to bool_var];"); var res = engine.getContext().getAttribute("res"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( @@ -463,7 +463,7 @@ void testDagCaseExpr() throws ScriptException { engine.getContext().setAttribute("ds_2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); engine.eval( "res0 <- tmp0[calc c := case when long1 > 30 then \"ok\" else \"ko\"][drop long1]; " - + "tmp0 := ds_1[keep long1];"); + + "tmp0 := ds_1[keep id, long1];"); Object res0 = engine.getContext().getAttribute("res0"); assertThat(((Dataset) res0).getDataAsMap()) .containsExactlyInAnyOrder( @@ -474,7 +474,7 @@ void testDagCaseExpr() throws ScriptException { assertThat(((Dataset) res0).getDataStructure().get("c").getType()).isEqualTo(String.class); engine.eval( "res1 <- tmp1[calc c := case when long1 > 30 then 1 else 0][drop long1]; " - + "tmp1 := ds_1[keep long1];"); + + "tmp1 := ds_1[keep id, long1];"); Object res1 = engine.getContext().getAttribute("res1"); assertThat(((Dataset) res1).getDataAsMap()) .containsExactlyInAnyOrder( @@ -484,9 +484,9 @@ void testDagCaseExpr() throws ScriptException { Map.of("id", "Franck", "c", 1L)); assertThat(((Dataset) res1).getDataStructure().get("c").getType()).isEqualTo(Long.class); engine.eval( - "tmp2_alt_ds1 := ds_1[keep long1][rename long1 to bool_var]; " + "tmp2_alt_ds1 := ds_1[keep id, long1][rename long1 to bool_var]; " + "res2 <- case when tmp2_alt_ds1 < 30 then tmp2_alt_ds1 else tmp2_alt_ds2; " - + "tmp2_alt_ds2 := ds_2[keep long1][rename long1 to bool_var];"); + + "tmp2_alt_ds2 := ds_2[keep id, long1][rename long1 to bool_var];"); Object resDs = engine.getContext().getAttribute("res2"); assertThat(((Dataset) resDs).getDataAsMap()) .containsExactlyInAnyOrder( @@ -498,7 +498,7 @@ void testDagCaseExpr() throws ScriptException { @Test void testDagNvlExpr() throws ScriptException { engine.getContext().setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - engine.eval("res <- nvl(tmp1[keep long1], 0); tmp1 := ds1;"); + engine.eval("res <- nvl(tmp1[keep id, long1], 0); tmp1 := ds1;"); var res = engine.getContext().getAttribute("res"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( @@ -512,7 +512,7 @@ void testDagNvlExpr() throws ScriptException { @Test void testDagNvlImplicitCast() throws ScriptException { engine.getContext().setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - engine.eval("res := nvl(tmp1[keep long1], 0.1); tmp1 <- ds1;"); + engine.eval("res := nvl(tmp1[keep id, long1], 0.1); tmp1 <- ds1;"); var res = engine.getContext().getAttribute("res"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( @@ -528,7 +528,7 @@ void testDagUnaryExpr() throws ScriptException { ScriptContext context = engine.getContext(); context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := + tmp1[keep long1, double1]; tmp1 <- ds2;"); + Object res = engine.eval("res := + tmp1[keep id, long1, double1]; tmp1 <- ds2;"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "long1", 150L, "double1", 1.1D), @@ -537,7 +537,7 @@ void testDagUnaryExpr() throws ScriptException { assertThat(((Dataset) res).getDataStructure().get("long1").getType()).isEqualTo(Long.class); context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res2 = engine.eval("res2 := - tmp2[keep long1, double1]; tmp2 := ds2;"); + Object res2 = engine.eval("res2 := - tmp2[keep id, long1, double1]; tmp2 := ds2;"); assertThat(((Dataset) res2).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "long1", -150L, "double1", -1.1D), diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java index a4127d2bd..5c25d981b 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java @@ -1,5 +1,6 @@ package fr.insee.vtl.engine.visitors; +import static fr.insee.vtl.engine.VtlScriptEngineTest.atPosition; import static fr.insee.vtl.model.Dataset.Role; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; @@ -85,9 +86,8 @@ public void testManyCalc() throws ScriptException { Map.of("name", "Franck", "weight", 9L, "wisdom", 24L)); } - /** CALC: measures/attributes are allowed and should be created as requested. */ @Test - public void testCalcRoleModifier_measuresAndAttributesOk() throws ScriptException { + public void testCalcRoleModifier() throws ScriptException { InMemoryDataset dataset = new InMemoryDataset( List.of( @@ -125,9 +125,8 @@ public void testCalcRoleModifier_measuresAndAttributesOk() throws ScriptExceptio assertThat(unitComponent.getRole()).isEqualTo(Role.ATTRIBUTE); } - /** RENAME: duplicate "to" name inside the clause must raise a detailed script error. */ @Test - public void testRenameClause_duplicateToNameShouldFail() { + public void testRenameClause() throws ScriptException { InMemoryDataset dataset = new InMemoryDataset( List.of( @@ -137,67 +136,23 @@ public void testRenameClause_duplicateToNameShouldFail() { Map.of("name", String.class, "age", Long.class, "weight", Long.class), Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE)); - engine.getContext().setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); - - assertThatThrownBy( - () -> engine.eval("ds := ds1[rename age to weight, weight to age, name to age];")) - .isInstanceOf(VtlScriptException.class) - .hasMessageContaining("duplicate output column name in RENAME clause"); - } - - /** RENAME: duplicate "from" name inside the clause must raise a detailed script error. */ - @Test - public void testRenameClause_duplicateFromNameShouldFail() { - InMemoryDataset dataset = - new InMemoryDataset( - List.of( - Map.of("name", "Hadrien", "age", 10L, "weight", 11L), - Map.of("name", "Nico", "age", 11L, "weight", 10L)), - Map.of("name", String.class, "age", Long.class, "weight", Long.class), - Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE)); - - engine.getContext().setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); - - assertThatThrownBy(() -> engine.eval("ds := ds1[rename age to weight, age to weight2];")) - .isInstanceOf(VtlScriptException.class) - .hasMessageContaining("duplicate source name in RENAME clause"); - } - - /** RENAME: "from" column must exist in dataset. */ - @Test - public void testRenameClause_fromColumnNotFoundShouldFail() { - InMemoryDataset dataset = - new InMemoryDataset( - List.of(Map.of("name", "Hadrien", "age", 10L, "weight", 11L)), - Map.of("name", String.class, "age", Long.class, "weight", Long.class), - Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE)); - - engine.getContext().setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); - - assertThatThrownBy(() -> engine.eval("ds := ds1[rename unknown to something];")) - .isInstanceOf(VtlScriptException.class) - .hasMessageContaining("source column to rename not found: 'unknown'"); - } + ScriptContext context = engine.getContext(); + context.setAttribute("ds", dataset, ScriptContext.ENGINE_SCOPE); - /** - * RENAME: target collides with an untouched existing column -> must error with details - * (role/type). - */ - @Test - public void testRenameClause_targetCollidesWithUntouchedShouldFail() { - InMemoryDataset dataset = - new InMemoryDataset( - List.of(Map.of("name", "Hadrien", "age", 10L, "weight", 11L)), - Map.of("name", String.class, "age", Long.class, "weight", Long.class), - Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE)); + engine.eval("ds1 := ds[rename age to weight, weight to age, name to pseudo];"); - engine.getContext().setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); + assertThat(engine.getContext().getAttribute("ds1")).isInstanceOf(Dataset.class); + assertThat(((Dataset) engine.getContext().getAttribute("ds1")).getDataAsMap()) + .containsExactlyInAnyOrder( + Map.of("pseudo", "Hadrien", "weight", 10L, "age", 11L), + Map.of("pseudo", "Nico", "weight", 11L, "age", 10L), + Map.of("pseudo", "Franck", "weight", 12L, "age", 9L)); - assertThatThrownBy(() -> engine.eval("ds := ds1[rename name to age];")) + assertThatThrownBy( + () -> engine.eval("ds2 := ds[rename age to weight, weight to age, name to age];")) .isInstanceOf(VtlScriptException.class) - .hasMessageContaining("target name 'age'") // main message - .hasMessageContaining("already exists in dataset and is not being renamed") - .hasMessageContaining("(role=MEASURE, type=class java.lang.Long)"); + .hasMessage("duplicate column: age") + .is(atPosition(0, 47, 58)); } @Test @@ -239,8 +194,7 @@ public void testKeepDropClause() throws ScriptException { ScriptContext context = engine.getContext(); context.setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); - // KEEP: identifiers must not be listed explicitly; they are implicitly preserved. - engine.eval("ds2 := ds1[keep age];"); + engine.eval("ds2 := ds1[keep name, age];"); assertThat(engine.getContext().getAttribute("ds2")).isInstanceOf(Dataset.class); assertThat(((Dataset) engine.getContext().getAttribute("ds2")).getDataAsMap()) @@ -259,26 +213,6 @@ public void testKeepDropClause() throws ScriptException { Map.of("name", "Franck", "age", 12L)); } - /** KEEP/DROP: listing identifiers explicitly must raise a script error. */ - @Test - public void testKeepDropClause_identifierExplicitShouldFail() { - InMemoryDataset dataset = - new InMemoryDataset( - List.of( - Map.of("name", "Hadrien", "age", 10L, "weight", 11L), - Map.of("name", "Nico", "age", 11L, "weight", 10L), - Map.of("name", "Franck", "age", 12L, "weight", 9L)), - Map.of("name", String.class, "age", Long.class, "weight", Long.class), - Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE)); - - ScriptContext context = engine.getContext(); - context.setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); - - assertThatThrownBy(() -> engine.eval("ds := ds1[keep name, age];")) - .isInstanceOf(VtlScriptException.class) - .hasMessageContaining("identifiers [name] must not be explicitly listed in KEEP/DROP"); - } - @Test public void testAggregateType() { InMemoryDataset dataset = diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprOrConcatTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprOrConcatTest.java index d3cb18544..3d61e46ba 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprOrConcatTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprOrConcatTest.java @@ -52,7 +52,7 @@ public void testPlus() throws ScriptException { assertThat(context.getAttribute("plus2")).isEqualTo(5.0); context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := ds1[keep long1, long2] + ds1[keep long1, long2];"); + Object res = engine.eval("res := ds1[keep id, long1, long2] + ds1[keep id, long1, long2];"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", 60L, "long2", 600L), @@ -75,7 +75,7 @@ public void testMinus() throws ScriptException { context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := ds2[keep long1] - ds1[keep long1] + 1;"); + Object res = engine.eval("res := ds2[keep id, long1] - ds1[keep id, long1] + 1;"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "long1", 141L), @@ -92,7 +92,7 @@ public void testConcat() throws ScriptException { context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := ds2[keep string1] || \" \" || ds1[keep string1];"); + Object res = engine.eval("res := ds2[keep id, string1] || \" \" || ds1[keep id, string1];"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "string1", "hadrien hadrien"), diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprTest.java index 353838880..c5822ea0f 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprTest.java @@ -64,7 +64,8 @@ public void testArithmeticExpr() throws ScriptException { context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := round(ds1[keep long1, double1] * ds2[keep long1, double1]);"); + Object res = + engine.eval("res := round(ds1[keep id, long1, double1] * ds2[keep id, long1, double1]);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "long1", 1500.0, "double1", 1.0), @@ -82,7 +83,8 @@ public void testArithmeticExpr() throws ScriptException { engine.eval("div4 := 3.0 / 1.5;"); assertThat(context.getAttribute("div4")).isEqualTo(2.0); - res = engine.eval("res2 := round(ds1[keep long1, double1] / ds2[keep long1, double1]);"); + res = + engine.eval("res2 := round(ds1[keep id, long1, double1] / ds2[keep id, long1, double1]);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "long1", 0.0, "double1", 1.0), diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/BooleanExprTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/BooleanExprTest.java index d9ba19e7d..7e7216f6d 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/BooleanExprTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/BooleanExprTest.java @@ -100,8 +100,8 @@ public void testOnDatasets() throws ScriptException { context.setAttribute("ds_1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); context.setAttribute("ds_2", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); engine.eval( - "ds1 := ds_1[keep bool2][rename bool2 to bool1]; " - + "ds2 := ds_2[keep bool1]; " + "ds1 := ds_1[keep id, bool2][rename bool2 to bool1]; " + + "ds2 := ds_2[keep id, bool1]; " + "andDs := ds1 and ds2; " + "orDs := ds1 or ds2; " + "xorDs := ds1 xor ds2; "); diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ComparisonExprTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ComparisonExprTest.java index 297becc48..70921ac91 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ComparisonExprTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ComparisonExprTest.java @@ -73,7 +73,7 @@ public void testComparisonExpr() throws ScriptException { context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - engine.eval("equal := ds1[keep long1] = ds2[keep long1];"); + engine.eval("equal := ds1[keep id, long1] = ds2[keep id, long1];"); var equal = engine.getContext().getAttribute("equal"); assertThat(((Dataset) equal).getDataAsMap()) .containsExactlyInAnyOrder( @@ -90,7 +90,7 @@ public void testComparisonExpr() throws ScriptException { assertThat((Boolean) context.getAttribute("long1")).isTrue(); engine.eval("mix1 := 6 <> (3*20.0);"); assertThat((Boolean) context.getAttribute("mix1")).isTrue(); - engine.eval("notEqual := ds1[keep long1] <> ds2[keep long1];"); + engine.eval("notEqual := ds1[keep id, long1] <> ds2[keep id, long1];"); var notEqual = engine.getContext().getAttribute("notEqual"); assertThat(((Dataset) notEqual).getDataAsMap()) .containsExactlyInAnyOrder( @@ -106,7 +106,7 @@ public void testComparisonExpr() throws ScriptException { assertThat((Boolean) context.getAttribute("lt1")).isFalse(); engine.eval("mix2 := 6 < 6.1;"); assertThat((Boolean) context.getAttribute("mix2")).isTrue(); - engine.eval("lt2 := ds1[keep long1] < ds2[keep long1];"); + engine.eval("lt2 := ds1[keep id, long1] < ds2[keep id, long1];"); var lt = engine.getContext().getAttribute("lt2"); assertThat(((Dataset) lt).getDataAsMap()) .containsExactlyInAnyOrder( @@ -122,7 +122,7 @@ public void testComparisonExpr() throws ScriptException { assertThat((Boolean) context.getAttribute("mt1")).isTrue(); engine.eval("mix4 := 6 > 6.1;"); assertThat((Boolean) context.getAttribute("mix4")).isFalse(); - engine.eval("mt2 := ds1[keep long1] > ds2[keep long1];"); + engine.eval("mt2 := ds1[keep id, long1] > ds2[keep id, long1];"); var mt = engine.getContext().getAttribute("mt2"); assertThat(((Dataset) mt).getDataAsMap()) .containsExactlyInAnyOrder( @@ -139,7 +139,7 @@ public void testComparisonExpr() throws ScriptException { engine.eval("mix5 := 6 <= 6.1;"); assertThat((Boolean) context.getAttribute("mix5")).isTrue(); - engine.eval("le2 := ds1[keep long1] <= ds2[keep long1];"); + engine.eval("le2 := ds1[keep id, long1] <= ds2[keep id, long1];"); var le = engine.getContext().getAttribute("le2"); assertThat(((Dataset) le).getDataAsMap()) .containsExactlyInAnyOrder( @@ -156,7 +156,7 @@ public void testComparisonExpr() throws ScriptException { engine.eval("mix6 := 6 >= 6.1;"); assertThat((Boolean) context.getAttribute("mix6")).isFalse(); - engine.eval("me2 := ds1[keep long1] >= ds2[keep long1];"); + engine.eval("me2 := ds1[keep id, long1] >= ds2[keep id, long1];"); var me = engine.getContext().getAttribute("me2"); assertThat(((Dataset) me).getDataAsMap()) .containsExactlyInAnyOrder( @@ -198,7 +198,7 @@ public void testInNotIn() throws ScriptException { assertThat((Boolean) engine.getContext().getAttribute("res4")).isTrue(); engine.getContext().setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - engine.eval("me := ds[keep long1, string1] in {\"toto\", \"franck\"};"); + engine.eval("me := ds[keep id, long1, string1] in {\"toto\", \"franck\"};"); var in = engine.getContext().getAttribute("me"); assertThat(((Dataset) in).getDataAsMap()) .containsExactlyInAnyOrder( diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ConditionalExprTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ConditionalExprTest.java index 5ce05c628..2994c97c7 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ConditionalExprTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ConditionalExprTest.java @@ -46,8 +46,8 @@ public void testIfExpr() throws ScriptException { engine.getContext().setAttribute("ds_1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); engine.getContext().setAttribute("ds_2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); engine.eval( - "ds1 := ds_1[keep long1][rename long1 to bool_var]; " - + "ds2 := ds_2[keep long1][rename long1 to bool_var]; " + "ds1 := ds_1[keep id, long1][rename long1 to bool_var]; " + + "ds2 := ds_2[keep id, long1][rename long1 to bool_var]; " + "res := if ds1 > ds2 then ds1 else ds2;"); var res = engine.getContext().getAttribute("res"); assertThat(((Dataset) res).getDataAsMap()) @@ -73,7 +73,7 @@ public void testCaseExpr() throws ScriptException { engine.getContext().setAttribute("ds_1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); engine.getContext().setAttribute("ds_2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); engine.eval( - "ds1 := ds_1[keep long1]; " + "ds1 := ds_1[keep id, long1]; " + "res <- ds1[calc c := case when long1 > 30 then \"ok\" else \"ko\"][drop long1];"); Object res = engine.getContext().getAttribute("res"); assertThat(((Dataset) res).getDataAsMap()) @@ -84,7 +84,7 @@ public void testCaseExpr() throws ScriptException { Map.of("id", "Franck", "c", "ok")); assertThat(((Dataset) res).getDataStructure().get("c").getType()).isEqualTo(String.class); engine.eval( - "ds2 := ds_1[keep long1]; " + "ds2 := ds_1[keep id, long1]; " + "res2 <- ds2[calc c := case when long1 > 30 then 1 else 0][drop long1];"); Object res2 = engine.getContext().getAttribute("res2"); assertThat(((Dataset) res2).getDataAsMap()) @@ -95,8 +95,8 @@ public void testCaseExpr() throws ScriptException { Map.of("id", "Franck", "c", 1L)); assertThat(((Dataset) res2).getDataStructure().get("c").getType()).isEqualTo(Long.class); engine.eval( - "ds3 := ds_1[keep long1][rename long1 to bool_var];" - + "ds4 := ds_2[keep long1][rename long1 to bool_var]; " + "ds3 := ds_1[keep id, long1][rename long1 to bool_var];" + + "ds4 := ds_2[keep id, long1][rename long1 to bool_var]; " + "res_ds <- case when ds3 < 30 then ds3 else ds4;"); Object res_ds = engine.getContext().getAttribute("res_ds"); assertThat(((Dataset) res_ds).getDataAsMap()) @@ -115,7 +115,7 @@ public void testNvlExpr() throws ScriptException { assertThat(context.getAttribute("s2")).isEqualTo("default"); engine.getContext().setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - engine.eval("res := nvl(ds[keep long1], 0);"); + engine.eval("res := nvl(ds[keep id, long1], 0);"); var res = engine.getContext().getAttribute("res"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( @@ -142,7 +142,7 @@ public void testNvlImplicitCast() throws ScriptException { assertThat(context.getAttribute("s2")).isEqualTo(1.1D); engine.getContext().setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - engine.eval("res := nvl(ds[keep long1], 0.1);"); + engine.eval("res := nvl(ds[keep id, long1], 0.1);"); var res = engine.getContext().getAttribute("res"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/UnaryExprTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/UnaryExprTest.java index bbe9aea24..45ef35973 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/UnaryExprTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/UnaryExprTest.java @@ -44,7 +44,7 @@ public void testUnaryExpr() throws ScriptException { assertThat(context.getAttribute("plus1")).isEqualTo(1.5D); context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res1 := + ds2[keep long1, double1];"); + Object res = engine.eval("res1 := + ds2[keep id, long1, double1];"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "long1", 150L, "double1", 1.1D), @@ -57,7 +57,7 @@ public void testUnaryExpr() throws ScriptException { engine.eval("plus3 := - 1.5;"); assertThat(context.getAttribute("plus3")).isEqualTo(-1.5D); - res = engine.eval("res2 := - ds2[keep long1, double1];"); + res = engine.eval("res2 := - ds2[keep id, long1, double1];"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "long1", -150L, "double1", -1.1D), diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/ComparisonFunctionsTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/ComparisonFunctionsTest.java index e19a3d273..9e307d085 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/ComparisonFunctionsTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/ComparisonFunctionsTest.java @@ -50,7 +50,7 @@ public void testBetweenAtom() throws ScriptException { assertThat((Boolean) context.getAttribute("b2")).isFalse(); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := between(ds[keep long1, double2], 5, 15);"); + Object res = engine.eval("res := between(ds[keep id, long1, double2], 5, 15);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", false, "double2", false), @@ -80,7 +80,8 @@ public void testCharsetMatchAtom() throws ScriptException { assertThat((Boolean) context.getAttribute("t3")).isFalse(); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := match_characters(ds[keep string1, string2], \"(.*)o(.*)\");"); + Object res = + engine.eval("res := match_characters(ds[keep id, string1, string2], \"(.*)o(.*)\");"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "string1", true, "string2", false), @@ -113,7 +114,7 @@ public void testIsNullAtom() throws ScriptException { assertThat((Boolean) context.getAttribute("n1")).isFalse(); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := isnull(ds[keep string1, bool1]);"); + Object res = engine.eval("res := isnull(ds[keep id, string1, bool1]);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "string1", false, "bool1", false), diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/DistanceFunctionsTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/DistanceFunctionsTest.java index 787e230b0..f83d6837f 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/DistanceFunctionsTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/DistanceFunctionsTest.java @@ -43,7 +43,7 @@ public void testLevenshteinAtom() throws ScriptException { context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); Object res = engine.eval( - "res := levenshtein(ds[keep string1], ds[keep string2][rename string2 to string1])[rename string1 to lev];"); + "res := levenshtein(ds[keep id, string1], ds[keep id, string2][rename string2 to string1])[rename string1 to lev];"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "lev", 3L), diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/JoinFunctionsTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/JoinFunctionsTest.java index f18c03612..e79293263 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/JoinFunctionsTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/JoinFunctionsTest.java @@ -137,8 +137,7 @@ public void testLeftJoinWithDifferentIdentifiers() throws ScriptException { result2 := left_join(ds_1_1, ds_2 using Id_2);\ """)) .isInstanceOf(InvalidArgumentException.class) - .hasMessage( - "CALC cannot overwrite IDENTIFIER 'Id_2' (role=IDENTIFIER, type=class java.lang.Long)."); + .hasMessage("using component Id_2 has to be an identifier"); } @Test @@ -231,8 +230,9 @@ public void testLeftJoinMixedStructure() throws ScriptException { engine.getContext().setAttribute("ds1", dataset1, ScriptContext.ENGINE_SCOPE); engine.getContext().setAttribute("ds2", dataset2, ScriptContext.ENGINE_SCOPE); - engine.eval("unionData := union(ds1[keep measure1, measure2], ds2[keep measure1, measure2]);"); - engine.eval("ds1_keep := ds1[keep color];"); + engine.eval( + "unionData := union(ds1[keep id, measure1, measure2], ds2[keep id, measure1, measure2]);"); + engine.eval("ds1_keep := ds1[keep id, color];"); engine.eval("joinData := left_join(unionData, ds1_keep);"); Dataset joinData = (Dataset) engine.getBindings(ScriptContext.ENGINE_SCOPE).get("joinData"); @@ -260,7 +260,7 @@ public void testInnerJoin() throws ScriptException { engine.getContext().setAttribute("ds_1", ds1, ScriptContext.ENGINE_SCOPE); engine.getContext().setAttribute("ds_2", ds2, ScriptContext.ENGINE_SCOPE); - engine.eval("result1 := inner_join(ds_1[keep m1] as ds1, ds_2 as ds2);"); + engine.eval("result1 := inner_join(ds_1[keep id1, id2, m1] as ds1, ds_2 as ds2);"); var result = (Dataset) engine.getContext().getAttribute("result1"); assertThat(result.getColumnNames()).containsExactlyInAnyOrder("id1", "id2", "m1", "m2"); diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/NumericFunctionsTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/NumericFunctionsTest.java index 558e1f720..d26b9617a 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/NumericFunctionsTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/NumericFunctionsTest.java @@ -85,7 +85,7 @@ public void testCeil() throws ScriptException { assertThat(context.getAttribute("d")).isEqualTo(0L); context.setAttribute("ds", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := ceil(ds[keep long1, double1]);"); + Object res = engine.eval("res := ceil(ds[keep id, long1, double1]);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "long1", 150L, "double1", 2L), @@ -112,7 +112,7 @@ public void testFloor() throws ScriptException { assertThat(context.getAttribute("d")).isEqualTo(-1L); context.setAttribute("ds", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := floor(ds[keep double1]);"); + Object res = engine.eval("res := floor(ds[keep id, double1]);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "double1", 1L), @@ -135,7 +135,7 @@ public void testAbs() throws ScriptException { assertThat(context.getAttribute("b")).isEqualTo(5.5D); context.setAttribute("ds", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := abs(ds[keep double1]);"); + Object res = engine.eval("res := abs(ds[keep id, double1]);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "double1", 1.1D), @@ -164,7 +164,7 @@ public void testExp() throws ScriptException { assertThat(((Double) context.getAttribute("d"))) .isCloseTo(0.367D, Percentage.withPercentage(1)); context.setAttribute("ds", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := floor(exp(ds[keep double1, long1]));"); + Object res = engine.eval("res := floor(exp(ds[keep id, double1, long1]));"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "double1", 3L, "long1", 9223372036854775807L), @@ -192,7 +192,7 @@ public void testLn() throws ScriptException { .isCloseTo(-0.69D, Percentage.withPercentage(1)); context.setAttribute("ds", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := floor(ln(abs(ds[keep double1])));"); + Object res = engine.eval("res := floor(ln(abs(ds[keep id, double1])));"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "double1", 0L), @@ -221,7 +221,7 @@ public void testRound() throws ScriptException { assertThat(context.getAttribute("e")).isEqualTo(12350D); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := round(ds[keep long1, double2], 1);"); + Object res = engine.eval("res := round(ds[keep id, long1, double2], 1);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", 30.0D, "double2", 1.2D), @@ -258,7 +258,7 @@ public void testTrunc() throws ScriptException { assertThat(context.getAttribute("e")).isEqualTo(12340D); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := trunc(ds[keep long1, double2], 1);"); + Object res = engine.eval("res := trunc(ds[keep id, long1, double2], 1);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", 30.0D, "double2", 1.2D), @@ -289,7 +289,7 @@ public void testSqrt() throws ScriptException { assertThat(context.getAttribute("c")).isEqualTo(0D); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := round(sqrt(ds[keep long1, double2]));"); + Object res = engine.eval("res := round(sqrt(ds[keep id, long1, double2]));"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", 5.0D, "double2", 1D), @@ -321,7 +321,7 @@ public void testMod() throws ScriptException { assertThat(context.getAttribute("d")).isEqualTo(9D); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := trunc(mod(ds[keep long1, double2], 2), 1);"); + Object res = engine.eval("res := trunc(mod(ds[keep id, long1, double2], 2), 1);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", 0.0D, "double2", 1.2D), @@ -353,7 +353,7 @@ public void testPower() throws ScriptException { assertThat(context.getAttribute("e")).isEqualTo(-125D); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := trunc(power(ds[keep long1, double2], 2), 1);"); + Object res = engine.eval("res := trunc(power(ds[keep id, long1, double2], 2), 1);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", 900.0D, "double2", 1.4D), @@ -383,7 +383,7 @@ public void testRandom() throws ScriptException { assertThat((Double) context.getAttribute("a")).isCloseTo(0.72D, Percentage.withPercentage(0.2)); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := trunc(random(ds[keep long1], 200), 1);"); + Object res = engine.eval("res := trunc(random(ds[keep id, long1], 200), 1);"); assertThat(((Dataset) res).getDataStructure().get("long1").getType()).isEqualTo(Double.class); assertThatThrownBy( @@ -410,7 +410,7 @@ public void testLog() throws ScriptException { .isCloseTo(3.01D, Percentage.withPercentage(0.01)); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := trunc(log(ds[keep long1, double2], 2), 1);"); + Object res = engine.eval("res := trunc(log(ds[keep id, long1, double2], 2), 1);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", 4.9D, "double2", 0.2D), diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/SetFunctionsVisitorTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/SetFunctionsVisitorTest.java index 510b58d6d..d548831ba 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/SetFunctionsVisitorTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/SetFunctionsVisitorTest.java @@ -265,9 +265,9 @@ public void testUnion456Issue() throws ScriptException { context.getBindings(ScriptContext.ENGINE_SCOPE).put("MULTIMODE", multimodeDs); engine.eval( - "TEMP_RACINE := MULTIMODE [keep FOO];\n" + "TEMP_RACINE := MULTIMODE [keep interrogationId, FOO];\n" + "RACINE := union(TEMP_RACINE, TEMP_RACINE) ;\n" - + "TEMP_LOOP := MULTIMODE [keep LOOP.FOO1]\n" + + "TEMP_LOOP := MULTIMODE [keep interrogationId, LOOP, LOOP.FOO1]\n" + " [filter LOOP <> \"\"]\n" + " [rename LOOP.FOO1 to FOO1];\n" + "LOOP <- union(TEMP_LOOP, TEMP_LOOP);"); diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/StringFunctionsTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/StringFunctionsTest.java index 516a48c45..d291258b9 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/StringFunctionsTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/StringFunctionsTest.java @@ -86,12 +86,12 @@ public void testUnaryStringFunction() throws ScriptException { context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); engine.eval( - "dsTrim := trim(ds[keep string1]); " - + "dsLTrim := ltrim(ds[keep string1]); " - + "dsRTrim := rtrim(ds[keep string1]); " - + "dsUpper := upper(ds[keep string1]); " - + "dsLower := lower(ds[keep string1]); " - + "dsLen := length(ds[keep string1]);"); + "dsTrim := trim(ds[keep id, string1]); " + + "dsLTrim := ltrim(ds[keep id, string1]); " + + "dsRTrim := rtrim(ds[keep id, string1]); " + + "dsUpper := upper(ds[keep id, string1]); " + + "dsLower := lower(ds[keep id, string1]); " + + "dsLen := length(ds[keep id, string1]);"); assertThat(((Dataset) context.getAttribute("dsTrim")).getDataAsMap().get(0)) .isEqualTo(Map.of("id", "Toto", "string1", "toto")); assertThat(((Dataset) context.getAttribute("dsLTrim")).getDataAsMap().get(0)) @@ -127,7 +127,7 @@ public void testSubstrAtom() throws ScriptException { assertThat(context.getAttribute("s5")).isEqualTo(""); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := substr(ds[keep string1, string2], 2, 4);"); + Object res = engine.eval("res := substr(ds[keep id, string1, string2], 2, 4);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "string1", "oto", "string2", ""), @@ -146,7 +146,7 @@ public void testReplaceAtom() throws ScriptException { assertThat(context.getAttribute("r2")).isEqualTo("de"); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := replace(ds[keep string1, string2], \"o\", \"O\");"); + Object res = engine.eval("res := replace(ds[keep id, string1, string2], \"o\", \"O\");"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "string1", "tOtO", "string2", "t"), @@ -182,7 +182,7 @@ public void testInstrAtom() throws ScriptException { assertThat(context.getAttribute("i4")).isEqualTo(0L); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := instr(ds[keep string1, string2], \"o\", 0, 2);"); + Object res = engine.eval("res := instr(ds[keep id, string1, string2], \"o\", 0, 2);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "string1", 4L, "string2", 0L), diff --git a/vtl-prov/src/test/java/fr/insee/vtl/prov/ProvenanceListenerTest.java b/vtl-prov/src/test/java/fr/insee/vtl/prov/ProvenanceListenerTest.java index fb2f07326..3966b7e06 100644 --- a/vtl-prov/src/test/java/fr/insee/vtl/prov/ProvenanceListenerTest.java +++ b/vtl-prov/src/test/java/fr/insee/vtl/prov/ProvenanceListenerTest.java @@ -95,10 +95,10 @@ public void testWithEmptyLines() throws ScriptException { """ - ds1 := data.ds1[calc var1 := cast(var1, integer), var2 := cast(var2, integer)]; + ds1 := data.ds1[calc identifier id := id, var1 := cast(var1, integer), var2 := cast(var2, integer)]; - ds2_out := other.ds2[calc var1 := cast(var1, integer), var2 := cast(var2, integer)]; + ds2_out := other.ds2[calc identifier id := id, var1 := cast(var1, integer), var2 := cast(var2, integer)]; ds_sum := ds1 + ds2_out; ds_mul <- ds_sum * 3; 'data.ds_res' <- ds_mul[filter mod(var1, 2) = 0][calc var_sum := var1 + var2]; diff --git a/vtl-prov/src/test/java/fr/insee/vtl/prov/RDFTest.java b/vtl-prov/src/test/java/fr/insee/vtl/prov/RDFTest.java index 8d5870e1d..5560966a6 100644 --- a/vtl-prov/src/test/java/fr/insee/vtl/prov/RDFTest.java +++ b/vtl-prov/src/test/java/fr/insee/vtl/prov/RDFTest.java @@ -167,13 +167,14 @@ define datapoint ruleset NUTS3_TYPES (variable facility_type, nb) is CENSUS_NUTS3_2021 := LEGAL_POP [rename REF_AREA to nuts3, TIME_PERIOD to year, POP_TOT to pop] [filter year = "2021"] [calc pop := cast(pop, integer)] - [drop NB_COM, POP_MUNI]; + [drop year, NB_COM, POP_MUNI]; // Extract dataset on general practitioners from BPE by NUTS 3 in 2021 - GENERAL_PRACT_NUTS3_2021 := BPE_NUTS3 [filter facility_type = "D201" and year = "2021"]; + GENERAL_PRACT_NUTS3_2021 := BPE_NUTS3 [filter facility_type = "D201" and year = "2021"] + [drop facility_type, year]; // Merge practitioners and legal population datasets by NUTS 3 in 2021 and compute an indicator - BPE_CENSUS_NUTS3_2021 <- inner_join(GENERAL_PRACT_NUTS3_2021, CENSUS_NUTS3_2021 using nuts3) + BPE_CENSUS_NUTS3_2021 <- inner_join(GENERAL_PRACT_NUTS3_2021, CENSUS_NUTS3_2021) [calc pract_per_10000_inhabitants := nb / pop * 10000] [drop nb, pop];\ """; diff --git a/vtl-sdmx/src/test/java/fr/insee/vtl/BPETest.java b/vtl-sdmx/src/test/java/fr/insee/vtl/BPETest.java index bce7edfa2..85168f61b 100644 --- a/vtl-sdmx/src/test/java/fr/insee/vtl/BPETest.java +++ b/vtl-sdmx/src/test/java/fr/insee/vtl/BPETest.java @@ -168,7 +168,7 @@ public void bpeV1() throws ScriptException { "CENSUS_NUTS3_2021 := LEGAL_POP [rename REF_AREA to nuts3, POP_TOT to pop]\n" + "[filter TIME_PERIOD = \"2021\"]\n" + "[calc pop := cast(pop, integer)]\n" - + "[drop NB_COM, POP_MUNI];"); + + "[drop TIME_PERIOD, NB_COM, POP_MUNI];"); Dataset censusNuts2021 = (Dataset) engine.getContext().getAttribute("CENSUS_NUTS3_2021"); Structured.DataStructure censusNuts2021Structure = censusNuts2021.getDataStructure(); @@ -181,7 +181,8 @@ public void bpeV1() throws ScriptException { // Step 7 engine.eval( - "GENERAL_PRACT_NUTS3_2021 := BPE_NUTS3[filter facility_type = \"D201\" and TIME_PERIOD = \"2021\"];"); + "GENERAL_PRACT_NUTS3_2021 := BPE_NUTS3[filter facility_type = \"D201\" and TIME_PERIOD = \"2021\"]\n" + + "[drop facility_type, TIME_PERIOD];"); Dataset generalNuts = (Dataset) engine.getContext().getAttribute("GENERAL_PRACT_NUTS3_2021"); Structured.DataStructure generalNutsStructure = generalNuts.getDataStructure(); @@ -194,7 +195,7 @@ public void bpeV1() throws ScriptException { // Step 8 engine.eval( - "BPE_CENSUS_NUTS3_2021 <- inner_join(GENERAL_PRACT_NUTS3_2021, CENSUS_NUTS3_2021 using nuts3)\n" + "BPE_CENSUS_NUTS3_2021 <- inner_join(GENERAL_PRACT_NUTS3_2021, CENSUS_NUTS3_2021)\n" + "[calc pract_per_10000_inhabitants := nb / pop * 10000]\n" + "[drop nb, pop];"); diff --git a/vtl-sdmx/src/test/java/fr/insee/vtl/SDMXVTLWorkflowTest.java b/vtl-sdmx/src/test/java/fr/insee/vtl/SDMXVTLWorkflowTest.java index d716fa4d2..c5f96d45f 100644 --- a/vtl-sdmx/src/test/java/fr/insee/vtl/SDMXVTLWorkflowTest.java +++ b/vtl-sdmx/src/test/java/fr/insee/vtl/SDMXVTLWorkflowTest.java @@ -16,7 +16,6 @@ import javax.script.ScriptEngineManager; import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; public class SDMXVTLWorkflowTest { @@ -33,8 +32,6 @@ public void setUp() { engine.put(VtlScriptEngine.PROCESSING_ENGINE_NAMES, "spark"); } - // Disabled for now, we have to update the remote resource - @Disabled @Test void testRefFromRepo() { @@ -64,9 +61,6 @@ void testGetEmptyDataset() { .isEqualTo( new Structured.DataStructure( List.of( - new Structured.Component( - "facility_type", String.class, Dataset.Role.IDENTIFIER), - new Structured.Component("TIME_PERIOD", String.class, Dataset.Role.IDENTIFIER), new Structured.Component("nuts3", String.class, Dataset.Role.IDENTIFIER), new Structured.Component( "pract_per_10000_inhabitants", Double.class, Dataset.Role.MEASURE)))); @@ -95,16 +89,14 @@ public void testGetRulesetsVTL() { ReadableDataLocation rdl = new ReadableDataLocationTmp("src/test/resources/DSD_BPE_CENSUS.xml"); SDMXVTLWorkflow sdmxVtlWorkflow = new SDMXVTLWorkflow(engine, rdl, Map.of()); assertThat(sdmxVtlWorkflow.getRulesetsVTL()) - .isEqualToIgnoringWhitespace( + .isEqualTo( "define datapoint ruleset UNIQUE_MUNICIPALITY (variable DEPCOM) is\n" - + " MUNICIPALITY_FORMAT_RULE : match_characters(DEPCOM, \"[0-9]{5}|2[A-B][0-9]{3}\") errorcode\n" - + " \"Municipality code is not in the correct format\"\n" - + " end datapoint ruleset;\n" - + " \n" - + " define datapoint ruleset NUTS3_TYPES (variable facility_type, nb) is\n" - + " BOWLING_ALLEY_RULE : when facility_type = \"F102\" then nb > 10 errorcode \"Not enough bowling\n" - + " alleys\"\n" - + " end datapoint ruleset;"); + + " MUNICIPALITY_FORMAT_RULE : match_characters(DEPCOM, \"[0-9]{5}|2[A-B][0-9]{3}\") errorcode \"Municipality code is not in the correct format\"\n" + + " end datapoint ruleset;\n" + + "\n" + + "define datapoint ruleset NUTS3_TYPES (variable facility_type, nb) is\n" + + " BOWLING_ALLEY_RULE : when facility_type = \"F102\" then nb > 10 errorcode \"Not enough bowling alleys\"\n" + + " end datapoint ruleset;"); } @Test @@ -112,38 +104,36 @@ public void testGetTransformationsVTL() { ReadableDataLocation rdl = new ReadableDataLocationTmp("src/test/resources/DSD_BPE_CENSUS.xml"); SDMXVTLWorkflow sdmxVtlWorkflow = new SDMXVTLWorkflow(engine, rdl, Map.of()); assertThat(sdmxVtlWorkflow.getTransformationsVTL()) - .isEqualToIgnoringWhitespace( + .isEqualTo( "// Validation of municipality code in input file\n" + "CHECK_MUNICIPALITY := check_datapoint(BPE_DETAIL_VTL, UNIQUE_MUNICIPALITY invalid);\n" + "\n" + "// Clean BPE input database\n" - + "BPE_DETAIL_CLEAN := BPE_DETAIL_VTL [drop LAMBERT_X, LAMBERT_Y]\n" - + " [rename ID_EQUIPEMENT to id, TYPEQU to facility_type, DEPCOM to municipality, REF_YEAR to year];\n" + + "BPE_DETAIL_CLEAN := BPE_DETAIL_VTL [drop LAMBERT_X, LAMBERT_Y]\n" + + " [rename ID_EQUIPEMENT to id, TYPEQU to facility_type, DEPCOM to municipality, REF_YEAR to year];\n" + "\n" + "// BPE aggregation by municipality, type and year\n" - + "BPE_MUNICIPALITY <- BPE_DETAIL_CLEAN [aggr nb := count(id) group by municipality, year, facility_type] [rename year\n" - + " to TIME_PERIOD];\n" + + "BPE_MUNICIPALITY <- BPE_DETAIL_CLEAN [aggr nb := count(id) group by municipality, year, facility_type] [rename year to TIME_PERIOD];\n" + "\n" + "// BPE aggregation by NUTS 3, type and year\n" - + "BPE_NUTS3 <- BPE_MUNICIPALITY [calc nuts3 := if substr(municipality,1,2) = \"97\" then substr(municipality,1,3)\n" - + " else substr(municipality,1,2)]\n" - + " [aggr nb := count(nb) group by TIME_PERIOD, nuts3, facility_type];\n" + + "BPE_NUTS3 <- BPE_MUNICIPALITY [calc nuts3 := if substr(municipality,1,2) = \"97\" then substr(municipality,1,3) else substr(municipality,1,2)]\n" + + " [aggr nb := count(nb) group by TIME_PERIOD, nuts3, facility_type];\n" + "\n" + "// BPE validation of facility types by NUTS 3\n" + "CHECK_NUTS3_TYPES := check_datapoint(BPE_NUTS3, NUTS3_TYPES invalid);\n" + "\n" + "// Prepare 2021 census dataset by NUTS 3\n" - + "CENSUS_NUTS3_2021 := LEGAL_POP [rename REF_AREA to nuts3, POP_TOT to pop]\n" - + " [filter TIME_PERIOD = \"2021\"]\n" - + " [calc pop := cast(pop, integer)]\n" - + " [drop NB_COM, POP_MUNI];\n" + + "CENSUS_NUTS3_2021 := LEGAL_POP [rename REF_AREA to nuts3, POP_TOT to pop]\n" + + " [filter TIME_PERIOD = \"2021\"]\n" + + " [calc pop := cast(pop, integer)]\n" + + " [drop TIME_PERIOD, NB_COM, POP_MUNI];\n" + "\n" + "// Extract dataset on general practitioners from BPE by NUTS 3 in 2021\n" - + "GENERAL_PRACT_NUTS3_2021 := BPE_NUTS3 [filter facility_type = \"D201\" and TIME_PERIOD = \"2021\"];\n" + + "GENERAL_PRACT_NUTS3_2021 := BPE_NUTS3 [filter facility_type = \"D201\" and TIME_PERIOD = \"2021\"]\n" + + " [drop facility_type, TIME_PERIOD];\n" + "\n" - + "// Merge practitioners and legal population datasets by NUTS 3 in 2021\n" - + "// and compute an indicator\n" - + "BPE_CENSUS_NUTS3_2021 <- inner_join(GENERAL_PRACT_NUTS3_2021, CENSUS_NUTS3_2021 using nuts3)\n" + + "// Merge practitioners and legal population datasets by NUTS 3 in 2021 and compute an indicator\n" + + "BPE_CENSUS_NUTS3_2021 <- inner_join(GENERAL_PRACT_NUTS3_2021, CENSUS_NUTS3_2021)\n" + " [calc pract_per_10000_inhabitants := nb / pop * 10000]\n" + " [drop nb, pop];"); } diff --git a/vtl-sdmx/src/test/resources/DSD_BPE_CENSUS.xml b/vtl-sdmx/src/test/resources/DSD_BPE_CENSUS.xml index 1d16e408d..b16ece3e0 100644 --- a/vtl-sdmx/src/test/resources/DSD_BPE_CENSUS.xml +++ b/vtl-sdmx/src/test/resources/DSD_BPE_CENSUS.xml @@ -1,302 +1,206 @@ - + DSD_BPE_DETAIL_1049 false 2024-03-26T10:49:00Z - - - + + - - - Type d'équipements - La classification des équipements en gammes a pour objectif de réunir des - équipements qui présentent des logiques d'implantation voisines, en ce sens qu'ils sont fréquemment - présents dans les mêmes communes. Ces regroupements permettent d'élaborer des indicateurs - synthétiques reflétant l'organisation hiérarchisée des territoires en termes de services à la - population. Les gammes d’équipements sont actualisées chaque année pour une nouvelle version de la - Base Permanente des Équipements. En effet, d’une part de nouveaux équipements peuvent être - introduits dans la base et, d’autre part, l’implantation des équipements dans les communes peut être - modifiée, tout cela pouvant avoir des conséquences sur la composition des gammes. - - - POLICE - - - - Code officiel géographique 2021 - Les objets géographiques contenus dans le COG sont les collectivités - territoriales (communes, départements, régions, collectivités territoriales à statut particulier), - les découpages administratifs (arrondissements, arrondissements municipaux, pseudo-cantons) et - certains découpages électoraux (cantons), ainsi que les pays et territoires étrangers. Ils sont - répertoriés au 1er janvier de chaque année. Pour la mise à jour du millésime N, tous les événements - intervenus entre le 02/01/N-1 et 01/01/N sont pris en compte. - - - L'Abergement-Clémenciat - - + + + Type d'équipements + La classification des équipements en gammes a pour objectif de réunir des équipements qui présentent des logiques d'implantation voisines, en ce sens qu'ils sont fréquemment présents dans les mêmes communes. Ces regroupements permettent d'élaborer des indicateurs synthétiques reflétant l'organisation hiérarchisée des territoires en termes de services à la population. Les gammes d’équipements sont actualisées chaque année pour une nouvelle version de la Base Permanente des Équipements. En effet, d’une part de nouveaux équipements peuvent être introduits dans la base et, d’autre part, l’implantation des équipements dans les communes peut être modifiée, tout cela pouvant avoir des conséquences sur la composition des gammes. + + POLICE + + + + Code officiel géographique 2021 + Les objets géographiques contenus dans le COG sont les collectivités territoriales (communes, départements, régions, collectivités territoriales à statut particulier), les découpages administratifs (arrondissements, arrondissements municipaux, pseudo-cantons) et certains découpages électoraux (cantons), ainsi que les pays et territoires étrangers. Ils sont répertoriés au 1er janvier de chaque année. Pour la mise à jour du millésime N, tous les événements intervenus entre le 02/01/N-1 et 01/01/N sont pris en compte. + + L'Abergement-Clémenciat + + Code officiel géographique - Les objets géographiques contenus dans le COG sont les collectivités - territoriales (communes, départements, régions, collectivités territoriales à statut particulier), - les découpages administratifs (arrondissements, arrondissements municipaux, pseudo-cantons) et - certains découpages électoraux (cantons), ainsi que les pays et territoires étrangers. Ils sont - répertoriés au 1er janvier de chaque année. Pour la mise à jour du millésime N, tous les événements - intervenus entre le 02/01/N-1 et 01/01/N sont pris en compte. - + Les objets géographiques contenus dans le COG sont les collectivités territoriales (communes, départements, régions, collectivités territoriales à statut particulier), les découpages administratifs (arrondissements, arrondissements municipaux, pseudo-cantons) et certains découpages électoraux (cantons), ainsi que les pays et territoires étrangers. Ils sont répertoriés au 1er janvier de chaque année. Pour la mise à jour du millésime N, tous les événements intervenus entre le 02/01/N-1 et 01/01/N sont pris en compte. Ain - - - - Ensemble des concepts de l'Insee - - Equipement - - - Type de l'équipement - - urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_TYPEQU(1.0) - - - - - - Commune - - urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_DEPCOM(1.0) - - - - - - Année de référence - - - Abscisse (Système de coordonnées Lambert 93) - - - Ordonnée (Système de coordonnées Lambert 93) - - + + + + Ensemble des concepts de l'Insee + + Equipement + + + Type de l'équipement + + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_TYPEQU(1.0) + + + + + Commune + + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_DEPCOM(1.0) + + + + + Année de référence + + + Abscisse (Système de coordonnées Lambert 93) + + + Ordonnée (Système de coordonnées Lambert 93) + + Code officiel géographique - + Année - + Nombre de communes - + Nombre d'équipement - + Nombre de médecins généralistes pour 10 OOO habitants - + Population municipale - + Population totale - - + + - + Geolocalized Equipments (geography 2021) - - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).EQUIPEMENT - - - + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).EQUIPEMENT + + - - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).TYPE_EQUIPEMENT - + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).TYPE_EQUIPEMENT - urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_TYPEQU(1.0) - - + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_TYPEQU(1.0) + - + - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).MUNICIPALITY - + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).MUNICIPALITY - urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_DEPCOM(1.0) - - + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_DEPCOM(1.0) + - + - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_YEAR - + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_YEAR - + - + - - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).LAMBERT_X - - - - + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).LAMBERT_X + + + - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).LAMBERT_Y - - - - + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).LAMBERT_Y + + + - Cube populations légales - - - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_AREA - - - urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_REF_AREA(1.0) - - - - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).TIME_PERIOD - - - - - - - - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).NB_COM - - - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).POP_MUNI - - - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).POP_TOT - - - - - + isExternalReference="false" + agencyID="FR1" + id="LEGAL_POP_CUBE" + version="1.0"> + Cube populations légales + + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_AREA + + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_REF_AREA(1.0) + + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).TIME_PERIOD + + + + + + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).NB_COM + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).POP_MUNI + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).POP_TOT + + + + Cube BPE par ville - - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).MUNICIPALITY - + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).MUNICIPALITY - urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_DEPCOM(1.0) - + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_DEPCOM(1.0) - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).TYPE_EQUIPEMENT - + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).TYPE_EQUIPEMENT - urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_TYPEQU(1.0) - + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_TYPEQU(1.0) - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_YEAR - + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_YEAR - - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).NB_EQUIPEMENT - + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).NB_EQUIPEMENT @@ -363,246 +251,201 @@ version="1.0"> Cube BPE par nuts 3 - - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_AREA - + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_AREA - urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_REF_AREA(1.0) - + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_REF_AREA(1.0) - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).TYPE_EQUIPEMENT - + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).TYPE_EQUIPEMENT - urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_TYPEQU(1.0) - + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_TYPEQU(1.0) - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_YEAR - + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_YEAR - + - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).NB_EQUIPEMENT - + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).NB_EQUIPEMENT - + Cube médecins généralistes par habitants ventillé par nuts 3 en 2010 - - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_AREA - + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_AREA - urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_REF_AREA(1.0) - + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_REF_AREA(1.0) - - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).NB_GENERALISTES_PAR_HABITANTS - + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).NB_GENERALISTES_PAR_HABITANTS - - Dataflow for BPE_DETAIL - urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:BPE_DETAIL(1.0) - + + Dataflow for BPE_DETAIL + urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:BPE_DETAIL(1.0) - - Dataflow for LEGAL_POP_CUBE - urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:LEGAL_POP_CUBE(1.0) - + + Dataflow for LEGAL_POP_CUBE + urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:LEGAL_POP_CUBE(1.0) - + Dataflow for BPE_MUNICIPALITY - urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:BPE_MUNICIPALITY(1.0) - + urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:BPE_MUNICIPALITY(1.0) - + Dataflow for BPE_NUTS3 - urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:BPE_NUTS3(1.0) - + urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:BPE_NUTS3(1.0) - + Dataflow for BPE_CENSUS_NUTS3_2021 - urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:BPE_CENSUS_NUTS3_2021(1.0) - + urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:BPE_CENSUS_NUTS3_2021(1.0) - - VTL Mapping Scheme #1 - - VTL Mapping #1 - urn:sdmx:org.sdmx.infomodel.datastructure.Dataflow=FR1:BPE_DETAIL(1.0) - - - VTL Mapping #2 - urn:sdmx:org.sdmx.infomodel.datastructure.Dataflow=FR1:LEGAL_POP_CUBE(1.0) - - + + VTL Mapping Scheme #1 + + VTL Mapping #1 + urn:sdmx:org.sdmx.infomodel.datastructure.Dataflow=FR1:BPE_DETAIL(1.0) + + + VTL Mapping #2 + urn:sdmx:org.sdmx.infomodel.datastructure.Dataflow=FR1:LEGAL_POP_CUBE(1.0) + - - Ruleset Scheme #1 - - Datapoint Ruleset UNIQUE_MUNICIPALITY - - define datapoint ruleset UNIQUE_MUNICIPALITY (variable DEPCOM) is - MUNICIPALITY_FORMAT_RULE : match_characters(DEPCOM, "[0-9]{5}|2[A-B][0-9]{3}") errorcode - "Municipality code is not in the correct format" - end datapoint ruleset; - - - - Datapoint Ruleset NUTS3_TYPES - - define datapoint ruleset NUTS3_TYPES (variable facility_type, nb) is - BOWLING_ALLEY_RULE : when facility_type = "F102" then nb > 10 errorcode "Not enough bowling - alleys" - end datapoint ruleset; - - - + + Ruleset Scheme #1 + + Datapoint Ruleset UNIQUE_MUNICIPALITY + + define datapoint ruleset UNIQUE_MUNICIPALITY (variable DEPCOM) is + MUNICIPALITY_FORMAT_RULE : match_characters(DEPCOM, "[0-9]{5}|2[A-B][0-9]{3}") errorcode "Municipality code is not in the correct format" + end datapoint ruleset; + + + + Datapoint Ruleset NUTS3_TYPES + + define datapoint ruleset NUTS3_TYPES (variable facility_type, nb) is + BOWLING_ALLEY_RULE : when facility_type = "F102" then nb > 10 errorcode "Not enough bowling alleys" + end datapoint ruleset; + + + - - Transformation Scheme for BPE - CENSUS - - Step 1 - Validation of municipality code in input file - check_datapoint(BPE_DETAIL_VTL, UNIQUE_MUNICIPALITY invalid) - CHECK_MUNICIPALITY - - - Step 2 - Clean BPE input database - - BPE_DETAIL_VTL [drop LAMBERT_X, LAMBERT_Y] - [rename ID_EQUIPEMENT to id, TYPEQU to facility_type, DEPCOM to municipality, REF_YEAR to year] - - BPE_DETAIL_CLEAN - - - Step 3 - BPE aggregation by municipality, type and year - - BPE_DETAIL_CLEAN [aggr nb := count(id) group by municipality, year, facility_type] [rename year - to TIME_PERIOD] - - BPE_MUNICIPALITY - - - Step 4 - BPE aggregation by NUTS 3, type and year - - BPE_MUNICIPALITY [calc nuts3 := if substr(municipality,1,2) = "97" then substr(municipality,1,3) - else substr(municipality,1,2)] - [aggr nb := count(nb) group by TIME_PERIOD, nuts3, facility_type] - - BPE_NUTS3 - - - Step 5 - BPE validation of facility types by NUTS 3 - check_datapoint(BPE_NUTS3, NUTS3_TYPES invalid) - CHECK_NUTS3_TYPES - - - Step 6 - Prepare 2021 census dataset by NUTS 3 - - LEGAL_POP [rename REF_AREA to nuts3, POP_TOT to pop] - [filter TIME_PERIOD = "2021"] - [calc pop := cast(pop, integer)] - [drop NB_COM, POP_MUNI] - - CENSUS_NUTS3_2021 - - - Step 7 - Extract dataset on general practitioners from BPE by NUTS 3 in 2021 - - - BPE_NUTS3 [filter facility_type = "D201" and TIME_PERIOD = "2021"] - - GENERAL_PRACT_NUTS3_2021 - - - Step 8 - Merge practitioners and legal population datasets by NUTS 3 in 2021 - and compute an indicator - - - inner_join(GENERAL_PRACT_NUTS3_2021, CENSUS_NUTS3_2021 using nuts3) + + Transformation Scheme for BPE - CENSUS + + Step 1 + Validation of municipality code in input file + check_datapoint(BPE_DETAIL_VTL, UNIQUE_MUNICIPALITY invalid) + CHECK_MUNICIPALITY + + + Step 2 + Clean BPE input database + + BPE_DETAIL_VTL [drop LAMBERT_X, LAMBERT_Y] + [rename ID_EQUIPEMENT to id, TYPEQU to facility_type, DEPCOM to municipality, REF_YEAR to year] + + BPE_DETAIL_CLEAN + + + Step 3 + BPE aggregation by municipality, type and year + + BPE_DETAIL_CLEAN [aggr nb := count(id) group by municipality, year, facility_type] [rename year to TIME_PERIOD] + + BPE_MUNICIPALITY + + + Step 4 + BPE aggregation by NUTS 3, type and year + + BPE_MUNICIPALITY [calc nuts3 := if substr(municipality,1,2) = "97" then substr(municipality,1,3) else substr(municipality,1,2)] + [aggr nb := count(nb) group by TIME_PERIOD, nuts3, facility_type] + + BPE_NUTS3 + + + Step 5 + BPE validation of facility types by NUTS 3 + check_datapoint(BPE_NUTS3, NUTS3_TYPES invalid) + CHECK_NUTS3_TYPES + + + Step 6 + Prepare 2021 census dataset by NUTS 3 + + LEGAL_POP [rename REF_AREA to nuts3, POP_TOT to pop] + [filter TIME_PERIOD = "2021"] + [calc pop := cast(pop, integer)] + [drop TIME_PERIOD, NB_COM, POP_MUNI] + + CENSUS_NUTS3_2021 + + + Step 7 + Extract dataset on general practitioners from BPE by NUTS 3 in 2021 + + BPE_NUTS3 [filter facility_type = "D201" and TIME_PERIOD = "2021"] + [drop facility_type, TIME_PERIOD] + + GENERAL_PRACT_NUTS3_2021 + + + Step 8 + Merge practitioners and legal population datasets by NUTS 3 in 2021 and compute an indicator + + inner_join(GENERAL_PRACT_NUTS3_2021, CENSUS_NUTS3_2021) [calc pract_per_10000_inhabitants := nb / pop * 10000] [drop nb, pop] - - BPE_CENSUS_NUTS3_2021 - - urn:sdmx:org.sdmx.infomodel.transformation.VtlMappingScheme=FR1:VTLMS1(1.0) - - urn:sdmx:org.sdmx.infomodel.transformation.RulesetScheme=FR1:RS1(1.0) - - - + + BPE_CENSUS_NUTS3_2021 + + urn:sdmx:org.sdmx.infomodel.transformation.VtlMappingScheme=FR1:VTLMS1(1.0) + urn:sdmx:org.sdmx.infomodel.transformation.RulesetScheme=FR1:RS1(1.0) + + \ No newline at end of file diff --git a/vtl-spark/src/test/java/fr/insee/vtl/spark/SparkDatasetTest.java b/vtl-spark/src/test/java/fr/insee/vtl/spark/SparkDatasetTest.java index d0e97f7dd..5cfc73340 100644 --- a/vtl-spark/src/test/java/fr/insee/vtl/spark/SparkDatasetTest.java +++ b/vtl-spark/src/test/java/fr/insee/vtl/spark/SparkDatasetTest.java @@ -130,10 +130,11 @@ public void testParquetMetadataWriting(@TempDir Path tmpDirectory) throws Script context.setAttribute("ds2", dsWithMetadata, ScriptContext.ENGINE_SCOPE); - engine.eval("ds3 := ds2[calc identifier year := year];"); + engine.eval("ds3 := ds2[calc attribute school_id := school_id, identifier year := year];"); SparkDataset dsWithMetadataAndRoles = (SparkDataset) engine.getContext().getAttribute("ds3"); + assertTrue(dsWithMetadataAndRoles.getDataStructure().get("school_id").isAttribute()); assertTrue(dsWithMetadataAndRoles.getDataStructure().get("year").isIdentifier()); } } diff --git a/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/OperatorsTest.java b/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/OperatorsTest.java index 10808118b..3ad9bb9d3 100644 --- a/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/OperatorsTest.java +++ b/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/OperatorsTest.java @@ -47,7 +47,7 @@ public void testOperators() throws ScriptException { engine.eval( "res := ds1#long1; " + "res1 := isnull(ds1); " - + "ds_1 := ds1[keep long1, double1]; ds_2 := ds2[keep long1, double1]; " + + "ds_1 := ds1[keep id, long1, double1]; ds_2 := ds2[keep id, long1, double1]; " + "res2 := ds_1 + ds_2; " + "res3 := ds_1 - ds_2; " + "res4 := ds_1 * ds_2; " @@ -67,8 +67,7 @@ public void testOperators() throws ScriptException { + "res18 := mod(ds_1, 5); " + "res19 := power(ds_1, 5); " + "res20 := log(abs(ds_1), 5); " - + "ds_11 := ds1[keep string1, string2]; " - + "ds_22 := ds2[keep string1][calc string2 := string1]; " + + "ds_11 := ds1[keep id, string1, string2]; ds_22 := ds2[keep id, string1][calc string2 := string1]; " + "res21 := ds_11 || ds_22; "); var res = engine.getContext().getAttribute("res21"); assertThat(((Dataset) res).getDataStructure().get("string1").getType()).isEqualTo(String.class); @@ -79,8 +78,8 @@ public void testPlan() throws ScriptException { engine.getContext().setAttribute("ds_1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); engine.getContext().setAttribute("ds_2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); engine.eval( - "ds1 := ds_1[keep long1][rename long1 to bool_var]; " - + "ds2 := ds_2[keep long1][rename long1 to bool_var]; " + "ds1 := ds_1[keep id, long1][rename long1 to bool_var]; " + + "ds2 := ds_2[keep id, long1][rename long1 to bool_var]; " + "res := if ds1 > ds2 then ds1 else ds2;"); var res = engine.getContext().getAttribute("res"); assertThat(((Dataset) res).getDataAsMap()) diff --git a/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/ProjectTest.java b/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/ProjectTest.java index 5512f86d6..842758114 100644 --- a/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/ProjectTest.java +++ b/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/ProjectTest.java @@ -59,7 +59,7 @@ public void testProjection() throws ScriptException { ScriptContext context = engine.getContext(); context.setAttribute("ds", dataset, ScriptContext.ENGINE_SCOPE); - engine.eval("ds1 := ds[keep age];"); + engine.eval("ds1 := ds[keep name, age];"); assertThat(engine.getContext().getAttribute("ds")) .isInstanceOf(fr.insee.vtl.model.Dataset.class); diff --git a/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/ValidationTest.java b/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/ValidationTest.java index 888bc55b7..19f8eff0d 100644 --- a/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/ValidationTest.java +++ b/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/ValidationTest.java @@ -713,9 +713,9 @@ public void testValidationSimpleException() throws ScriptException { Dataset DS_r_to_rename = (Dataset) engine.getContext().getAttribute("DS_r_to_rename"); List DS_r_to_renameMeasure = DS_r_to_rename.getDataStructure().values().stream() - .filter(Structured.Component::isMeasure) - .map(Structured.Component::getName) - .toList(); + .filter(c -> c.isMeasure()) + .map(c -> c.getName()) + .collect(Collectors.toList()); assertThat(DS_r_to_renameMeasure.size()).isEqualTo(4); assertThat(DS_r_to_renameMeasure.contains("imbalance")).isTrue(); } @@ -744,8 +744,8 @@ public void serializationCheckDatapointTest() throws ScriptException { "ds1_1 := ds1[calc identifier id := id, long1 := cast(long1, integer), double1 := cast(double1, number), bool1 := cast(bool1, boolean)]; " + "ds2_1 := ds2[calc identifier id := id, long1 := cast(long1, integer), double1 := cast(double1, number), bool1 := cast(bool1, boolean)]; " + "ds_concat := ds1_1#string1 || \" and \" || ds2_1#string1; " - + "ds1_num := ds1_1[keep long1, double1]; " - + "ds2_num := ds2_1[keep long1, double1]; " + + "ds1_num := ds1_1[keep id, long1, double1]; " + + "ds2_num := ds2_1[keep id, long1, double1]; " + "ds_mod := mod(ds1_num, 2); " + "ds_sum := ds1_num + ds2_num; " + "ds_compare := ds1_num = ds2_num; " From b47035b8616cbdc4dc809df309a7233b856bd42e Mon Sep 17 00:00:00 2001 From: Hadrien Kohl Date: Tue, 13 Jan 2026 14:42:54 +0100 Subject: [PATCH 12/15] Keep / Drop changes --- .../vtl/engine/visitors/ClauseVisitor.java | 76 +- .../insee/vtl/engine/utils/dag/DagTest.java | 46 +- .../engine/visitors/ClauseVisitorTest.java | 25 +- .../ArithmeticExprOrConcatTest.java | 6 +- .../expression/ArithmeticExprTest.java | 6 +- .../visitors/expression/BooleanExprTest.java | 4 +- .../expression/ComparisonExprTest.java | 14 +- .../expression/ConditionalExprTest.java | 16 +- .../visitors/expression/UnaryExprTest.java | 4 +- .../functions/ComparisonFunctionsTest.java | 7 +- .../functions/DistanceFunctionsTest.java | 2 +- .../functions/JoinFunctionsTest.java | 10 +- .../functions/NumericFunctionsTest.java | 24 +- .../functions/SetFunctionsVisitorTest.java | 4 +- .../functions/StringFunctionsTest.java | 18 +- .../vtl/prov/ProvenanceListenerTest.java | 4 +- .../test/java/fr/insee/vtl/prov/RDFTest.java | 7 +- .../src/test/java/fr/insee/vtl/BPETest.java | 7 +- .../fr/insee/vtl/SDMXVTLWorkflowTest.java | 52 +- .../src/test/resources/DSD_BPE_CENSUS.xml | 785 +++++++++++------- .../fr/insee/vtl/spark/SparkDatasetTest.java | 3 +- .../processing.engine/OperatorsTest.java | 9 +- .../spark/processing.engine/ProjectTest.java | 2 +- .../processing.engine/ValidationTest.java | 10 +- 24 files changed, 691 insertions(+), 450 deletions(-) diff --git a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java index fefde8900..d8853822a 100644 --- a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java +++ b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java @@ -13,6 +13,7 @@ import fr.insee.vtl.parser.VtlBaseVisitor; import fr.insee.vtl.parser.VtlParser; import java.util.*; +import java.util.function.Function; import java.util.stream.Collectors; import org.antlr.v4.runtime.ParserRuleContext; import org.antlr.v4.runtime.misc.Interval; @@ -106,16 +107,73 @@ private static AggregationExpression convertToAggregation( @Override public DatasetExpression visitKeepOrDropClause(VtlParser.KeepOrDropClauseContext ctx) { - // Normalize to keep operation. - var keep = ctx.op.getType() == VtlParser.KEEP; - var names = ctx.componentID().stream().map(ClauseVisitor::getName).collect(Collectors.toSet()); - List columnNames = - datasetExpression.getDataStructure().values().stream() - .map(Dataset.Component::getName) - .filter(name -> keep == names.contains(name)) - .collect(Collectors.toList()); - return processingEngine.executeProject(datasetExpression, columnNames); + // The type of the op can either be KEEP or DROP. + boolean keep = ctx.op.getType() == VtlParser.KEEP; + + // Columns explicitly requested in the KEEP/DROP clause + List cleanColumnNames = ctx.componentID().stream().map(ClauseVisitor::getName).toList(); + + Collection inputColumns = datasetExpression.getDataStructure().keySet(); + + // Dataset identifiers (role = IDENTIFIER) + Map identifiers = + datasetExpression.getDataStructure().getIdentifiers().stream() + .collect(Collectors.toMap(Structured.Component::getName, Function.identity())); + + // Evaluate that all requested columns must exist in the dataset or raise an error + for (String requested : cleanColumnNames) { + if (!inputColumns.contains(requested)) { + throw new VtlRuntimeException( + new InvalidArgumentException( + // TODO: use actual column context. + String.format("'%s' not found in dataset.", requested), fromContext(ctx))); + } + } + + // VTL specification: identifiers must not appear explicitly in KEEP + Set forbidden = + cleanColumnNames.stream() + .filter(identifiers::containsKey) + .collect(Collectors.toCollection(LinkedHashSet::new)); + + if (!forbidden.isEmpty()) { + StringBuilder details = new StringBuilder(); + for (String id : forbidden) { + Dataset.Component comp = identifiers.get(id); + details.append( + String.format( + "%s(role=%s, type=%s) ", + id, comp.getRole(), comp.getType() != null ? comp.getType() : "n/a")); + } + throw new VtlRuntimeException( + new InvalidArgumentException( + String.format( + "identifiers %s must not be explicitly listed in KEEP/DROP. Details: %s", + forbidden, details.toString().trim()), + // TODO: use actual column context. + fromContext(ctx))); + } + + // Build result set: + // + KEEP: identifiers + requested columns + // + DROP: (all columns - requested) + identifiers + final Set resultSet = new LinkedHashSet<>(); + resultSet.addAll(identifiers.keySet()); + if (keep) { + resultSet.addAll(cleanColumnNames); + } else { + for (String col : inputColumns) { + if (!cleanColumnNames.contains(col)) { + resultSet.add(col); + } + } + } + + // Retrieve the output column names (identifiers + requested) + final List outputColumns = + inputColumns.stream().filter(resultSet::contains).collect(Collectors.toList()); + return processingEngine.executeProject(datasetExpression, outputColumns); } @Override diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/utils/dag/DagTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/utils/dag/DagTest.java index 841ba57e8..fcb4a6037 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/utils/dag/DagTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/utils/dag/DagTest.java @@ -199,11 +199,11 @@ void testDagSimpleExampleWithReordering() throws ScriptException { void testDagCycle() { final String script = """ - e := a; - b := a; - c := b; - a := c; - f := a;"""; + e := a; + b := a; + c := b; + a := c; + f := a;"""; final Positioned.Position mainPosition = getPositionOfStatementInScript("a := c", script); final List otherPositions = @@ -231,14 +231,14 @@ void testDagCycle() { void testMultipleCycles() { final String script = """ - h := g; - i := join(h, input_ds); - g := i; - e := a; - b := a; - c := b; - a := c; - f := a;"""; + h := g; + i := join(h, input_ds); + g := i; + e := a; + b := a; + c := b; + a := c; + f := a;"""; final Positioned.Position mainExceptionMainPosition = getPositionOfStatementInScript("g := i", script); @@ -446,8 +446,8 @@ void testDagIfExpr() throws ScriptException { engine.getContext().setAttribute("ds_2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); engine.eval( "res := if ds1 > ds2 then ds1 else ds2; " - + "ds1 := ds_1[keep id, long1][rename long1 to bool_var]; " - + "ds2 := ds_2[keep id, long1][rename long1 to bool_var];"); + + "ds1 := ds_1[keep long1][rename long1 to bool_var]; " + + "ds2 := ds_2[keep long1][rename long1 to bool_var];"); var res = engine.getContext().getAttribute("res"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( @@ -463,7 +463,7 @@ void testDagCaseExpr() throws ScriptException { engine.getContext().setAttribute("ds_2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); engine.eval( "res0 <- tmp0[calc c := case when long1 > 30 then \"ok\" else \"ko\"][drop long1]; " - + "tmp0 := ds_1[keep id, long1];"); + + "tmp0 := ds_1[keep long1];"); Object res0 = engine.getContext().getAttribute("res0"); assertThat(((Dataset) res0).getDataAsMap()) .containsExactlyInAnyOrder( @@ -474,7 +474,7 @@ void testDagCaseExpr() throws ScriptException { assertThat(((Dataset) res0).getDataStructure().get("c").getType()).isEqualTo(String.class); engine.eval( "res1 <- tmp1[calc c := case when long1 > 30 then 1 else 0][drop long1]; " - + "tmp1 := ds_1[keep id, long1];"); + + "tmp1 := ds_1[keep long1];"); Object res1 = engine.getContext().getAttribute("res1"); assertThat(((Dataset) res1).getDataAsMap()) .containsExactlyInAnyOrder( @@ -484,9 +484,9 @@ void testDagCaseExpr() throws ScriptException { Map.of("id", "Franck", "c", 1L)); assertThat(((Dataset) res1).getDataStructure().get("c").getType()).isEqualTo(Long.class); engine.eval( - "tmp2_alt_ds1 := ds_1[keep id, long1][rename long1 to bool_var]; " + "tmp2_alt_ds1 := ds_1[keep long1][rename long1 to bool_var]; " + "res2 <- case when tmp2_alt_ds1 < 30 then tmp2_alt_ds1 else tmp2_alt_ds2; " - + "tmp2_alt_ds2 := ds_2[keep id, long1][rename long1 to bool_var];"); + + "tmp2_alt_ds2 := ds_2[keep long1][rename long1 to bool_var];"); Object resDs = engine.getContext().getAttribute("res2"); assertThat(((Dataset) resDs).getDataAsMap()) .containsExactlyInAnyOrder( @@ -498,7 +498,7 @@ void testDagCaseExpr() throws ScriptException { @Test void testDagNvlExpr() throws ScriptException { engine.getContext().setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - engine.eval("res <- nvl(tmp1[keep id, long1], 0); tmp1 := ds1;"); + engine.eval("res <- nvl(tmp1[keep long1], 0); tmp1 := ds1;"); var res = engine.getContext().getAttribute("res"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( @@ -512,7 +512,7 @@ void testDagNvlExpr() throws ScriptException { @Test void testDagNvlImplicitCast() throws ScriptException { engine.getContext().setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - engine.eval("res := nvl(tmp1[keep id, long1], 0.1); tmp1 <- ds1;"); + engine.eval("res := nvl(tmp1[keep long1], 0.1); tmp1 <- ds1;"); var res = engine.getContext().getAttribute("res"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( @@ -528,7 +528,7 @@ void testDagUnaryExpr() throws ScriptException { ScriptContext context = engine.getContext(); context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := + tmp1[keep id, long1, double1]; tmp1 <- ds2;"); + Object res = engine.eval("res := + tmp1[keep long1, double1]; tmp1 <- ds2;"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "long1", 150L, "double1", 1.1D), @@ -537,7 +537,7 @@ void testDagUnaryExpr() throws ScriptException { assertThat(((Dataset) res).getDataStructure().get("long1").getType()).isEqualTo(Long.class); context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res2 = engine.eval("res2 := - tmp2[keep id, long1, double1]; tmp2 := ds2;"); + Object res2 = engine.eval("res2 := - tmp2[keep long1, double1]; tmp2 := ds2;"); assertThat(((Dataset) res2).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "long1", -150L, "double1", -1.1D), diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java index 5c25d981b..4c06bb405 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java @@ -87,7 +87,7 @@ public void testManyCalc() throws ScriptException { } @Test - public void testCalcRoleModifier() throws ScriptException { + public void testCalcRoleModifier_measuresAndAttributesOk() throws ScriptException { InMemoryDataset dataset = new InMemoryDataset( List.of( @@ -194,7 +194,8 @@ public void testKeepDropClause() throws ScriptException { ScriptContext context = engine.getContext(); context.setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); - engine.eval("ds2 := ds1[keep name, age];"); + // KEEP: identifiers must not be listed explicitly; they are implicitly preserved. + engine.eval("ds2 := ds1[keep age];"); assertThat(engine.getContext().getAttribute("ds2")).isInstanceOf(Dataset.class); assertThat(((Dataset) engine.getContext().getAttribute("ds2")).getDataAsMap()) @@ -213,6 +214,26 @@ public void testKeepDropClause() throws ScriptException { Map.of("name", "Franck", "age", 12L)); } + /** KEEP/DROP: listing identifiers explicitly must raise a script error. */ + @Test + public void testKeepDropClause_identifierExplicitShouldFail() { + InMemoryDataset dataset = + new InMemoryDataset( + List.of( + Map.of("name", "Hadrien", "age", 10L, "weight", 11L), + Map.of("name", "Nico", "age", 11L, "weight", 10L), + Map.of("name", "Franck", "age", 12L, "weight", 9L)), + Map.of("name", String.class, "age", Long.class, "weight", Long.class), + Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE)); + + ScriptContext context = engine.getContext(); + context.setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); + + assertThatThrownBy(() -> engine.eval("ds := ds1[keep name, age];")) + .isInstanceOf(VtlScriptException.class) + .hasMessageContaining("identifiers [name] must not be explicitly listed in KEEP/DROP"); + } + @Test public void testAggregateType() { InMemoryDataset dataset = diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprOrConcatTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprOrConcatTest.java index 3d61e46ba..d3cb18544 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprOrConcatTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprOrConcatTest.java @@ -52,7 +52,7 @@ public void testPlus() throws ScriptException { assertThat(context.getAttribute("plus2")).isEqualTo(5.0); context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := ds1[keep id, long1, long2] + ds1[keep id, long1, long2];"); + Object res = engine.eval("res := ds1[keep long1, long2] + ds1[keep long1, long2];"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", 60L, "long2", 600L), @@ -75,7 +75,7 @@ public void testMinus() throws ScriptException { context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := ds2[keep id, long1] - ds1[keep id, long1] + 1;"); + Object res = engine.eval("res := ds2[keep long1] - ds1[keep long1] + 1;"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "long1", 141L), @@ -92,7 +92,7 @@ public void testConcat() throws ScriptException { context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := ds2[keep id, string1] || \" \" || ds1[keep id, string1];"); + Object res = engine.eval("res := ds2[keep string1] || \" \" || ds1[keep string1];"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "string1", "hadrien hadrien"), diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprTest.java index c5822ea0f..353838880 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprTest.java @@ -64,8 +64,7 @@ public void testArithmeticExpr() throws ScriptException { context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = - engine.eval("res := round(ds1[keep id, long1, double1] * ds2[keep id, long1, double1]);"); + Object res = engine.eval("res := round(ds1[keep long1, double1] * ds2[keep long1, double1]);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "long1", 1500.0, "double1", 1.0), @@ -83,8 +82,7 @@ public void testArithmeticExpr() throws ScriptException { engine.eval("div4 := 3.0 / 1.5;"); assertThat(context.getAttribute("div4")).isEqualTo(2.0); - res = - engine.eval("res2 := round(ds1[keep id, long1, double1] / ds2[keep id, long1, double1]);"); + res = engine.eval("res2 := round(ds1[keep long1, double1] / ds2[keep long1, double1]);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "long1", 0.0, "double1", 1.0), diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/BooleanExprTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/BooleanExprTest.java index 7e7216f6d..d9ba19e7d 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/BooleanExprTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/BooleanExprTest.java @@ -100,8 +100,8 @@ public void testOnDatasets() throws ScriptException { context.setAttribute("ds_1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); context.setAttribute("ds_2", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); engine.eval( - "ds1 := ds_1[keep id, bool2][rename bool2 to bool1]; " - + "ds2 := ds_2[keep id, bool1]; " + "ds1 := ds_1[keep bool2][rename bool2 to bool1]; " + + "ds2 := ds_2[keep bool1]; " + "andDs := ds1 and ds2; " + "orDs := ds1 or ds2; " + "xorDs := ds1 xor ds2; "); diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ComparisonExprTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ComparisonExprTest.java index 70921ac91..297becc48 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ComparisonExprTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ComparisonExprTest.java @@ -73,7 +73,7 @@ public void testComparisonExpr() throws ScriptException { context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - engine.eval("equal := ds1[keep id, long1] = ds2[keep id, long1];"); + engine.eval("equal := ds1[keep long1] = ds2[keep long1];"); var equal = engine.getContext().getAttribute("equal"); assertThat(((Dataset) equal).getDataAsMap()) .containsExactlyInAnyOrder( @@ -90,7 +90,7 @@ public void testComparisonExpr() throws ScriptException { assertThat((Boolean) context.getAttribute("long1")).isTrue(); engine.eval("mix1 := 6 <> (3*20.0);"); assertThat((Boolean) context.getAttribute("mix1")).isTrue(); - engine.eval("notEqual := ds1[keep id, long1] <> ds2[keep id, long1];"); + engine.eval("notEqual := ds1[keep long1] <> ds2[keep long1];"); var notEqual = engine.getContext().getAttribute("notEqual"); assertThat(((Dataset) notEqual).getDataAsMap()) .containsExactlyInAnyOrder( @@ -106,7 +106,7 @@ public void testComparisonExpr() throws ScriptException { assertThat((Boolean) context.getAttribute("lt1")).isFalse(); engine.eval("mix2 := 6 < 6.1;"); assertThat((Boolean) context.getAttribute("mix2")).isTrue(); - engine.eval("lt2 := ds1[keep id, long1] < ds2[keep id, long1];"); + engine.eval("lt2 := ds1[keep long1] < ds2[keep long1];"); var lt = engine.getContext().getAttribute("lt2"); assertThat(((Dataset) lt).getDataAsMap()) .containsExactlyInAnyOrder( @@ -122,7 +122,7 @@ public void testComparisonExpr() throws ScriptException { assertThat((Boolean) context.getAttribute("mt1")).isTrue(); engine.eval("mix4 := 6 > 6.1;"); assertThat((Boolean) context.getAttribute("mix4")).isFalse(); - engine.eval("mt2 := ds1[keep id, long1] > ds2[keep id, long1];"); + engine.eval("mt2 := ds1[keep long1] > ds2[keep long1];"); var mt = engine.getContext().getAttribute("mt2"); assertThat(((Dataset) mt).getDataAsMap()) .containsExactlyInAnyOrder( @@ -139,7 +139,7 @@ public void testComparisonExpr() throws ScriptException { engine.eval("mix5 := 6 <= 6.1;"); assertThat((Boolean) context.getAttribute("mix5")).isTrue(); - engine.eval("le2 := ds1[keep id, long1] <= ds2[keep id, long1];"); + engine.eval("le2 := ds1[keep long1] <= ds2[keep long1];"); var le = engine.getContext().getAttribute("le2"); assertThat(((Dataset) le).getDataAsMap()) .containsExactlyInAnyOrder( @@ -156,7 +156,7 @@ public void testComparisonExpr() throws ScriptException { engine.eval("mix6 := 6 >= 6.1;"); assertThat((Boolean) context.getAttribute("mix6")).isFalse(); - engine.eval("me2 := ds1[keep id, long1] >= ds2[keep id, long1];"); + engine.eval("me2 := ds1[keep long1] >= ds2[keep long1];"); var me = engine.getContext().getAttribute("me2"); assertThat(((Dataset) me).getDataAsMap()) .containsExactlyInAnyOrder( @@ -198,7 +198,7 @@ public void testInNotIn() throws ScriptException { assertThat((Boolean) engine.getContext().getAttribute("res4")).isTrue(); engine.getContext().setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - engine.eval("me := ds[keep id, long1, string1] in {\"toto\", \"franck\"};"); + engine.eval("me := ds[keep long1, string1] in {\"toto\", \"franck\"};"); var in = engine.getContext().getAttribute("me"); assertThat(((Dataset) in).getDataAsMap()) .containsExactlyInAnyOrder( diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ConditionalExprTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ConditionalExprTest.java index 2994c97c7..5ce05c628 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ConditionalExprTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ConditionalExprTest.java @@ -46,8 +46,8 @@ public void testIfExpr() throws ScriptException { engine.getContext().setAttribute("ds_1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); engine.getContext().setAttribute("ds_2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); engine.eval( - "ds1 := ds_1[keep id, long1][rename long1 to bool_var]; " - + "ds2 := ds_2[keep id, long1][rename long1 to bool_var]; " + "ds1 := ds_1[keep long1][rename long1 to bool_var]; " + + "ds2 := ds_2[keep long1][rename long1 to bool_var]; " + "res := if ds1 > ds2 then ds1 else ds2;"); var res = engine.getContext().getAttribute("res"); assertThat(((Dataset) res).getDataAsMap()) @@ -73,7 +73,7 @@ public void testCaseExpr() throws ScriptException { engine.getContext().setAttribute("ds_1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); engine.getContext().setAttribute("ds_2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); engine.eval( - "ds1 := ds_1[keep id, long1]; " + "ds1 := ds_1[keep long1]; " + "res <- ds1[calc c := case when long1 > 30 then \"ok\" else \"ko\"][drop long1];"); Object res = engine.getContext().getAttribute("res"); assertThat(((Dataset) res).getDataAsMap()) @@ -84,7 +84,7 @@ public void testCaseExpr() throws ScriptException { Map.of("id", "Franck", "c", "ok")); assertThat(((Dataset) res).getDataStructure().get("c").getType()).isEqualTo(String.class); engine.eval( - "ds2 := ds_1[keep id, long1]; " + "ds2 := ds_1[keep long1]; " + "res2 <- ds2[calc c := case when long1 > 30 then 1 else 0][drop long1];"); Object res2 = engine.getContext().getAttribute("res2"); assertThat(((Dataset) res2).getDataAsMap()) @@ -95,8 +95,8 @@ public void testCaseExpr() throws ScriptException { Map.of("id", "Franck", "c", 1L)); assertThat(((Dataset) res2).getDataStructure().get("c").getType()).isEqualTo(Long.class); engine.eval( - "ds3 := ds_1[keep id, long1][rename long1 to bool_var];" - + "ds4 := ds_2[keep id, long1][rename long1 to bool_var]; " + "ds3 := ds_1[keep long1][rename long1 to bool_var];" + + "ds4 := ds_2[keep long1][rename long1 to bool_var]; " + "res_ds <- case when ds3 < 30 then ds3 else ds4;"); Object res_ds = engine.getContext().getAttribute("res_ds"); assertThat(((Dataset) res_ds).getDataAsMap()) @@ -115,7 +115,7 @@ public void testNvlExpr() throws ScriptException { assertThat(context.getAttribute("s2")).isEqualTo("default"); engine.getContext().setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - engine.eval("res := nvl(ds[keep id, long1], 0);"); + engine.eval("res := nvl(ds[keep long1], 0);"); var res = engine.getContext().getAttribute("res"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( @@ -142,7 +142,7 @@ public void testNvlImplicitCast() throws ScriptException { assertThat(context.getAttribute("s2")).isEqualTo(1.1D); engine.getContext().setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - engine.eval("res := nvl(ds[keep id, long1], 0.1);"); + engine.eval("res := nvl(ds[keep long1], 0.1);"); var res = engine.getContext().getAttribute("res"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/UnaryExprTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/UnaryExprTest.java index 45ef35973..bbe9aea24 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/UnaryExprTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/UnaryExprTest.java @@ -44,7 +44,7 @@ public void testUnaryExpr() throws ScriptException { assertThat(context.getAttribute("plus1")).isEqualTo(1.5D); context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res1 := + ds2[keep id, long1, double1];"); + Object res = engine.eval("res1 := + ds2[keep long1, double1];"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "long1", 150L, "double1", 1.1D), @@ -57,7 +57,7 @@ public void testUnaryExpr() throws ScriptException { engine.eval("plus3 := - 1.5;"); assertThat(context.getAttribute("plus3")).isEqualTo(-1.5D); - res = engine.eval("res2 := - ds2[keep id, long1, double1];"); + res = engine.eval("res2 := - ds2[keep long1, double1];"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "long1", -150L, "double1", -1.1D), diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/ComparisonFunctionsTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/ComparisonFunctionsTest.java index 9e307d085..e19a3d273 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/ComparisonFunctionsTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/ComparisonFunctionsTest.java @@ -50,7 +50,7 @@ public void testBetweenAtom() throws ScriptException { assertThat((Boolean) context.getAttribute("b2")).isFalse(); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := between(ds[keep id, long1, double2], 5, 15);"); + Object res = engine.eval("res := between(ds[keep long1, double2], 5, 15);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", false, "double2", false), @@ -80,8 +80,7 @@ public void testCharsetMatchAtom() throws ScriptException { assertThat((Boolean) context.getAttribute("t3")).isFalse(); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = - engine.eval("res := match_characters(ds[keep id, string1, string2], \"(.*)o(.*)\");"); + Object res = engine.eval("res := match_characters(ds[keep string1, string2], \"(.*)o(.*)\");"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "string1", true, "string2", false), @@ -114,7 +113,7 @@ public void testIsNullAtom() throws ScriptException { assertThat((Boolean) context.getAttribute("n1")).isFalse(); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := isnull(ds[keep id, string1, bool1]);"); + Object res = engine.eval("res := isnull(ds[keep string1, bool1]);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "string1", false, "bool1", false), diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/DistanceFunctionsTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/DistanceFunctionsTest.java index f83d6837f..787e230b0 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/DistanceFunctionsTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/DistanceFunctionsTest.java @@ -43,7 +43,7 @@ public void testLevenshteinAtom() throws ScriptException { context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); Object res = engine.eval( - "res := levenshtein(ds[keep id, string1], ds[keep id, string2][rename string2 to string1])[rename string1 to lev];"); + "res := levenshtein(ds[keep string1], ds[keep string2][rename string2 to string1])[rename string1 to lev];"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "lev", 3L), diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/JoinFunctionsTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/JoinFunctionsTest.java index e79293263..f18c03612 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/JoinFunctionsTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/JoinFunctionsTest.java @@ -137,7 +137,8 @@ public void testLeftJoinWithDifferentIdentifiers() throws ScriptException { result2 := left_join(ds_1_1, ds_2 using Id_2);\ """)) .isInstanceOf(InvalidArgumentException.class) - .hasMessage("using component Id_2 has to be an identifier"); + .hasMessage( + "CALC cannot overwrite IDENTIFIER 'Id_2' (role=IDENTIFIER, type=class java.lang.Long)."); } @Test @@ -230,9 +231,8 @@ public void testLeftJoinMixedStructure() throws ScriptException { engine.getContext().setAttribute("ds1", dataset1, ScriptContext.ENGINE_SCOPE); engine.getContext().setAttribute("ds2", dataset2, ScriptContext.ENGINE_SCOPE); - engine.eval( - "unionData := union(ds1[keep id, measure1, measure2], ds2[keep id, measure1, measure2]);"); - engine.eval("ds1_keep := ds1[keep id, color];"); + engine.eval("unionData := union(ds1[keep measure1, measure2], ds2[keep measure1, measure2]);"); + engine.eval("ds1_keep := ds1[keep color];"); engine.eval("joinData := left_join(unionData, ds1_keep);"); Dataset joinData = (Dataset) engine.getBindings(ScriptContext.ENGINE_SCOPE).get("joinData"); @@ -260,7 +260,7 @@ public void testInnerJoin() throws ScriptException { engine.getContext().setAttribute("ds_1", ds1, ScriptContext.ENGINE_SCOPE); engine.getContext().setAttribute("ds_2", ds2, ScriptContext.ENGINE_SCOPE); - engine.eval("result1 := inner_join(ds_1[keep id1, id2, m1] as ds1, ds_2 as ds2);"); + engine.eval("result1 := inner_join(ds_1[keep m1] as ds1, ds_2 as ds2);"); var result = (Dataset) engine.getContext().getAttribute("result1"); assertThat(result.getColumnNames()).containsExactlyInAnyOrder("id1", "id2", "m1", "m2"); diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/NumericFunctionsTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/NumericFunctionsTest.java index d26b9617a..558e1f720 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/NumericFunctionsTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/NumericFunctionsTest.java @@ -85,7 +85,7 @@ public void testCeil() throws ScriptException { assertThat(context.getAttribute("d")).isEqualTo(0L); context.setAttribute("ds", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := ceil(ds[keep id, long1, double1]);"); + Object res = engine.eval("res := ceil(ds[keep long1, double1]);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "long1", 150L, "double1", 2L), @@ -112,7 +112,7 @@ public void testFloor() throws ScriptException { assertThat(context.getAttribute("d")).isEqualTo(-1L); context.setAttribute("ds", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := floor(ds[keep id, double1]);"); + Object res = engine.eval("res := floor(ds[keep double1]);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "double1", 1L), @@ -135,7 +135,7 @@ public void testAbs() throws ScriptException { assertThat(context.getAttribute("b")).isEqualTo(5.5D); context.setAttribute("ds", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := abs(ds[keep id, double1]);"); + Object res = engine.eval("res := abs(ds[keep double1]);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "double1", 1.1D), @@ -164,7 +164,7 @@ public void testExp() throws ScriptException { assertThat(((Double) context.getAttribute("d"))) .isCloseTo(0.367D, Percentage.withPercentage(1)); context.setAttribute("ds", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := floor(exp(ds[keep id, double1, long1]));"); + Object res = engine.eval("res := floor(exp(ds[keep double1, long1]));"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "double1", 3L, "long1", 9223372036854775807L), @@ -192,7 +192,7 @@ public void testLn() throws ScriptException { .isCloseTo(-0.69D, Percentage.withPercentage(1)); context.setAttribute("ds", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := floor(ln(abs(ds[keep id, double1])));"); + Object res = engine.eval("res := floor(ln(abs(ds[keep double1])));"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Hadrien", "double1", 0L), @@ -221,7 +221,7 @@ public void testRound() throws ScriptException { assertThat(context.getAttribute("e")).isEqualTo(12350D); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := round(ds[keep id, long1, double2], 1);"); + Object res = engine.eval("res := round(ds[keep long1, double2], 1);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", 30.0D, "double2", 1.2D), @@ -258,7 +258,7 @@ public void testTrunc() throws ScriptException { assertThat(context.getAttribute("e")).isEqualTo(12340D); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := trunc(ds[keep id, long1, double2], 1);"); + Object res = engine.eval("res := trunc(ds[keep long1, double2], 1);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", 30.0D, "double2", 1.2D), @@ -289,7 +289,7 @@ public void testSqrt() throws ScriptException { assertThat(context.getAttribute("c")).isEqualTo(0D); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := round(sqrt(ds[keep id, long1, double2]));"); + Object res = engine.eval("res := round(sqrt(ds[keep long1, double2]));"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", 5.0D, "double2", 1D), @@ -321,7 +321,7 @@ public void testMod() throws ScriptException { assertThat(context.getAttribute("d")).isEqualTo(9D); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := trunc(mod(ds[keep id, long1, double2], 2), 1);"); + Object res = engine.eval("res := trunc(mod(ds[keep long1, double2], 2), 1);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", 0.0D, "double2", 1.2D), @@ -353,7 +353,7 @@ public void testPower() throws ScriptException { assertThat(context.getAttribute("e")).isEqualTo(-125D); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := trunc(power(ds[keep id, long1, double2], 2), 1);"); + Object res = engine.eval("res := trunc(power(ds[keep long1, double2], 2), 1);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", 900.0D, "double2", 1.4D), @@ -383,7 +383,7 @@ public void testRandom() throws ScriptException { assertThat((Double) context.getAttribute("a")).isCloseTo(0.72D, Percentage.withPercentage(0.2)); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := trunc(random(ds[keep id, long1], 200), 1);"); + Object res = engine.eval("res := trunc(random(ds[keep long1], 200), 1);"); assertThat(((Dataset) res).getDataStructure().get("long1").getType()).isEqualTo(Double.class); assertThatThrownBy( @@ -410,7 +410,7 @@ public void testLog() throws ScriptException { .isCloseTo(3.01D, Percentage.withPercentage(0.01)); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := trunc(log(ds[keep id, long1, double2], 2), 1);"); + Object res = engine.eval("res := trunc(log(ds[keep long1, double2], 2), 1);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "long1", 4.9D, "double2", 0.2D), diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/SetFunctionsVisitorTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/SetFunctionsVisitorTest.java index d548831ba..510b58d6d 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/SetFunctionsVisitorTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/SetFunctionsVisitorTest.java @@ -265,9 +265,9 @@ public void testUnion456Issue() throws ScriptException { context.getBindings(ScriptContext.ENGINE_SCOPE).put("MULTIMODE", multimodeDs); engine.eval( - "TEMP_RACINE := MULTIMODE [keep interrogationId, FOO];\n" + "TEMP_RACINE := MULTIMODE [keep FOO];\n" + "RACINE := union(TEMP_RACINE, TEMP_RACINE) ;\n" - + "TEMP_LOOP := MULTIMODE [keep interrogationId, LOOP, LOOP.FOO1]\n" + + "TEMP_LOOP := MULTIMODE [keep LOOP.FOO1]\n" + " [filter LOOP <> \"\"]\n" + " [rename LOOP.FOO1 to FOO1];\n" + "LOOP <- union(TEMP_LOOP, TEMP_LOOP);"); diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/StringFunctionsTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/StringFunctionsTest.java index d291258b9..516a48c45 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/StringFunctionsTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/StringFunctionsTest.java @@ -86,12 +86,12 @@ public void testUnaryStringFunction() throws ScriptException { context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); engine.eval( - "dsTrim := trim(ds[keep id, string1]); " - + "dsLTrim := ltrim(ds[keep id, string1]); " - + "dsRTrim := rtrim(ds[keep id, string1]); " - + "dsUpper := upper(ds[keep id, string1]); " - + "dsLower := lower(ds[keep id, string1]); " - + "dsLen := length(ds[keep id, string1]);"); + "dsTrim := trim(ds[keep string1]); " + + "dsLTrim := ltrim(ds[keep string1]); " + + "dsRTrim := rtrim(ds[keep string1]); " + + "dsUpper := upper(ds[keep string1]); " + + "dsLower := lower(ds[keep string1]); " + + "dsLen := length(ds[keep string1]);"); assertThat(((Dataset) context.getAttribute("dsTrim")).getDataAsMap().get(0)) .isEqualTo(Map.of("id", "Toto", "string1", "toto")); assertThat(((Dataset) context.getAttribute("dsLTrim")).getDataAsMap().get(0)) @@ -127,7 +127,7 @@ public void testSubstrAtom() throws ScriptException { assertThat(context.getAttribute("s5")).isEqualTo(""); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := substr(ds[keep id, string1, string2], 2, 4);"); + Object res = engine.eval("res := substr(ds[keep string1, string2], 2, 4);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "string1", "oto", "string2", ""), @@ -146,7 +146,7 @@ public void testReplaceAtom() throws ScriptException { assertThat(context.getAttribute("r2")).isEqualTo("de"); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := replace(ds[keep id, string1, string2], \"o\", \"O\");"); + Object res = engine.eval("res := replace(ds[keep string1, string2], \"o\", \"O\");"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "string1", "tOtO", "string2", "t"), @@ -182,7 +182,7 @@ public void testInstrAtom() throws ScriptException { assertThat(context.getAttribute("i4")).isEqualTo(0L); context.setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); - Object res = engine.eval("res := instr(ds[keep id, string1, string2], \"o\", 0, 2);"); + Object res = engine.eval("res := instr(ds[keep string1, string2], \"o\", 0, 2);"); assertThat(((Dataset) res).getDataAsMap()) .containsExactlyInAnyOrder( Map.of("id", "Toto", "string1", 4L, "string2", 0L), diff --git a/vtl-prov/src/test/java/fr/insee/vtl/prov/ProvenanceListenerTest.java b/vtl-prov/src/test/java/fr/insee/vtl/prov/ProvenanceListenerTest.java index 3966b7e06..fb2f07326 100644 --- a/vtl-prov/src/test/java/fr/insee/vtl/prov/ProvenanceListenerTest.java +++ b/vtl-prov/src/test/java/fr/insee/vtl/prov/ProvenanceListenerTest.java @@ -95,10 +95,10 @@ public void testWithEmptyLines() throws ScriptException { """ - ds1 := data.ds1[calc identifier id := id, var1 := cast(var1, integer), var2 := cast(var2, integer)]; + ds1 := data.ds1[calc var1 := cast(var1, integer), var2 := cast(var2, integer)]; - ds2_out := other.ds2[calc identifier id := id, var1 := cast(var1, integer), var2 := cast(var2, integer)]; + ds2_out := other.ds2[calc var1 := cast(var1, integer), var2 := cast(var2, integer)]; ds_sum := ds1 + ds2_out; ds_mul <- ds_sum * 3; 'data.ds_res' <- ds_mul[filter mod(var1, 2) = 0][calc var_sum := var1 + var2]; diff --git a/vtl-prov/src/test/java/fr/insee/vtl/prov/RDFTest.java b/vtl-prov/src/test/java/fr/insee/vtl/prov/RDFTest.java index 5560966a6..8d5870e1d 100644 --- a/vtl-prov/src/test/java/fr/insee/vtl/prov/RDFTest.java +++ b/vtl-prov/src/test/java/fr/insee/vtl/prov/RDFTest.java @@ -167,14 +167,13 @@ define datapoint ruleset NUTS3_TYPES (variable facility_type, nb) is CENSUS_NUTS3_2021 := LEGAL_POP [rename REF_AREA to nuts3, TIME_PERIOD to year, POP_TOT to pop] [filter year = "2021"] [calc pop := cast(pop, integer)] - [drop year, NB_COM, POP_MUNI]; + [drop NB_COM, POP_MUNI]; // Extract dataset on general practitioners from BPE by NUTS 3 in 2021 - GENERAL_PRACT_NUTS3_2021 := BPE_NUTS3 [filter facility_type = "D201" and year = "2021"] - [drop facility_type, year]; + GENERAL_PRACT_NUTS3_2021 := BPE_NUTS3 [filter facility_type = "D201" and year = "2021"]; // Merge practitioners and legal population datasets by NUTS 3 in 2021 and compute an indicator - BPE_CENSUS_NUTS3_2021 <- inner_join(GENERAL_PRACT_NUTS3_2021, CENSUS_NUTS3_2021) + BPE_CENSUS_NUTS3_2021 <- inner_join(GENERAL_PRACT_NUTS3_2021, CENSUS_NUTS3_2021 using nuts3) [calc pract_per_10000_inhabitants := nb / pop * 10000] [drop nb, pop];\ """; diff --git a/vtl-sdmx/src/test/java/fr/insee/vtl/BPETest.java b/vtl-sdmx/src/test/java/fr/insee/vtl/BPETest.java index 85168f61b..bce7edfa2 100644 --- a/vtl-sdmx/src/test/java/fr/insee/vtl/BPETest.java +++ b/vtl-sdmx/src/test/java/fr/insee/vtl/BPETest.java @@ -168,7 +168,7 @@ public void bpeV1() throws ScriptException { "CENSUS_NUTS3_2021 := LEGAL_POP [rename REF_AREA to nuts3, POP_TOT to pop]\n" + "[filter TIME_PERIOD = \"2021\"]\n" + "[calc pop := cast(pop, integer)]\n" - + "[drop TIME_PERIOD, NB_COM, POP_MUNI];"); + + "[drop NB_COM, POP_MUNI];"); Dataset censusNuts2021 = (Dataset) engine.getContext().getAttribute("CENSUS_NUTS3_2021"); Structured.DataStructure censusNuts2021Structure = censusNuts2021.getDataStructure(); @@ -181,8 +181,7 @@ public void bpeV1() throws ScriptException { // Step 7 engine.eval( - "GENERAL_PRACT_NUTS3_2021 := BPE_NUTS3[filter facility_type = \"D201\" and TIME_PERIOD = \"2021\"]\n" - + "[drop facility_type, TIME_PERIOD];"); + "GENERAL_PRACT_NUTS3_2021 := BPE_NUTS3[filter facility_type = \"D201\" and TIME_PERIOD = \"2021\"];"); Dataset generalNuts = (Dataset) engine.getContext().getAttribute("GENERAL_PRACT_NUTS3_2021"); Structured.DataStructure generalNutsStructure = generalNuts.getDataStructure(); @@ -195,7 +194,7 @@ public void bpeV1() throws ScriptException { // Step 8 engine.eval( - "BPE_CENSUS_NUTS3_2021 <- inner_join(GENERAL_PRACT_NUTS3_2021, CENSUS_NUTS3_2021)\n" + "BPE_CENSUS_NUTS3_2021 <- inner_join(GENERAL_PRACT_NUTS3_2021, CENSUS_NUTS3_2021 using nuts3)\n" + "[calc pract_per_10000_inhabitants := nb / pop * 10000]\n" + "[drop nb, pop];"); diff --git a/vtl-sdmx/src/test/java/fr/insee/vtl/SDMXVTLWorkflowTest.java b/vtl-sdmx/src/test/java/fr/insee/vtl/SDMXVTLWorkflowTest.java index c5f96d45f..d716fa4d2 100644 --- a/vtl-sdmx/src/test/java/fr/insee/vtl/SDMXVTLWorkflowTest.java +++ b/vtl-sdmx/src/test/java/fr/insee/vtl/SDMXVTLWorkflowTest.java @@ -16,6 +16,7 @@ import javax.script.ScriptEngineManager; import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; public class SDMXVTLWorkflowTest { @@ -32,6 +33,8 @@ public void setUp() { engine.put(VtlScriptEngine.PROCESSING_ENGINE_NAMES, "spark"); } + // Disabled for now, we have to update the remote resource + @Disabled @Test void testRefFromRepo() { @@ -61,6 +64,9 @@ void testGetEmptyDataset() { .isEqualTo( new Structured.DataStructure( List.of( + new Structured.Component( + "facility_type", String.class, Dataset.Role.IDENTIFIER), + new Structured.Component("TIME_PERIOD", String.class, Dataset.Role.IDENTIFIER), new Structured.Component("nuts3", String.class, Dataset.Role.IDENTIFIER), new Structured.Component( "pract_per_10000_inhabitants", Double.class, Dataset.Role.MEASURE)))); @@ -89,14 +95,16 @@ public void testGetRulesetsVTL() { ReadableDataLocation rdl = new ReadableDataLocationTmp("src/test/resources/DSD_BPE_CENSUS.xml"); SDMXVTLWorkflow sdmxVtlWorkflow = new SDMXVTLWorkflow(engine, rdl, Map.of()); assertThat(sdmxVtlWorkflow.getRulesetsVTL()) - .isEqualTo( + .isEqualToIgnoringWhitespace( "define datapoint ruleset UNIQUE_MUNICIPALITY (variable DEPCOM) is\n" - + " MUNICIPALITY_FORMAT_RULE : match_characters(DEPCOM, \"[0-9]{5}|2[A-B][0-9]{3}\") errorcode \"Municipality code is not in the correct format\"\n" - + " end datapoint ruleset;\n" - + "\n" - + "define datapoint ruleset NUTS3_TYPES (variable facility_type, nb) is\n" - + " BOWLING_ALLEY_RULE : when facility_type = \"F102\" then nb > 10 errorcode \"Not enough bowling alleys\"\n" - + " end datapoint ruleset;"); + + " MUNICIPALITY_FORMAT_RULE : match_characters(DEPCOM, \"[0-9]{5}|2[A-B][0-9]{3}\") errorcode\n" + + " \"Municipality code is not in the correct format\"\n" + + " end datapoint ruleset;\n" + + " \n" + + " define datapoint ruleset NUTS3_TYPES (variable facility_type, nb) is\n" + + " BOWLING_ALLEY_RULE : when facility_type = \"F102\" then nb > 10 errorcode \"Not enough bowling\n" + + " alleys\"\n" + + " end datapoint ruleset;"); } @Test @@ -104,36 +112,38 @@ public void testGetTransformationsVTL() { ReadableDataLocation rdl = new ReadableDataLocationTmp("src/test/resources/DSD_BPE_CENSUS.xml"); SDMXVTLWorkflow sdmxVtlWorkflow = new SDMXVTLWorkflow(engine, rdl, Map.of()); assertThat(sdmxVtlWorkflow.getTransformationsVTL()) - .isEqualTo( + .isEqualToIgnoringWhitespace( "// Validation of municipality code in input file\n" + "CHECK_MUNICIPALITY := check_datapoint(BPE_DETAIL_VTL, UNIQUE_MUNICIPALITY invalid);\n" + "\n" + "// Clean BPE input database\n" - + "BPE_DETAIL_CLEAN := BPE_DETAIL_VTL [drop LAMBERT_X, LAMBERT_Y]\n" - + " [rename ID_EQUIPEMENT to id, TYPEQU to facility_type, DEPCOM to municipality, REF_YEAR to year];\n" + + "BPE_DETAIL_CLEAN := BPE_DETAIL_VTL [drop LAMBERT_X, LAMBERT_Y]\n" + + " [rename ID_EQUIPEMENT to id, TYPEQU to facility_type, DEPCOM to municipality, REF_YEAR to year];\n" + "\n" + "// BPE aggregation by municipality, type and year\n" - + "BPE_MUNICIPALITY <- BPE_DETAIL_CLEAN [aggr nb := count(id) group by municipality, year, facility_type] [rename year to TIME_PERIOD];\n" + + "BPE_MUNICIPALITY <- BPE_DETAIL_CLEAN [aggr nb := count(id) group by municipality, year, facility_type] [rename year\n" + + " to TIME_PERIOD];\n" + "\n" + "// BPE aggregation by NUTS 3, type and year\n" - + "BPE_NUTS3 <- BPE_MUNICIPALITY [calc nuts3 := if substr(municipality,1,2) = \"97\" then substr(municipality,1,3) else substr(municipality,1,2)]\n" - + " [aggr nb := count(nb) group by TIME_PERIOD, nuts3, facility_type];\n" + + "BPE_NUTS3 <- BPE_MUNICIPALITY [calc nuts3 := if substr(municipality,1,2) = \"97\" then substr(municipality,1,3)\n" + + " else substr(municipality,1,2)]\n" + + " [aggr nb := count(nb) group by TIME_PERIOD, nuts3, facility_type];\n" + "\n" + "// BPE validation of facility types by NUTS 3\n" + "CHECK_NUTS3_TYPES := check_datapoint(BPE_NUTS3, NUTS3_TYPES invalid);\n" + "\n" + "// Prepare 2021 census dataset by NUTS 3\n" - + "CENSUS_NUTS3_2021 := LEGAL_POP [rename REF_AREA to nuts3, POP_TOT to pop]\n" - + " [filter TIME_PERIOD = \"2021\"]\n" - + " [calc pop := cast(pop, integer)]\n" - + " [drop TIME_PERIOD, NB_COM, POP_MUNI];\n" + + "CENSUS_NUTS3_2021 := LEGAL_POP [rename REF_AREA to nuts3, POP_TOT to pop]\n" + + " [filter TIME_PERIOD = \"2021\"]\n" + + " [calc pop := cast(pop, integer)]\n" + + " [drop NB_COM, POP_MUNI];\n" + "\n" + "// Extract dataset on general practitioners from BPE by NUTS 3 in 2021\n" - + "GENERAL_PRACT_NUTS3_2021 := BPE_NUTS3 [filter facility_type = \"D201\" and TIME_PERIOD = \"2021\"]\n" - + " [drop facility_type, TIME_PERIOD];\n" + + "GENERAL_PRACT_NUTS3_2021 := BPE_NUTS3 [filter facility_type = \"D201\" and TIME_PERIOD = \"2021\"];\n" + "\n" - + "// Merge practitioners and legal population datasets by NUTS 3 in 2021 and compute an indicator\n" - + "BPE_CENSUS_NUTS3_2021 <- inner_join(GENERAL_PRACT_NUTS3_2021, CENSUS_NUTS3_2021)\n" + + "// Merge practitioners and legal population datasets by NUTS 3 in 2021\n" + + "// and compute an indicator\n" + + "BPE_CENSUS_NUTS3_2021 <- inner_join(GENERAL_PRACT_NUTS3_2021, CENSUS_NUTS3_2021 using nuts3)\n" + " [calc pract_per_10000_inhabitants := nb / pop * 10000]\n" + " [drop nb, pop];"); } diff --git a/vtl-sdmx/src/test/resources/DSD_BPE_CENSUS.xml b/vtl-sdmx/src/test/resources/DSD_BPE_CENSUS.xml index b16ece3e0..1d16e408d 100644 --- a/vtl-sdmx/src/test/resources/DSD_BPE_CENSUS.xml +++ b/vtl-sdmx/src/test/resources/DSD_BPE_CENSUS.xml @@ -1,206 +1,302 @@ - + DSD_BPE_DETAIL_1049 false 2024-03-26T10:49:00Z - - + + + - - - Type d'équipements - La classification des équipements en gammes a pour objectif de réunir des équipements qui présentent des logiques d'implantation voisines, en ce sens qu'ils sont fréquemment présents dans les mêmes communes. Ces regroupements permettent d'élaborer des indicateurs synthétiques reflétant l'organisation hiérarchisée des territoires en termes de services à la population. Les gammes d’équipements sont actualisées chaque année pour une nouvelle version de la Base Permanente des Équipements. En effet, d’une part de nouveaux équipements peuvent être introduits dans la base et, d’autre part, l’implantation des équipements dans les communes peut être modifiée, tout cela pouvant avoir des conséquences sur la composition des gammes. - - POLICE - - - - Code officiel géographique 2021 - Les objets géographiques contenus dans le COG sont les collectivités territoriales (communes, départements, régions, collectivités territoriales à statut particulier), les découpages administratifs (arrondissements, arrondissements municipaux, pseudo-cantons) et certains découpages électoraux (cantons), ainsi que les pays et territoires étrangers. Ils sont répertoriés au 1er janvier de chaque année. Pour la mise à jour du millésime N, tous les événements intervenus entre le 02/01/N-1 et 01/01/N sont pris en compte. - - L'Abergement-Clémenciat - - + + + Type d'équipements + La classification des équipements en gammes a pour objectif de réunir des + équipements qui présentent des logiques d'implantation voisines, en ce sens qu'ils sont fréquemment + présents dans les mêmes communes. Ces regroupements permettent d'élaborer des indicateurs + synthétiques reflétant l'organisation hiérarchisée des territoires en termes de services à la + population. Les gammes d’équipements sont actualisées chaque année pour une nouvelle version de la + Base Permanente des Équipements. En effet, d’une part de nouveaux équipements peuvent être + introduits dans la base et, d’autre part, l’implantation des équipements dans les communes peut être + modifiée, tout cela pouvant avoir des conséquences sur la composition des gammes. + + + POLICE + + + + Code officiel géographique 2021 + Les objets géographiques contenus dans le COG sont les collectivités + territoriales (communes, départements, régions, collectivités territoriales à statut particulier), + les découpages administratifs (arrondissements, arrondissements municipaux, pseudo-cantons) et + certains découpages électoraux (cantons), ainsi que les pays et territoires étrangers. Ils sont + répertoriés au 1er janvier de chaque année. Pour la mise à jour du millésime N, tous les événements + intervenus entre le 02/01/N-1 et 01/01/N sont pris en compte. + + + L'Abergement-Clémenciat + + Code officiel géographique - Les objets géographiques contenus dans le COG sont les collectivités territoriales (communes, départements, régions, collectivités territoriales à statut particulier), les découpages administratifs (arrondissements, arrondissements municipaux, pseudo-cantons) et certains découpages électoraux (cantons), ainsi que les pays et territoires étrangers. Ils sont répertoriés au 1er janvier de chaque année. Pour la mise à jour du millésime N, tous les événements intervenus entre le 02/01/N-1 et 01/01/N sont pris en compte. + Les objets géographiques contenus dans le COG sont les collectivités + territoriales (communes, départements, régions, collectivités territoriales à statut particulier), + les découpages administratifs (arrondissements, arrondissements municipaux, pseudo-cantons) et + certains découpages électoraux (cantons), ainsi que les pays et territoires étrangers. Ils sont + répertoriés au 1er janvier de chaque année. Pour la mise à jour du millésime N, tous les événements + intervenus entre le 02/01/N-1 et 01/01/N sont pris en compte. + Ain - - - - Ensemble des concepts de l'Insee - - Equipement - - - Type de l'équipement - - urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_TYPEQU(1.0) - - - - - Commune - - urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_DEPCOM(1.0) - - - - - Année de référence - - - Abscisse (Système de coordonnées Lambert 93) - - - Ordonnée (Système de coordonnées Lambert 93) - - + + + + Ensemble des concepts de l'Insee + + Equipement + + + Type de l'équipement + + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_TYPEQU(1.0) + + + + + + Commune + + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_DEPCOM(1.0) + + + + + + Année de référence + + + Abscisse (Système de coordonnées Lambert 93) + + + Ordonnée (Système de coordonnées Lambert 93) + + Code officiel géographique - + Année - + Nombre de communes - + Nombre d'équipement - + Nombre de médecins généralistes pour 10 OOO habitants - + Population municipale - + Population totale - - + + - + Geolocalized Equipments (geography 2021) - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).EQUIPEMENT - - + + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).EQUIPEMENT + + + - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).TYPE_EQUIPEMENT + + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).TYPE_EQUIPEMENT + - urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_TYPEQU(1.0) - + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_TYPEQU(1.0) + + - + - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).MUNICIPALITY + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).MUNICIPALITY + - urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_DEPCOM(1.0) - + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_DEPCOM(1.0) + + - + - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_YEAR + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_YEAR + - + - + - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).LAMBERT_X - - - + + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).LAMBERT_X + + + + - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).LAMBERT_Y - - - + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).LAMBERT_Y + + + + - Cube populations légales - - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_AREA - - urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_REF_AREA(1.0) - - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).TIME_PERIOD - - - - - - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).NB_COM - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).POP_MUNI - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).POP_TOT - - - - + isExternalReference="false" + agencyID="FR1" + id="LEGAL_POP_CUBE" + version="1.0"> + Cube populations légales + + + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_AREA + + + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_REF_AREA(1.0) + + + + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).TIME_PERIOD + + + + + + + + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).NB_COM + + + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).POP_MUNI + + + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).POP_TOT + + + + + Cube BPE par ville - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).MUNICIPALITY + + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).MUNICIPALITY + - urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_DEPCOM(1.0) + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_DEPCOM(1.0) + - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).TYPE_EQUIPEMENT + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).TYPE_EQUIPEMENT + - urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_TYPEQU(1.0) + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_TYPEQU(1.0) + - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_YEAR + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_YEAR + - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).NB_EQUIPEMENT + + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).NB_EQUIPEMENT + @@ -251,201 +363,246 @@ version="1.0"> Cube BPE par nuts 3 - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_AREA + + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_AREA + - urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_REF_AREA(1.0) + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_REF_AREA(1.0) + - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).TYPE_EQUIPEMENT + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).TYPE_EQUIPEMENT + - urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_TYPEQU(1.0) + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_TYPEQU(1.0) + - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_YEAR + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_YEAR + - + - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).NB_EQUIPEMENT + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).NB_EQUIPEMENT + - + Cube médecins généralistes par habitants ventillé par nuts 3 en 2010 - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_AREA + + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_AREA + - urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_REF_AREA(1.0) + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_REF_AREA(1.0) + - - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).NB_GENERALISTES_PAR_HABITANTS + + + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).NB_GENERALISTES_PAR_HABITANTS + - - Dataflow for BPE_DETAIL - urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:BPE_DETAIL(1.0) + + Dataflow for BPE_DETAIL + urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:BPE_DETAIL(1.0) + - - Dataflow for LEGAL_POP_CUBE - urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:LEGAL_POP_CUBE(1.0) + + Dataflow for LEGAL_POP_CUBE + urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:LEGAL_POP_CUBE(1.0) + - + Dataflow for BPE_MUNICIPALITY - urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:BPE_MUNICIPALITY(1.0) + urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:BPE_MUNICIPALITY(1.0) + - + Dataflow for BPE_NUTS3 - urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:BPE_NUTS3(1.0) + urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:BPE_NUTS3(1.0) + - + Dataflow for BPE_CENSUS_NUTS3_2021 - urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:BPE_CENSUS_NUTS3_2021(1.0) + urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:BPE_CENSUS_NUTS3_2021(1.0) + - - VTL Mapping Scheme #1 - - VTL Mapping #1 - urn:sdmx:org.sdmx.infomodel.datastructure.Dataflow=FR1:BPE_DETAIL(1.0) - - - VTL Mapping #2 - urn:sdmx:org.sdmx.infomodel.datastructure.Dataflow=FR1:LEGAL_POP_CUBE(1.0) - + + VTL Mapping Scheme #1 + + VTL Mapping #1 + urn:sdmx:org.sdmx.infomodel.datastructure.Dataflow=FR1:BPE_DETAIL(1.0) + + + VTL Mapping #2 + urn:sdmx:org.sdmx.infomodel.datastructure.Dataflow=FR1:LEGAL_POP_CUBE(1.0) + + - - Ruleset Scheme #1 - - Datapoint Ruleset UNIQUE_MUNICIPALITY - - define datapoint ruleset UNIQUE_MUNICIPALITY (variable DEPCOM) is - MUNICIPALITY_FORMAT_RULE : match_characters(DEPCOM, "[0-9]{5}|2[A-B][0-9]{3}") errorcode "Municipality code is not in the correct format" - end datapoint ruleset; - - - - Datapoint Ruleset NUTS3_TYPES - - define datapoint ruleset NUTS3_TYPES (variable facility_type, nb) is - BOWLING_ALLEY_RULE : when facility_type = "F102" then nb > 10 errorcode "Not enough bowling alleys" - end datapoint ruleset; - - - + + Ruleset Scheme #1 + + Datapoint Ruleset UNIQUE_MUNICIPALITY + + define datapoint ruleset UNIQUE_MUNICIPALITY (variable DEPCOM) is + MUNICIPALITY_FORMAT_RULE : match_characters(DEPCOM, "[0-9]{5}|2[A-B][0-9]{3}") errorcode + "Municipality code is not in the correct format" + end datapoint ruleset; + + + + Datapoint Ruleset NUTS3_TYPES + + define datapoint ruleset NUTS3_TYPES (variable facility_type, nb) is + BOWLING_ALLEY_RULE : when facility_type = "F102" then nb > 10 errorcode "Not enough bowling + alleys" + end datapoint ruleset; + + + - - Transformation Scheme for BPE - CENSUS - - Step 1 - Validation of municipality code in input file - check_datapoint(BPE_DETAIL_VTL, UNIQUE_MUNICIPALITY invalid) - CHECK_MUNICIPALITY - - - Step 2 - Clean BPE input database - - BPE_DETAIL_VTL [drop LAMBERT_X, LAMBERT_Y] - [rename ID_EQUIPEMENT to id, TYPEQU to facility_type, DEPCOM to municipality, REF_YEAR to year] - - BPE_DETAIL_CLEAN - - - Step 3 - BPE aggregation by municipality, type and year - - BPE_DETAIL_CLEAN [aggr nb := count(id) group by municipality, year, facility_type] [rename year to TIME_PERIOD] - - BPE_MUNICIPALITY - - - Step 4 - BPE aggregation by NUTS 3, type and year - - BPE_MUNICIPALITY [calc nuts3 := if substr(municipality,1,2) = "97" then substr(municipality,1,3) else substr(municipality,1,2)] - [aggr nb := count(nb) group by TIME_PERIOD, nuts3, facility_type] - - BPE_NUTS3 - - - Step 5 - BPE validation of facility types by NUTS 3 - check_datapoint(BPE_NUTS3, NUTS3_TYPES invalid) - CHECK_NUTS3_TYPES - - - Step 6 - Prepare 2021 census dataset by NUTS 3 - - LEGAL_POP [rename REF_AREA to nuts3, POP_TOT to pop] - [filter TIME_PERIOD = "2021"] - [calc pop := cast(pop, integer)] - [drop TIME_PERIOD, NB_COM, POP_MUNI] - - CENSUS_NUTS3_2021 - - - Step 7 - Extract dataset on general practitioners from BPE by NUTS 3 in 2021 - - BPE_NUTS3 [filter facility_type = "D201" and TIME_PERIOD = "2021"] - [drop facility_type, TIME_PERIOD] - - GENERAL_PRACT_NUTS3_2021 - - - Step 8 - Merge practitioners and legal population datasets by NUTS 3 in 2021 and compute an indicator - - inner_join(GENERAL_PRACT_NUTS3_2021, CENSUS_NUTS3_2021) + + Transformation Scheme for BPE - CENSUS + + Step 1 + Validation of municipality code in input file + check_datapoint(BPE_DETAIL_VTL, UNIQUE_MUNICIPALITY invalid) + CHECK_MUNICIPALITY + + + Step 2 + Clean BPE input database + + BPE_DETAIL_VTL [drop LAMBERT_X, LAMBERT_Y] + [rename ID_EQUIPEMENT to id, TYPEQU to facility_type, DEPCOM to municipality, REF_YEAR to year] + + BPE_DETAIL_CLEAN + + + Step 3 + BPE aggregation by municipality, type and year + + BPE_DETAIL_CLEAN [aggr nb := count(id) group by municipality, year, facility_type] [rename year + to TIME_PERIOD] + + BPE_MUNICIPALITY + + + Step 4 + BPE aggregation by NUTS 3, type and year + + BPE_MUNICIPALITY [calc nuts3 := if substr(municipality,1,2) = "97" then substr(municipality,1,3) + else substr(municipality,1,2)] + [aggr nb := count(nb) group by TIME_PERIOD, nuts3, facility_type] + + BPE_NUTS3 + + + Step 5 + BPE validation of facility types by NUTS 3 + check_datapoint(BPE_NUTS3, NUTS3_TYPES invalid) + CHECK_NUTS3_TYPES + + + Step 6 + Prepare 2021 census dataset by NUTS 3 + + LEGAL_POP [rename REF_AREA to nuts3, POP_TOT to pop] + [filter TIME_PERIOD = "2021"] + [calc pop := cast(pop, integer)] + [drop NB_COM, POP_MUNI] + + CENSUS_NUTS3_2021 + + + Step 7 + Extract dataset on general practitioners from BPE by NUTS 3 in 2021 + + + BPE_NUTS3 [filter facility_type = "D201" and TIME_PERIOD = "2021"] + + GENERAL_PRACT_NUTS3_2021 + + + Step 8 + Merge practitioners and legal population datasets by NUTS 3 in 2021 + and compute an indicator + + + inner_join(GENERAL_PRACT_NUTS3_2021, CENSUS_NUTS3_2021 using nuts3) [calc pract_per_10000_inhabitants := nb / pop * 10000] [drop nb, pop] - - BPE_CENSUS_NUTS3_2021 - - urn:sdmx:org.sdmx.infomodel.transformation.VtlMappingScheme=FR1:VTLMS1(1.0) - urn:sdmx:org.sdmx.infomodel.transformation.RulesetScheme=FR1:RS1(1.0) - - + + BPE_CENSUS_NUTS3_2021 + + urn:sdmx:org.sdmx.infomodel.transformation.VtlMappingScheme=FR1:VTLMS1(1.0) + + urn:sdmx:org.sdmx.infomodel.transformation.RulesetScheme=FR1:RS1(1.0) + + + \ No newline at end of file diff --git a/vtl-spark/src/test/java/fr/insee/vtl/spark/SparkDatasetTest.java b/vtl-spark/src/test/java/fr/insee/vtl/spark/SparkDatasetTest.java index 5cfc73340..d0e97f7dd 100644 --- a/vtl-spark/src/test/java/fr/insee/vtl/spark/SparkDatasetTest.java +++ b/vtl-spark/src/test/java/fr/insee/vtl/spark/SparkDatasetTest.java @@ -130,11 +130,10 @@ public void testParquetMetadataWriting(@TempDir Path tmpDirectory) throws Script context.setAttribute("ds2", dsWithMetadata, ScriptContext.ENGINE_SCOPE); - engine.eval("ds3 := ds2[calc attribute school_id := school_id, identifier year := year];"); + engine.eval("ds3 := ds2[calc identifier year := year];"); SparkDataset dsWithMetadataAndRoles = (SparkDataset) engine.getContext().getAttribute("ds3"); - assertTrue(dsWithMetadataAndRoles.getDataStructure().get("school_id").isAttribute()); assertTrue(dsWithMetadataAndRoles.getDataStructure().get("year").isIdentifier()); } } diff --git a/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/OperatorsTest.java b/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/OperatorsTest.java index 3ad9bb9d3..10808118b 100644 --- a/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/OperatorsTest.java +++ b/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/OperatorsTest.java @@ -47,7 +47,7 @@ public void testOperators() throws ScriptException { engine.eval( "res := ds1#long1; " + "res1 := isnull(ds1); " - + "ds_1 := ds1[keep id, long1, double1]; ds_2 := ds2[keep id, long1, double1]; " + + "ds_1 := ds1[keep long1, double1]; ds_2 := ds2[keep long1, double1]; " + "res2 := ds_1 + ds_2; " + "res3 := ds_1 - ds_2; " + "res4 := ds_1 * ds_2; " @@ -67,7 +67,8 @@ public void testOperators() throws ScriptException { + "res18 := mod(ds_1, 5); " + "res19 := power(ds_1, 5); " + "res20 := log(abs(ds_1), 5); " - + "ds_11 := ds1[keep id, string1, string2]; ds_22 := ds2[keep id, string1][calc string2 := string1]; " + + "ds_11 := ds1[keep string1, string2]; " + + "ds_22 := ds2[keep string1][calc string2 := string1]; " + "res21 := ds_11 || ds_22; "); var res = engine.getContext().getAttribute("res21"); assertThat(((Dataset) res).getDataStructure().get("string1").getType()).isEqualTo(String.class); @@ -78,8 +79,8 @@ public void testPlan() throws ScriptException { engine.getContext().setAttribute("ds_1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE); engine.getContext().setAttribute("ds_2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE); engine.eval( - "ds1 := ds_1[keep id, long1][rename long1 to bool_var]; " - + "ds2 := ds_2[keep id, long1][rename long1 to bool_var]; " + "ds1 := ds_1[keep long1][rename long1 to bool_var]; " + + "ds2 := ds_2[keep long1][rename long1 to bool_var]; " + "res := if ds1 > ds2 then ds1 else ds2;"); var res = engine.getContext().getAttribute("res"); assertThat(((Dataset) res).getDataAsMap()) diff --git a/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/ProjectTest.java b/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/ProjectTest.java index 842758114..5512f86d6 100644 --- a/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/ProjectTest.java +++ b/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/ProjectTest.java @@ -59,7 +59,7 @@ public void testProjection() throws ScriptException { ScriptContext context = engine.getContext(); context.setAttribute("ds", dataset, ScriptContext.ENGINE_SCOPE); - engine.eval("ds1 := ds[keep name, age];"); + engine.eval("ds1 := ds[keep age];"); assertThat(engine.getContext().getAttribute("ds")) .isInstanceOf(fr.insee.vtl.model.Dataset.class); diff --git a/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/ValidationTest.java b/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/ValidationTest.java index 19f8eff0d..888bc55b7 100644 --- a/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/ValidationTest.java +++ b/vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/ValidationTest.java @@ -713,9 +713,9 @@ public void testValidationSimpleException() throws ScriptException { Dataset DS_r_to_rename = (Dataset) engine.getContext().getAttribute("DS_r_to_rename"); List DS_r_to_renameMeasure = DS_r_to_rename.getDataStructure().values().stream() - .filter(c -> c.isMeasure()) - .map(c -> c.getName()) - .collect(Collectors.toList()); + .filter(Structured.Component::isMeasure) + .map(Structured.Component::getName) + .toList(); assertThat(DS_r_to_renameMeasure.size()).isEqualTo(4); assertThat(DS_r_to_renameMeasure.contains("imbalance")).isTrue(); } @@ -744,8 +744,8 @@ public void serializationCheckDatapointTest() throws ScriptException { "ds1_1 := ds1[calc identifier id := id, long1 := cast(long1, integer), double1 := cast(double1, number), bool1 := cast(bool1, boolean)]; " + "ds2_1 := ds2[calc identifier id := id, long1 := cast(long1, integer), double1 := cast(double1, number), bool1 := cast(bool1, boolean)]; " + "ds_concat := ds1_1#string1 || \" and \" || ds2_1#string1; " - + "ds1_num := ds1_1[keep id, long1, double1]; " - + "ds2_num := ds2_1[keep id, long1, double1]; " + + "ds1_num := ds1_1[keep long1, double1]; " + + "ds2_num := ds2_1[keep long1, double1]; " + "ds_mod := mod(ds1_num, 2); " + "ds_sum := ds1_num + ds2_num; " + "ds_compare := ds1_num = ds2_num; " From e6a2465094eb396edda1b6f9a8a7dba2af82a4e9 Mon Sep 17 00:00:00 2001 From: Hadrien Kohl Date: Fri, 16 Jan 2026 10:20:54 +0100 Subject: [PATCH 13/15] Refactor --- .../vtl/engine/visitors/ClauseVisitor.java | 61 ++++++++----------- .../engine/visitors/ClauseVisitorTest.java | 3 +- .../functions/JoinFunctionsTest.java | 2 +- 3 files changed, 27 insertions(+), 39 deletions(-) diff --git a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java index d8853822a..d4c541696 100644 --- a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java +++ b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java @@ -6,6 +6,7 @@ import fr.insee.vtl.engine.VtlScriptEngine; import fr.insee.vtl.engine.exceptions.InvalidArgumentException; +import fr.insee.vtl.engine.exceptions.UndefinedVariableException; import fr.insee.vtl.engine.exceptions.VtlRuntimeException; import fr.insee.vtl.engine.visitors.expression.ExpressionVisitor; import fr.insee.vtl.model.*; @@ -111,48 +112,34 @@ public DatasetExpression visitKeepOrDropClause(VtlParser.KeepOrDropClauseContext // The type of the op can either be KEEP or DROP. boolean keep = ctx.op.getType() == VtlParser.KEEP; - // Columns explicitly requested in the KEEP/DROP clause - List cleanColumnNames = ctx.componentID().stream().map(ClauseVisitor::getName).toList(); - - Collection inputColumns = datasetExpression.getDataStructure().keySet(); - // Dataset identifiers (role = IDENTIFIER) Map identifiers = datasetExpression.getDataStructure().getIdentifiers().stream() .collect(Collectors.toMap(Structured.Component::getName, Function.identity())); + var columns = ctx.componentID().stream() + .collect(Collectors.toMap(ClauseVisitor::getName, Function.identity())); + + var structure = datasetExpression.getDataStructure(); + // Evaluate that all requested columns must exist in the dataset or raise an error - for (String requested : cleanColumnNames) { - if (!inputColumns.contains(requested)) { - throw new VtlRuntimeException( - new InvalidArgumentException( - // TODO: use actual column context. - String.format("'%s' not found in dataset.", requested), fromContext(ctx))); - } + // TODO: Is that no handled already? + for (String col : columns.keySet()) { + if (!structure.containsKey(col)) { + throw new VtlRuntimeException( + new UndefinedVariableException(col, fromContext(columns.get(col))) + ); + } } // VTL specification: identifiers must not appear explicitly in KEEP - Set forbidden = - cleanColumnNames.stream() - .filter(identifiers::containsKey) - .collect(Collectors.toCollection(LinkedHashSet::new)); - - if (!forbidden.isEmpty()) { - StringBuilder details = new StringBuilder(); - for (String id : forbidden) { - Dataset.Component comp = identifiers.get(id); - details.append( - String.format( - "%s(role=%s, type=%s) ", - id, comp.getRole(), comp.getType() != null ? comp.getType() : "n/a")); - } - throw new VtlRuntimeException( - new InvalidArgumentException( - String.format( - "identifiers %s must not be explicitly listed in KEEP/DROP. Details: %s", - forbidden, details.toString().trim()), - // TODO: use actual column context. - fromContext(ctx))); + // TODO: Use multi errors that noah created? + for (String col : columns.keySet()) { + if (structure.get(col).isIdentifier()) { + throw new VtlRuntimeException( + new InvalidArgumentException("cannot keep/drop identifiers", fromContext(columns.get(col))) + ); + } } // Build result set: @@ -161,10 +148,10 @@ public DatasetExpression visitKeepOrDropClause(VtlParser.KeepOrDropClauseContext final Set resultSet = new LinkedHashSet<>(); resultSet.addAll(identifiers.keySet()); if (keep) { - resultSet.addAll(cleanColumnNames); + resultSet.addAll(columns.keySet()); } else { - for (String col : inputColumns) { - if (!cleanColumnNames.contains(col)) { + for (String col : structure.keySet()) { + if (!columns.keySet().contains(col)) { resultSet.add(col); } } @@ -172,7 +159,7 @@ public DatasetExpression visitKeepOrDropClause(VtlParser.KeepOrDropClauseContext // Retrieve the output column names (identifiers + requested) final List outputColumns = - inputColumns.stream().filter(resultSet::contains).collect(Collectors.toList()); + structure.keySet().stream().filter(resultSet::contains).collect(Collectors.toList()); return processingEngine.executeProject(datasetExpression, outputColumns); } diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java index 4c06bb405..992e56f19 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java @@ -231,7 +231,8 @@ public void testKeepDropClause_identifierExplicitShouldFail() { assertThatThrownBy(() -> engine.eval("ds := ds1[keep name, age];")) .isInstanceOf(VtlScriptException.class) - .hasMessageContaining("identifiers [name] must not be explicitly listed in KEEP/DROP"); + .hasMessage("cannot keep/drop identifiers") + .is(atPosition(0, 15, 19)); } @Test diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/JoinFunctionsTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/JoinFunctionsTest.java index f18c03612..f81ac4b7b 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/JoinFunctionsTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/JoinFunctionsTest.java @@ -138,7 +138,7 @@ public void testLeftJoinWithDifferentIdentifiers() throws ScriptException { """)) .isInstanceOf(InvalidArgumentException.class) .hasMessage( - "CALC cannot overwrite IDENTIFIER 'Id_2' (role=IDENTIFIER, type=class java.lang.Long)."); + "using component Id_2 has to be an identifier"); } @Test From 86674cdc0cd41f66d224e15ec6d39b730df79ee2 Mon Sep 17 00:00:00 2001 From: Hadrien Kohl Date: Fri, 16 Jan 2026 10:21:09 +0100 Subject: [PATCH 14/15] Improve atPosition() condition --- .../insee/vtl/engine/VtlScriptEngineTest.java | 48 ++++++++++++------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/VtlScriptEngineTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/VtlScriptEngineTest.java index d927d21d8..0ea35c5d5 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/VtlScriptEngineTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/VtlScriptEngineTest.java @@ -33,24 +33,36 @@ public static Condition atPosition( public static Condition atPosition( Integer startLine, Integer endLine, Integer startColumn, Integer endColumn) { - return new Condition<>( - throwable -> { - var scriptException = (VtlScriptException) throwable; - var position = scriptException.getPosition(); - return position.startLine().equals(startLine) - && position.endLine().equals(endLine) - && position.startColumn().equals(startColumn) - && position.endColumn().equals(endColumn); - }, - "at position <%d:%d-%d:%d>", - startLine, - endLine, - startColumn, - endColumn); - } - - public static > Boolean test(T left, T right) { - return true; + return new Condition() { + @Override + public boolean matches(T throwable) { + if (!(throwable instanceof VtlScriptException scriptException)) { + return false; + } + var position = scriptException.getPosition(); + boolean matches = position.startLine().equals(startLine) + && position.endLine().equals(endLine) + && position.startColumn().equals(startColumn) + && position.endColumn().equals(endColumn); + + // Set description that includes actual position if it doesn't match + if (matches) { + describedAs("at position <%d:%d-%d:%d>", startLine, startColumn, endLine, endColumn); + } else { + describedAs( + "at position <%d:%d-%d:%d> but was <%d:%d-%d:%d>", + startLine, + startColumn, + endLine, + endColumn, + position.startLine(), + position.startColumn(), + position.endLine(), + position.endColumn()); + } + return matches; + } + }; } @BeforeEach From 689b27d40c3a50a47e4e412965dd7928f854ff65 Mon Sep 17 00:00:00 2001 From: Hadrien Kohl Date: Fri, 16 Jan 2026 10:23:00 +0100 Subject: [PATCH 15/15] Reformat --- .../vtl/engine/visitors/ClauseVisitor.java | 24 +++++++++---------- .../insee/vtl/engine/VtlScriptEngineTest.java | 9 +++---- .../functions/JoinFunctionsTest.java | 3 +-- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java index d4c541696..02aee4318 100644 --- a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java +++ b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java @@ -117,29 +117,29 @@ public DatasetExpression visitKeepOrDropClause(VtlParser.KeepOrDropClauseContext datasetExpression.getDataStructure().getIdentifiers().stream() .collect(Collectors.toMap(Structured.Component::getName, Function.identity())); - var columns = ctx.componentID().stream() - .collect(Collectors.toMap(ClauseVisitor::getName, Function.identity())); + var columns = + ctx.componentID().stream() + .collect(Collectors.toMap(ClauseVisitor::getName, Function.identity())); var structure = datasetExpression.getDataStructure(); // Evaluate that all requested columns must exist in the dataset or raise an error // TODO: Is that no handled already? for (String col : columns.keySet()) { - if (!structure.containsKey(col)) { - throw new VtlRuntimeException( - new UndefinedVariableException(col, fromContext(columns.get(col))) - ); - } + if (!structure.containsKey(col)) { + throw new VtlRuntimeException( + new UndefinedVariableException(col, fromContext(columns.get(col)))); + } } // VTL specification: identifiers must not appear explicitly in KEEP // TODO: Use multi errors that noah created? for (String col : columns.keySet()) { - if (structure.get(col).isIdentifier()) { - throw new VtlRuntimeException( - new InvalidArgumentException("cannot keep/drop identifiers", fromContext(columns.get(col))) - ); - } + if (structure.get(col).isIdentifier()) { + throw new VtlRuntimeException( + new InvalidArgumentException( + "cannot keep/drop identifiers", fromContext(columns.get(col)))); + } } // Build result set: diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/VtlScriptEngineTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/VtlScriptEngineTest.java index 0ea35c5d5..e0cb8a94c 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/VtlScriptEngineTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/VtlScriptEngineTest.java @@ -40,10 +40,11 @@ public boolean matches(T throwable) { return false; } var position = scriptException.getPosition(); - boolean matches = position.startLine().equals(startLine) - && position.endLine().equals(endLine) - && position.startColumn().equals(startColumn) - && position.endColumn().equals(endColumn); + boolean matches = + position.startLine().equals(startLine) + && position.endLine().equals(endLine) + && position.startColumn().equals(startColumn) + && position.endColumn().equals(endColumn); // Set description that includes actual position if it doesn't match if (matches) { diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/JoinFunctionsTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/JoinFunctionsTest.java index f81ac4b7b..8042158e0 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/JoinFunctionsTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/functions/JoinFunctionsTest.java @@ -137,8 +137,7 @@ public void testLeftJoinWithDifferentIdentifiers() throws ScriptException { result2 := left_join(ds_1_1, ds_2 using Id_2);\ """)) .isInstanceOf(InvalidArgumentException.class) - .hasMessage( - "using component Id_2 has to be an identifier"); + .hasMessage("using component Id_2 has to be an identifier"); } @Test