From 819f671905c8943ab3b7eb9465eb357e0b48ce6a Mon Sep 17 00:00:00 2001 From: Suyash Kumar Date: Mon, 19 May 2025 23:23:23 +0000 Subject: [PATCH 1/3] Implement CQL Substring operator This commit introduces the implementation for the CQL Substring operator. * Initial jules implementation * Correctly support startIndex=stringLen case, correct tests * Refactor for readability Author: Suyash Kumar --- interpreter/operator_dispatcher.go | 11 + interpreter/operator_string.go | 90 ++++++++ model/model.go | 9 +- parser/operators.go | 15 ++ tests/enginetests/operator_string_test.go | 258 ++++++++++++++++++++++ tests/spectests/exclusions/exclusions.go | 1 - 6 files changed, 380 insertions(+), 4 deletions(-) diff --git a/interpreter/operator_dispatcher.go b/interpreter/operator_dispatcher.go index e42c2a2..32aba18 100644 --- a/interpreter/operator_dispatcher.go +++ b/interpreter/operator_dispatcher.go @@ -1410,6 +1410,17 @@ func (i *interpreter) naryOverloads(m model.INaryExpression) ([]convert.Overload Result: evalRound, }, }, nil + case *model.Substring: + return []convert.Overload[evalNarySignature]{ + { + Operands: []types.IType{types.String, types.Integer}, + Result: evalSubstring, + }, + { + Operands: []types.IType{types.String, types.Integer, types.Integer}, + Result: evalSubstring, + }, + }, nil default: return nil, fmt.Errorf("unsupported Nary Expression %v", m.GetName()) } diff --git a/interpreter/operator_string.go b/interpreter/operator_string.go index 9be558f..3e15453 100644 --- a/interpreter/operator_string.go +++ b/interpreter/operator_string.go @@ -448,3 +448,93 @@ func evalMatches(m model.IBinaryExpression, argString, patternString result.Valu res := match != "" return result.New(res) } + +// Substring(stringToSub String, startIndex Integer) String +// Substring(stringToSub String, startIndex Integer, length Integer) String +// https://cql.hl7.org/09-b-cqlreference.html#substring +func evalSubstring(m model.INaryExpression, operands []result.Value) (result.Value, error) { + if len(operands) < 2 || len(operands) > 3 { + return result.Value{}, fmt.Errorf("substring expects 2 or 3 arguments, got %d", len(operands)) + } + + // Check for null operands first + if result.IsNull(operands[0]) || result.IsNull(operands[1]) { + return result.New(nil) + } + + stringToSub, startIndex, err := extractSubstringBaseArgs(operands) + if err != nil { + return result.Value{}, err + } + + runes := []rune(stringToSub) + stringLen := int32(len(runes)) + + // Special case: If string is empty and startIndex is 0, return empty string + if stringLen == 0 && startIndex == 0 { + return result.New("") + } + + // Rule: If startIndex is less than 0 or greater than or equal to the length of the stringToSub, the result is null. + if startIndex < 0 || startIndex >= stringLen { + return result.New(nil) + } + + // Handle three-argument form: Substring(stringToSub, startIndex, length) + if len(operands) == 3 { + return substringWithLength(runes, startIndex, operands[2]) + } + + // Handle two-argument form: Substring(stringToSub, startIndex) + // Rule: If length is not specified, the result is the substring of stringToSub starting at startIndex. + return result.New(string(runes[startIndex:])) +} + +// extractSubstringBaseArgs extracts and validates the first two arguments for Substring +// Note: This assumes operands are already checked for null +func extractSubstringBaseArgs(operands []result.Value) (string, int32, error) { + // Operand 0: stringToSub (String) + stringToSub, err := result.ToString(operands[0]) + if err != nil { + return "", 0, fmt.Errorf("could not convert stringToSub to string: %w", err) + } + + // Operand 1: startIndex (Integer) + startIndex, err := result.ToInt32(operands[1]) + if err != nil { + return "", 0, fmt.Errorf("could not convert startIndex to int32: %w", err) + } + + return stringToSub, startIndex, nil +} + +// substringWithLength handles the three-argument form of Substring +func substringWithLength(runes []rune, startIndex int32, lengthOperand result.Value) (result.Value, error) { + if result.IsNull(lengthOperand) { + return result.New(nil) + } + + length, err := result.ToInt32(lengthOperand) + if err != nil { + return result.Value{}, fmt.Errorf("could not convert length to int32: %w", err) + } + + // Rule: If length is less than 0, the result is null. + if length < 0 { + return result.New(nil) + } + + // Rule: If length is 0, the result is an empty string. + if length == 0 { + return result.New("") + } + + stringLen := int32(len(runes)) + endIndex := startIndex + length + // Rule: If length is greater than the remaining characters, include characters to the end. + if endIndex > stringLen { + endIndex = stringLen + } + + return result.New(string(runes[startIndex:endIndex])) +} diff --git a/model/model.go b/model/model.go index 3acd038..9d6ea42 100644 --- a/model/model.go +++ b/model/model.go @@ -1054,9 +1054,9 @@ type Union struct{ *BinaryExpression } type Split struct{ *BinaryExpression } // Substring ELM Expression https://cql.hl7.org/09-b-cqlreference.html#substring -// Substring is an OperatorExpression in ELM, but we're modeling it as a BinaryExpression since in CQL -// it takes two or three arguments (string, start, length). -type Substring struct{ *BinaryExpression } +// Substring is an OperatorExpression in ELM. It takes two or three arguments (string, start, length). +// We model it as a NaryExpression to handle both overloads. +type Substring struct{ *NaryExpression } // Indexer ELM Expression https://cql.hl7.org/04-logicalspecification.html#indexer. type Indexer struct{ *BinaryExpression } @@ -1642,6 +1642,9 @@ func (i *Indexer) GetName() string { return "Indexer" } // GetName returns the name of the system operator. func (a *IndexOf) GetName() string { return "IndexOf" } +// GetName returns the name of the system operator. +func (s *Substring) GetName() string { return "Substring" } + // GetName returns the name of the system operator. func (m *Median) GetName() string { return "Median" } diff --git a/parser/operators.go b/parser/operators.go index e934ee9..651837f 100644 --- a/parser/operators.go +++ b/parser/operators.go @@ -286,6 +286,21 @@ func (p *Parser) loadSystemOperators() error { } }, }, + { + name: "Substring", + operands: [][]types.IType{ + {types.String, types.Integer}, + {types.String, types.Integer, types.Integer}, + }, + model: func() model.IExpression { + return &model.Substring{ + // NaryExpression is used here because Substring can have 2 or 3 operands. + NaryExpression: &model.NaryExpression{ + Expression: model.ResultType(types.String), + }, + } + }, + }, // CONVERT QUANTITY OPERATOR { name: "ConvertQuantity", diff --git a/tests/enginetests/operator_string_test.go b/tests/enginetests/operator_string_test.go index 284d849..61d6740 100644 --- a/tests/enginetests/operator_string_test.go +++ b/tests/enginetests/operator_string_test.go @@ -120,6 +120,212 @@ func TestConcatenate(t *testing.T) { } } +func TestSubstring(t *testing.T) { + tests := []struct { + name string + cql string + wantModel model.IExpression + wantResult result.Value + }{ + // Two-argument form + { + name: "Substring('abcdef', 2)", + cql: "Substring('abcdef', 2)", + wantModel: &model.Substring{ + NaryExpression: &model.NaryExpression{ + Operands: []model.IExpression{ + model.NewLiteral("abcdef", types.String), + model.NewLiteral("2", types.Integer), + }, + Expression: model.ResultType(types.String), + }, + }, + wantResult: newOrFatal(t, "cdef"), + }, + { + name: "Substring('abcdef', 0)", + cql: "Substring('abcdef', 0)", + wantResult: newOrFatal(t, "abcdef"), + }, + { + name: "Substring('abcdef', 5)", + cql: "Substring('abcdef', 5)", + wantResult: newOrFatal(t, "f"), + }, + { + name: "Substring('abcdef', 6) -> null (startIndex equals length)", + cql: "Substring('abcdef', 6)", + wantResult: newOrFatal(t, nil), + }, + { + name: "Substring('abcdef', 7) -> null (startIndex out of bounds)", + cql: "Substring('abcdef', 7)", + wantResult: newOrFatal(t, nil), + }, + { + name: "Substring('abcdef', -1) -> null (startIndex out of bounds)", + cql: "Substring('abcdef', -1)", + wantResult: newOrFatal(t, nil), + }, + { + name: "Substring('', 0) -> empty string", + cql: "Substring('', 0)", + wantResult: newOrFatal(t, ""), + }, + { + name: "Substring('', 1) -> null", + cql: "Substring('', 1)", + wantResult: newOrFatal(t, nil), + }, + { + name: "Substring(null, 2) -> null", + cql: "Substring(null, 2)", + wantResult: newOrFatal(t, nil), + }, + { + name: "Substring('abcdef', null) -> null", + cql: "Substring('abcdef', null)", + wantResult: newOrFatal(t, nil), + }, + { + name: "Substring('😊😊😊', 1) -> 😊😊 (Unicode)", + cql: "Substring('😊😊😊', 1)", + wantResult: newOrFatal(t, "😊😊"), + }, + // Three-argument form + { + name: "Substring('abcdef', 2, 3)", + cql: "Substring('abcdef', 2, 3)", + wantModel: &model.Substring{ + NaryExpression: &model.NaryExpression{ + Operands: []model.IExpression{ + model.NewLiteral("abcdef", types.String), + model.NewLiteral("2", types.Integer), + model.NewLiteral("3", types.Integer), + }, + Expression: model.ResultType(types.String), + }, + }, + wantResult: newOrFatal(t, "cde"), + }, + { + name: "Substring('abcdef', 0, 3)", + cql: "Substring('abcdef', 0, 3)", + wantResult: newOrFatal(t, "abc"), + }, + { + name: "Substring('abcdef', 0, 6)", + cql: "Substring('abcdef', 0, 6)", + wantResult: newOrFatal(t, "abcdef"), + }, + { + name: "Substring('abcdef', 0, 7) -> abcdef (length truncated)", + cql: "Substring('abcdef', 0, 7)", + wantResult: newOrFatal(t, "abcdef"), + }, + { + name: "Substring('abcdef', 5, 1)", + cql: "Substring('abcdef', 5, 1)", + wantResult: newOrFatal(t, "f"), + }, + { + name: "Substring('abcdef', 5, 2) -> f (length truncated)", + cql: "Substring('abcdef', 5, 2)", + wantResult: newOrFatal(t, "f"), + }, + { + name: "Substring('abcdef', 2, 0) -> empty string (zero length)", + cql: "Substring('abcdef', 2, 0)", + wantResult: newOrFatal(t, ""), + }, + { + name: "Substring('abcdef', 6, 0) -> null (startIndex equals length)", + cql: "Substring('abcdef', 6, 0)", + wantResult: newOrFatal(t, nil), + }, + { + name: "Substring('abcdef', 6, 1) -> null (startIndex equals length)", + cql: "Substring('abcdef', 6, 1)", + wantResult: newOrFatal(t, nil), + }, + { + name: "Substring('abcdef', 7, 2) -> null (startIndex out of bounds)", + cql: "Substring('abcdef', 7, 2)", + wantResult: newOrFatal(t, nil), + }, + { + name: "Substring('abcdef', -1, 3) -> null (startIndex out of bounds)", + cql: "Substring('abcdef', -1, 3)", + wantResult: newOrFatal(t, nil), + }, + { + name: "Substring('abcdef', 2, -1) -> null (negative length)", + cql: "Substring('abcdef', 2, -1)", + wantResult: newOrFatal(t, nil), + }, + { + name: "Substring('', 0, 0) -> empty string", + cql: "Substring('', 0, 0)", + wantResult: newOrFatal(t, ""), + }, + { + name: "Substring('', 0, 1) -> empty string (length truncated)", + cql: "Substring('', 0, 1)", + wantResult: newOrFatal(t, ""), + }, + { + name: "Substring('', 1, 1) -> null", + cql: "Substring('', 1, 1)", + wantResult: newOrFatal(t, nil), + }, + { + name: "Substring(null, 2, 3) -> null", + cql: "Substring(null, 2, 3)", + wantResult: newOrFatal(t, nil), + }, + { + name: "Substring('abcdef', null, 3) -> null", + cql: "Substring('abcdef', null, 3)", + wantResult: newOrFatal(t, nil), + }, + { + name: "Substring('abcdef', 2, null) -> null", + cql: "Substring('abcdef', 2, null)", + wantResult: newOrFatal(t, nil), + }, + { + name: "Substring('😊😊😊', 1, 1) -> 😊 (Unicode)", + cql: "Substring('😊😊😊', 1, 1)", + wantResult: newOrFatal(t, "😊"), + }, + { + name: "Substring('😊😊😊', 0, 2) -> 😊😊 (Unicode, length truncated within runes)", + cql: "Substring('😊😊😊', 0, 2)", + wantResult: newOrFatal(t, "😊😊"), + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + p := newFHIRParser(t) + parsedLibs, err := p.Libraries(context.Background(), wrapInLib(t, tc.cql), parser.Config{}) + if err != nil { + t.Fatalf("Parse returned unexpected error: %v", err) + } + if diff := cmp.Diff(tc.wantModel, getTESTRESULTModel(t, parsedLibs)); tc.wantModel != nil && diff != "" { + t.Errorf("Parse diff (-want +got):\n%s", diff) + } + + results, err := interpreter.Eval(context.Background(), parsedLibs, defaultInterpreterConfig(t, p)) + if err != nil { + t.Fatalf("Eval returned unexpected error: %v", err) + } + if diff := cmp.Diff(tc.wantResult, getTESTRESULT(t, results), protocmp.Transform()); diff != "" { + t.Errorf("Eval diff (-want +got)\n%v", diff) + } + }) + } +} + func TestToString(t *testing.T) { tests := []struct { name string @@ -944,6 +1150,56 @@ func TestPositionOf(t *testing.T) { cql: "PositionOf('B', '')", wantResult: newOrFatal(t, -1), }, + } + { + name: "PositionOfFound", + cql: "PositionOf('B','ABC')", + wantModel: &model.PositionOf{ + BinaryExpression: &model.BinaryExpression{ + Operands: []model.IExpression{ + model.NewLiteral("B", types.String), + model.NewLiteral("ABC", types.String), + }, + Expression: model.ResultType(types.Integer), + }, + }, + wantResult: newOrFatal(t, 1), + }, + { + name: "PositionOfMultiples", + cql: "PositionOf('B', 'ABCBA')", + wantResult: newOrFatal(t, 1), + }, + { + name: "PositionOfNotFound", + cql: "PositionOf('B','ACDC')", + wantResult: newOrFatal(t, -1), + }, + { + name: "PositionOfLeftNull", + cql: "PositionOf(null, 'ABC')", + wantResult: newOrFatal(t, nil), + }, + { + name: "PositionOfRightNull", + cql: "PositionOf('B', null)", + wantResult: newOrFatal(t, nil), + }, + { + name: "PositionOfBothNull", + cql: "PositionOf(null, null)", + wantResult: newOrFatal(t, nil), + }, + { + name: "PositionOfLeftEmpty", + cql: "PositionOf('','ABC')", + wantResult: newOrFatal(t, 0), + }, + { + name: "PositionOfRightEmpty", + cql: "PositionOf('B', '')", + wantResult: newOrFatal(t, -1), + }, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { @@ -976,6 +1232,7 @@ func TestStartsWith(t *testing.T) { { name: "StartsWithTrue", cql: "StartsWith('Appendix','App')", + cql: "StartsWith('Appendix','App')", wantModel: &model.StartsWith{ BinaryExpression: &model.BinaryExpression{ Operands: []model.IExpression{ @@ -1108,3 +1365,4 @@ func TestMatches(t *testing.T) { }) } } + diff --git a/tests/spectests/exclusions/exclusions.go b/tests/spectests/exclusions/exclusions.go index f4bb468..1e3963b 100644 --- a/tests/spectests/exclusions/exclusions.go +++ b/tests/spectests/exclusions/exclusions.go @@ -404,7 +404,6 @@ func XMLTestFileExclusionDefinitions() map[string]XMLTestFileExclusions { "CqlStringOperatorsTest.xml": { GroupExcludes: []string{ // TODO: b/342061715 - unsupported operators. - "Substring", }, NamesExcludes: []string{ // TODO: b/346880550 - These test appear to have incorrect assertions. From 0e477f6e593c399b17a42f15574d01f946d7e7da Mon Sep 17 00:00:00 2001 From: Suyash Kumar Date: Fri, 30 May 2025 12:46:21 -0400 Subject: [PATCH 2/3] fix merge duplication --- tests/enginetests/operator_string_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/enginetests/operator_string_test.go b/tests/enginetests/operator_string_test.go index 61d6740..4f0e840 100644 --- a/tests/enginetests/operator_string_test.go +++ b/tests/enginetests/operator_string_test.go @@ -1232,7 +1232,6 @@ func TestStartsWith(t *testing.T) { { name: "StartsWithTrue", cql: "StartsWith('Appendix','App')", - cql: "StartsWith('Appendix','App')", wantModel: &model.StartsWith{ BinaryExpression: &model.BinaryExpression{ Operands: []model.IExpression{ From 536adffbb2f504749c4ca6e4fd40797e98566d88 Mon Sep 17 00:00:00 2001 From: Suyash Kumar Date: Tue, 3 Jun 2025 10:35:28 -0700 Subject: [PATCH 3/3] Fix merge issue --- tests/enginetests/operator_string_test.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/enginetests/operator_string_test.go b/tests/enginetests/operator_string_test.go index 4f0e840..84269c9 100644 --- a/tests/enginetests/operator_string_test.go +++ b/tests/enginetests/operator_string_test.go @@ -1150,7 +1150,6 @@ func TestPositionOf(t *testing.T) { cql: "PositionOf('B', '')", wantResult: newOrFatal(t, -1), }, - } { name: "PositionOfFound", cql: "PositionOf('B','ABC')", @@ -1364,4 +1363,3 @@ func TestMatches(t *testing.T) { }) } } -