Skip to content

Commit f5bc7ef

Browse files
google-labs-jules[bot] and suyashkumar
authored and committed
Implement CQL Substring operator
This commit introduces the implementation for the CQL Substring operator:
* Initial Jules implementation
* Correctly support the startIndex=stringLen case and correct the tests
* Refactor for readability
1 parent ccdaaaf commit f5bc7ef

6 files changed

Lines changed: 380 additions & 4 deletions

File tree

interpreter/operator_dispatcher.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1410,6 +1410,17 @@ func (i *interpreter) naryOverloads(m model.INaryExpression) ([]convert.Overload
14101410
Result: evalRound,
14111411
},
14121412
}, nil
1413+
case *model.Substring:
1414+
return []convert.Overload[evalNarySignature]{
1415+
{
1416+
Operands: []types.IType{types.String, types.Integer},
1417+
Result: evalSubstring,
1418+
},
1419+
{
1420+
Operands: []types.IType{types.String, types.Integer, types.Integer},
1421+
Result: evalSubstring,
1422+
},
1423+
}, nil
14131424
default:
14141425
return nil, fmt.Errorf("unsupported Nary Expression %v", m.GetName())
14151426
}

interpreter/operator_string.go

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -448,3 +448,93 @@ func evalMatches(m model.IBinaryExpression, argString, patternString result.Valu
448448
res := match != ""
449449
return result.New(res)
450450
}
451+
452+
// Substring(stringToSub String, startIndex Integer) String
453+
// Substring(stringToSub String, startIndex Integer, length Integer) String
454+
// https://cql.hl7.org/09-b-cqlreference.html#substring
455+
func evalSubstring(m model.INaryExpression, operands []result.Value) (result.Value, error) {
456+
if len(operands) < 2 || len(operands) > 3 {
457+
return result.Value{}, fmt.Errorf("substring expects 2 or 3 arguments, got %d", len(operands))
458+
}
459+
460+
// Check for null operands first
461+
if result.IsNull(operands[0]) || result.IsNull(operands[1]) {
462+
return result.New(nil)
463+
}
464+
465+
stringToSub, startIndex, err := extractSubstringBaseArgs(operands)
466+
if err != nil {
467+
return result.Value{}, err
468+
}
469+
470+
runes := []rune(stringToSub)
471+
stringLen := int32(len(runes))
472+
473+
// Special case: If string is empty and startIndex is 0, return empty string
474+
if stringLen == 0 && startIndex == 0 {
475+
return result.New("")
476+
}
477+
478+
// Rule: If startIndex is less than 0 or greater than or equal to the length of the stringToSub, the result is null.
479+
if startIndex < 0 || startIndex >= stringLen {
480+
return result.New(nil)
481+
}
482+
483+
// Handle three-argument form: Substring(stringToSub, startIndex, length)
484+
if len(operands) == 3 {
485+
return substringWithLength(runes, startIndex, operands[2])
486+
}
487+
488+
// Handle two-argument form: Substring(stringToSub, startIndex)
489+
// Rule: If length is not specified, the result is the substring of stringToSub starting at startIndex.
490+
return result.New(string(runes[startIndex:]))
491+
}
492+
493+
// extractSubstringBaseArgs extracts and validates the first two arguments for Substring
494+
// Note: This assumes operands are already checked for null
495+
func extractSubstringBaseArgs(operands []result.Value) (string, int32, error) {
496+
// Operand 0: stringToSub (String)
497+
stringToSub, err := result.ToString(operands[0])
498+
if err != nil {
499+
return "", 0, fmt.Errorf("could not convert stringToSub to string: %w", err)
500+
}
501+
502+
// Operand 1: startIndex (Integer)
503+
startIndex, err := result.ToInt32(operands[1])
504+
if err != nil {
505+
return "", 0, fmt.Errorf("could not convert startIndex to int32: %w", err)
506+
}
507+
508+
return stringToSub, startIndex, nil
509+
}
510+
511+
// substringWithLength handles the three-argument form of Substring
512+
func substringWithLength(runes []rune, startIndex int32, lengthOperand result.Value) (result.Value, error) {
513+
if result.IsNull(lengthOperand) {
514+
return result.New(nil)
515+
}
516+
517+
length, err := result.ToInt32(lengthOperand)
518+
if err != nil {
519+
return result.Value{}, fmt.Errorf("could not convert length to int32: %w", err)
520+
}
521+
522+
// Rule: If length is less than 0, the result is null.
523+
if length < 0 {
524+
return result.New(nil)
525+
}
526+
527+
// Rule: If length is 0, the result is an empty string.
528+
if length == 0 {
529+
return result.New("")
530+
}
531+
532+
stringLen := int32(len(runes))
533+
endIndex := startIndex + length
534+
// Rule: If length is greater than the remaining characters, include characters to the end.
535+
if endIndex > stringLen {
536+
endIndex = stringLen
537+
}
538+
539+
return result.New(string(runes[startIndex:endIndex]))
540+
}

model/model.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1054,9 +1054,9 @@ type Union struct{ *BinaryExpression }
10541054
type Split struct{ *BinaryExpression }
10551055

10561056
// Substring ELM Expression https://cql.hl7.org/09-b-cqlreference.html#substring
1057-
// Substring is an OperatorExpression in ELM, but we're modeling it as a BinaryExpression since in CQL
1058-
// it takes two or three arguments (string, start, length).
1059-
type Substring struct{ *BinaryExpression }
1057+
// Substring is an OperatorExpression in ELM. It takes two or three arguments (string, start, length).
1058+
// We model it as an NaryExpression to handle both overloads.
1059+
type Substring struct{ *NaryExpression }
10601060

10611061
// Indexer ELM Expression https://cql.hl7.org/04-logicalspecification.html#indexer.
10621062
type Indexer struct{ *BinaryExpression }
@@ -1642,6 +1642,9 @@ func (i *Indexer) GetName() string { return "Indexer" }
16421642
// GetName returns the name of the system operator.
16431643
func (a *IndexOf) GetName() string { return "IndexOf" }
16441644

1645+
// GetName returns the name of the system operator.
1646+
func (s *Substring) GetName() string { return "Substring" }
1647+
16451648
// GetName returns the name of the system operator.
16461649
func (m *Median) GetName() string { return "Median" }
16471650

parser/operators.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,21 @@ func (p *Parser) loadSystemOperators() error {
286286
}
287287
},
288288
},
289+
{
290+
name: "Substring",
291+
operands: [][]types.IType{
292+
{types.String, types.Integer},
293+
{types.String, types.Integer, types.Integer},
294+
},
295+
model: func() model.IExpression {
296+
return &model.Substring{
297+
// NaryExpression is used here because Substring can have 2 or 3 operands.
298+
NaryExpression: &model.NaryExpression{
299+
Expression: model.ResultType(types.String),
300+
},
301+
}
302+
},
303+
},
289304
// CONVERT QUANTITY OPERATOR
290305
{
291306
name: "ConvertQuantity",

0 commit comments

Comments
 (0)