From e85398a0bfd01489d6a535866ca6ddec61092ccb Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 16 Jan 2026 12:37:32 +0000 Subject: [PATCH 1/5] Initial plan From 18f5930638e76c949656969cc02eb86ea130eada Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 16 Jan 2026 12:45:13 +0000 Subject: [PATCH 2/5] Add slice, urlEncode, base64Encode, and coalesce functions Co-authored-by: Sander-Toonen <5106372+Sander-Toonen@users.noreply.github.com> --- src/functions/string/operations.ts | 110 ++++++++++++++++++ src/parsing/parser.ts | 9 +- test/functions/functions-string.ts | 178 +++++++++++++++++++++++++++++ 3 files changed, 295 insertions(+), 2 deletions(-) diff --git a/src/functions/string/operations.ts b/src/functions/string/operations.ts index f0d86ae..89fe25c 100644 --- a/src/functions/string/operations.ts +++ b/src/functions/string/operations.ts @@ -460,3 +460,113 @@ export function padBoth(str: string | undefined, targetLength: number | undefine return leftPad + str + rightPad; } + +/** + * Extracts a portion of a string or array + * Supports negative indices (counting from the end) + * @param s - The string or array to slice + * @param start - Start index (negative counts from end) + * @param end - End index (optional, negative counts from end) + */ +export function slice( + s: string | any[] | undefined, + start: number | undefined, + end?: number +): string | any[] | undefined { + if (s === undefined || start === undefined) { + return undefined; + } + if (typeof s !== 'string' && !Array.isArray(s)) { + throw new Error('First argument to slice must be a string or array'); + } + if (typeof start !== 'number') { + throw new Error('Second argument to slice must be a number'); + } + if (end !== undefined && typeof end !== 'number') { + throw new Error('Third argument to slice must be a number'); + } + + return s.slice(start, end); +} + +/** + * URL-encodes a string + * Uses encodeURIComponent for safe encoding + */ +export function urlEncode(str: string | undefined): string | undefined { + if (str === undefined) { + return undefined; + } + if (typeof str !== 'string') { + throw new Error('Argument to urlEncode must be a string'); + } + return encodeURIComponent(str); +} + +/** + * Base64-encodes a string + * Handles UTF-8 encoding properly + */ +export function base64Encode(str: string | undefined): string | undefined { + if (str === undefined) { + return undefined; + } + if (typeof str !== 'string') { + throw new Error('Argument to base64Encode must be a string'); + } + + // Base64 alphabet + const base64Chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'; + + // Convert string to UTF-8 bytes + const utf8: number[] = []; + for (let i = 0; i < str.length; i++) { + let code = str.charCodeAt(i); + if (code < 0x80) { + utf8.push(code); + } else if (code < 0x800) { + utf8.push(0xc0 | (code >> 6), 0x80 | (code & 0x3f)); + } else if (code >= 0xd800 && code < 0xdc00) { + // Surrogate pair + i++; + const low = str.charCodeAt(i); + code = 0x10000 + ((code - 0xd800) << 10) + (low - 0xdc00); + utf8.push( + 0xf0 | (code >> 18), + 0x80 | ((code >> 12) & 0x3f), + 0x80 | ((code >> 6) & 0x3f), + 0x80 | (code & 0x3f) + ); + } else { + utf8.push(0xe0 | (code >> 12), 0x80 | ((code >> 6) & 0x3f), 0x80 | (code & 0x3f)); + } + } + + // Encode bytes to base64 + let result = ''; + for (let i = 0; i < utf8.length; i += 3) { + const b1 = utf8[i]; + const b2 = utf8[i + 1]; + const b3 = utf8[i + 2]; + + result += base64Chars[b1 >> 2]; + result += base64Chars[((b1 & 0x03) << 4) | ((b2 ?? 0) >> 4)]; + result += b2 !== undefined ? base64Chars[((b2 & 0x0f) << 2) | ((b3 ?? 0) >> 6)] : '='; + result += b3 !== undefined ? base64Chars[b3 & 0x3f] : '='; + } + + return result; +} + +/** + * Returns the first non-null and non-empty string value from the arguments + * @param args - Any number of values to check + */ +export function coalesceString(...args: any[]): any { + for (const arg of args) { + if (arg !== undefined && arg !== null && arg !== '') { + return arg; + } + } + return args.length > 0 ? args[args.length - 1] : undefined; +} diff --git a/src/parsing/parser.ts b/src/parsing/parser.ts index 5728d93..523b9e5 100644 --- a/src/parsing/parser.ts +++ b/src/parsing/parser.ts @@ -6,7 +6,7 @@ import { Expression } from '../core/expression.js'; import type { Value, VariableResolveResult, Values } from '../types/values.js'; import type { Instruction } from './instruction.js'; import type { OperatorFunction } from '../types/parser.js'; -import { atan2, condition, fac, filter, fold, gamma, hypot, indexOf, join, map, max, min, random, roundTo, sum, json, stringLength, isEmpty, stringContains, startsWith, endsWith, searchCount, trim, toUpper, toLower, toTitle, split, repeat, reverse, left, right, replace, replaceFirst, naturalSort, toNumber, toBoolean, padLeft, padRight, padBoth } from '../functions/index.js'; +import { atan2, condition, fac, filter, fold, gamma, hypot, indexOf, join, map, max, min, random, roundTo, sum, json, stringLength, isEmpty, stringContains, startsWith, endsWith, searchCount, trim, toUpper, toLower, toTitle, split, repeat, reverse, left, right, replace, replaceFirst, naturalSort, toNumber, toBoolean, padLeft, padRight, padBoth, slice, urlEncode, base64Encode, coalesceString } from '../functions/index.js'; import { add, sub, @@ -219,7 +219,12 @@ export class Parser { toBoolean: toBoolean, padLeft: padLeft, padRight: padRight, - padBoth: padBoth + padBoth: padBoth, + // New functions + slice: slice, + urlEncode: urlEncode, + base64Encode: base64Encode, + coalesce: coalesceString }; this.numericConstants = { diff --git a/test/functions/functions-string.ts b/test/functions/functions-string.ts index d7b384c..947ef04 100644 --- a/test/functions/functions-string.ts +++ b/test/functions/functions-string.ts @@ -633,4 +633,182 @@ describe('String Functions TypeScript Test', function () { assert.throws(() => parser.evaluate('padBoth("test", 5, 0)'), /Third argument.*must be a string/); }); }); + + describe('slice(s, start, end?)', function () { + describe('with strings', function () { + it('should extract a portion of a string', function () { + const parser = new Parser(); + assert.strictEqual(parser.evaluate('slice("hello world", 0, 5)'), 'hello'); + assert.strictEqual(parser.evaluate('slice("hello world", 6, 11)'), 'world'); + assert.strictEqual(parser.evaluate('slice("hello world", 6)'), 'world'); + }); + + it('should handle negative indices', function () { + const parser = new Parser(); + assert.strictEqual(parser.evaluate('slice("hello world", -5)'), 'world'); + assert.strictEqual(parser.evaluate('slice("hello world", -5, -1)'), 'worl'); + assert.strictEqual(parser.evaluate('slice("hello world", 0, -6)'), 'hello'); + }); + + it('should return empty string when start >= end', function () { + const parser = new Parser(); + assert.strictEqual(parser.evaluate('slice("hello", 3, 2)'), ''); + assert.strictEqual(parser.evaluate('slice("hello", 5, 5)'), ''); + }); + + it('should return undefined if string is undefined', function () { + const parser = new Parser(); + assert.strictEqual(parser.evaluate('slice(undefined, 0, 5)'), undefined); + }); + + it('should return undefined if start is undefined', function () { + const parser = new Parser(); + assert.strictEqual(parser.evaluate('slice("hello", undefined)'), undefined); + }); + + it('should throw error for non-string and non-array first argument', function () { + const parser = new Parser(); + assert.throws(() => parser.evaluate('slice(123, 0, 5)'), /First argument.*must be a string or array/); + }); + + it('should throw error for non-number start', function () { + const parser = new Parser(); + assert.throws(() => parser.evaluate('slice("hello", "0", 5)'), /Second argument.*must be a number/); + }); + + it('should throw error for non-number end', function () { + const parser = new Parser(); + assert.throws(() => parser.evaluate('slice("hello", 0, "5")'), /Third argument.*must be a number/); + }); + }); + + describe('with arrays', function () { + it('should extract a portion of an array', function () { + const parser = new Parser(); + assert.deepStrictEqual(parser.evaluate('slice([1,2,3,4,5], 0, 3)'), [1, 2, 3]); + assert.deepStrictEqual(parser.evaluate('slice([1,2,3,4,5], 2)'), [3, 4, 5]); + }); + + it('should handle negative indices with arrays', function () { + const parser = new Parser(); + assert.deepStrictEqual(parser.evaluate('slice([1,2,3,4,5], -2)'), [4, 5]); + assert.deepStrictEqual(parser.evaluate('slice([1,2,3,4,5], -3, -1)'), [3, 4]); + }); + + it('should return empty array when start >= end', function () { + const parser = new Parser(); + assert.deepStrictEqual(parser.evaluate('slice([1,2,3], 2, 1)'), []); + }); + }); + }); + + describe('urlEncode(str)', function () { + it('should URL-encode a string', function () { + const parser = new Parser(); + assert.strictEqual(parser.evaluate('urlEncode("hello world")'), 'hello%20world'); + assert.strictEqual(parser.evaluate('urlEncode("foo=bar&baz=qux")'), 'foo%3Dbar%26baz%3Dqux'); + assert.strictEqual(parser.evaluate('urlEncode("test")'), 'test'); + }); + + it('should handle special characters', function () { + const parser = new Parser(); + // encodeURIComponent encodes @, #, $, %, ^, &, but not !, *, (, ) + assert.strictEqual(parser.evaluate('urlEncode("!@#$%^&*()")'), '!%40%23%24%25%5E%26*()'); + assert.strictEqual(parser.evaluate('urlEncode("a/b/c")'), 'a%2Fb%2Fc'); + }); + + it('should return empty string for empty input', function () { + const parser = new Parser(); + assert.strictEqual(parser.evaluate('urlEncode("")'), ''); + }); + + it('should return undefined if argument is undefined', function () { + const parser = new Parser(); + assert.strictEqual(parser.evaluate('urlEncode(undefined)'), undefined); + }); + + it('should throw error for non-string argument', function () { + const parser = new Parser(); + assert.throws(() => parser.evaluate('urlEncode(123)'), /must be a string/); + }); + }); + + describe('base64Encode(str)', function () { + it('should Base64-encode a string', function () { + const parser = new Parser(); + assert.strictEqual(parser.evaluate('base64Encode("hello")'), 'aGVsbG8='); + assert.strictEqual(parser.evaluate('base64Encode("Hello World")'), 'SGVsbG8gV29ybGQ='); + assert.strictEqual(parser.evaluate('base64Encode("test")'), 'dGVzdA=='); + }); + + it('should handle empty string', function () { + const parser = new Parser(); + assert.strictEqual(parser.evaluate('base64Encode("")'), ''); + }); + + it('should handle UTF-8 characters', function () { + const parser = new Parser(); + assert.strictEqual(parser.evaluate('base64Encode("héllo")'), 'aMOpbGxv'); + assert.strictEqual(parser.evaluate('base64Encode("日本語")'), '5pel5pys6Kqe'); + }); + + it('should return undefined if argument is undefined', function () { + const parser = new Parser(); + assert.strictEqual(parser.evaluate('base64Encode(undefined)'), undefined); + }); + + it('should throw error for non-string argument', function () { + const parser = new Parser(); + assert.throws(() => parser.evaluate('base64Encode(123)'), /must be a string/); + }); + }); + + describe('coalesce(a, b, ...)', function () { + it('should return the first non-null, non-empty string value', function () { + const parser = new Parser(); + assert.strictEqual(parser.evaluate('coalesce("hello", "world")'), 'hello'); + assert.strictEqual(parser.evaluate('coalesce("", "world")'), 'world'); + assert.strictEqual(parser.evaluate('coalesce(null, "world")'), 'world'); + assert.strictEqual(parser.evaluate('coalesce(undefined, "world")'), 'world'); + }); + + it('should work with multiple arguments', function () { + const parser = new Parser(); + assert.strictEqual(parser.evaluate('coalesce("", null, undefined, "found")'), 'found'); + assert.strictEqual(parser.evaluate('coalesce(null, null, null, "last")'), 'last'); + }); + + it('should return non-string values if they are first non-null/non-empty', function () { + const parser = new Parser(); + assert.strictEqual(parser.evaluate('coalesce(null, 42)'), 42); + assert.strictEqual(parser.evaluate('coalesce("", true)'), true); + assert.deepStrictEqual(parser.evaluate('coalesce(null, [1,2,3])'), [1, 2, 3]); + }); + + it('should return 0 as a valid value (not empty)', function () { + const parser = new Parser(); + assert.strictEqual(parser.evaluate('coalesce(0, 42)'), 0); + assert.strictEqual(parser.evaluate('coalesce(null, 0, 42)'), 0); + }); + + it('should return false as a valid value (not empty)', function () { + const parser = new Parser(); + assert.strictEqual(parser.evaluate('coalesce(false, true)'), false); + assert.strictEqual(parser.evaluate('coalesce(null, false, true)'), false); + }); + + it('should return last value if all are null/undefined/empty', function () { + const parser = new Parser(); + assert.strictEqual(parser.evaluate('coalesce(null, undefined, "")'), ''); + assert.strictEqual(parser.evaluate('coalesce("", "", "")'), ''); + assert.strictEqual(parser.evaluate('coalesce(null, null, null)'), null); + }); + + it('should handle single argument', function () { + const parser = new Parser(); + assert.strictEqual(parser.evaluate('coalesce("hello")'), 'hello'); + assert.strictEqual(parser.evaluate('coalesce(null)'), null); + assert.strictEqual(parser.evaluate('coalesce("")'), ''); + }); + }); }); From 80fac49a1e27ac4aacff69895bc20ab969d557ea Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 16 Jan 2026 12:47:27 +0000 Subject: [PATCH 3/5] Fix potential out-of-bounds access in base64Encode surrogate pair handling Co-authored-by: Sander-Toonen <5106372+Sander-Toonen@users.noreply.github.com> --- src/functions/string/operations.ts | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/functions/string/operations.ts b/src/functions/string/operations.ts index 89fe25c..943facc 100644 --- a/src/functions/string/operations.ts +++ b/src/functions/string/operations.ts @@ -526,17 +526,22 @@ export function base64Encode(str: string | undefined): string | undefined { utf8.push(code); } else if (code < 0x800) { utf8.push(0xc0 | (code >> 6), 0x80 | (code & 0x3f)); - } else if (code >= 0xd800 && code < 0xdc00) { - // Surrogate pair - i++; - const low = str.charCodeAt(i); - code = 0x10000 + ((code - 0xd800) << 10) + (low - 0xdc00); - utf8.push( - 0xf0 | (code >> 18), - 0x80 | ((code >> 12) & 0x3f), - 0x80 | ((code >> 6) & 0x3f), - 0x80 | (code & 0x3f) - ); + } else if (code >= 0xd800 && code < 0xdc00 && i + 1 < str.length) { + // Surrogate pair - check bounds before accessing next character + const low = str.charCodeAt(i + 1); + if (low >= 0xdc00 && low < 0xe000) { + i++; + code = 0x10000 + ((code - 0xd800) << 10) + (low - 0xdc00); + utf8.push( + 0xf0 | (code >> 18), + 0x80 | ((code >> 12) & 0x3f), + 0x80 | ((code >> 6) & 0x3f), + 0x80 | (code & 0x3f) + ); + } else { + // Unpaired high surrogate - encode as-is (will produce invalid UTF-8, but matches typical behavior) + utf8.push(0xe0 | (code >> 12), 0x80 | ((code >> 6) & 0x3f), 0x80 | (code & 0x3f)); + } } else { utf8.push(0xe0 | (code >> 12), 0x80 | ((code >> 6) & 0x3f), 0x80 | (code & 0x3f)); } From b0fb3f9667edf063ec7c4091312894dd105b0a03 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 16 Jan 2026 13:07:56 +0000 Subject: [PATCH 4/5] Add documentation for new functions and update language server Co-authored-by: Sander-Toonen <5106372+Sander-Toonen@users.noreply.github.com> --- docs/syntax.md | 27 +++++++++++++++++ .../language-service.documentation.ts | 30 +++++++++++++++++++ src/parsing/parser.ts | 1 - 3 files changed, 57 insertions(+), 1 deletion(-) diff --git a/docs/syntax.md b/docs/syntax.md index 4317043..7892aed 100644 --- a/docs/syntax.md +++ b/docs/syntax.md @@ -155,6 +155,20 @@ The parser includes comprehensive string manipulation capabilities. | padRight(str, len, padChar?) | Pads a string on the right with spaces (or optional padding character) to reach the target length. | | padBoth(str, len, padChar?) | Pads a string on both sides with spaces (or optional padding character) to reach the target length. If an odd number of padding characters is needed, the extra character is added on the right. | +### Slicing and Encoding + +| Function | Description | +|:--------------------- |:----------- | +| slice(s, start, end?) | Extracts a portion of a string or array. Supports negative indices (e.g., -1 for last element). | +| urlEncode(str) | URL-encodes a string using `encodeURIComponent`. | +| base64Encode(str) | Base64-encodes a string with proper UTF-8 support. | + +### Utility Functions + +| Function | Description | +|:--------------------- |:----------- | +| coalesce(a, b, ...) | Returns the first non-null and non-empty string value from the arguments. Numbers and booleans (including 0 and false) are considered valid values. | + ### String Function Examples ```js @@ -203,6 +217,19 @@ parser.evaluate('padRight("5", 3, "0")'); // "500" parser.evaluate('padBoth("hi", 6)'); // " hi " parser.evaluate('padBoth("hi", 6, "-")'); // "--hi--" +// Slicing +parser.evaluate('slice("hello world", 0, 5)'); // "hello" +parser.evaluate('slice("hello world", -5)'); // "world" +parser.evaluate('slice([1, 2, 3, 4, 5], -2)'); // [4, 5] + +// Encoding +parser.evaluate('urlEncode("foo=bar&baz")'); // "foo%3Dbar%26baz" +parser.evaluate('base64Encode("hello")'); // "aGVsbG8=" + +// Coalesce +parser.evaluate('coalesce("", null, "found")'); // "found" +parser.evaluate('coalesce(null, 0, 42)'); // 0 + // Complex string operations parser.evaluate('toUpper(trim(left(" hello world ", 10)))'); // "HELLO WOR" ``` diff --git a/src/language-service/language-service.documentation.ts b/src/language-service/language-service.documentation.ts index 7d7476e..30d0052 100644 --- a/src/language-service/language-service.documentation.ts +++ b/src/language-service/language-service.documentation.ts @@ -225,6 +225,36 @@ export const BUILTIN_FUNCTION_DOCS: Record = { { name: 'length', description: 'Target length.' }, { name: 'padStr', description: 'Padding string.', optional: true } ] + }, + slice: { + name: 'slice', + description: 'Extract a portion of a string or array. Supports negative indices.', + params: [ + { name: 's', description: 'Input string or array.' }, + { name: 'start', description: 'Start index (negative counts from end).' }, + { name: 'end', description: 'End index (negative counts from end).', optional: true } + ] + }, + urlEncode: { + name: 'urlEncode', + description: 'URL-encode a string using encodeURIComponent.', + params: [ + { name: 'str', description: 'String to encode.' } + ] + }, + base64Encode: { + name: 'base64Encode', + description: 'Base64-encode a string with UTF-8 support.', + params: [ + { name: 'str', description: 'String to encode.' } + ] + }, + coalesce: { + name: 'coalesce', + description: 'Return the first non-null and non-empty string value from the arguments.', + params: [ + { name: 'values', description: 'Values to check.', isVariadic: true } + ] } }; diff --git a/src/parsing/parser.ts b/src/parsing/parser.ts index 523b9e5..cf6ebd0 100644 --- a/src/parsing/parser.ts +++ b/src/parsing/parser.ts @@ -220,7 +220,6 @@ export class Parser { padLeft: padLeft, padRight: padRight, padBoth: padBoth, - // New functions slice: slice, urlEncode: urlEncode, base64Encode: base64Encode, From c537880b69ff6a9c0e04056fa06e581fd279218f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 16 Jan 2026 13:23:06 +0000 Subject: [PATCH 5/5] Simplify base64Encode using btoa and add base64Decode function Co-authored-by: Sander-Toonen <5106372+Sander-Toonen@users.noreply.github.com> --- docs/syntax.md | 2 + src/functions/string/operations.ts | 71 +++++++------------ .../language-service.documentation.ts | 7 ++ src/parsing/parser.ts | 3 +- test/functions/functions-string.ts | 41 +++++++++++ 5 files changed, 78 insertions(+), 46 deletions(-) diff --git a/docs/syntax.md b/docs/syntax.md index 7892aed..84865c4 100644 --- a/docs/syntax.md +++ b/docs/syntax.md @@ -162,6 +162,7 @@ The parser includes comprehensive string manipulation capabilities. | slice(s, start, end?) | Extracts a portion of a string or array. Supports negative indices (e.g., -1 for last element). | | urlEncode(str) | URL-encodes a string using `encodeURIComponent`. | | base64Encode(str) | Base64-encodes a string with proper UTF-8 support. | +| base64Decode(str) | Base64-decodes a string with proper UTF-8 support. | ### Utility Functions @@ -225,6 +226,7 @@ parser.evaluate('slice([1, 2, 3, 4, 5], -2)'); // [4, 5] // Encoding parser.evaluate('urlEncode("foo=bar&baz")'); // "foo%3Dbar%26baz" parser.evaluate('base64Encode("hello")'); // "aGVsbG8=" +parser.evaluate('base64Decode("aGVsbG8=")'); // "hello" // Coalesce parser.evaluate('coalesce("", null, "found")'); // "found" diff --git a/src/functions/string/operations.ts b/src/functions/string/operations.ts index 943facc..533328c 100644 --- a/src/functions/string/operations.ts +++ b/src/functions/string/operations.ts @@ -503,9 +503,13 @@ export function urlEncode(str: string | undefined): string | undefined { return encodeURIComponent(str); } +// Global declarations for btoa/atob (available in Node.js 16+ and browsers) +declare function btoa(data: string): string; +declare function atob(data: string): string; + /** * Base64-encodes a string - * Handles UTF-8 encoding properly + * Handles UTF-8 encoding properly using btoa */ export function base64Encode(str: string | undefined): string | undefined { if (str === undefined) { @@ -514,53 +518,30 @@ export function base64Encode(str: string | undefined): string | undefined { if (typeof str !== 'string') { throw new Error('Argument to base64Encode must be a string'); } + // Encode UTF-8 string to base64 using btoa + // First encode as UTF-8 bytes, then convert to binary string for btoa + const utf8Str = unescape(encodeURIComponent(str)); + return btoa(utf8Str); +} - // Base64 alphabet - const base64Chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'; - - // Convert string to UTF-8 bytes - const utf8: number[] = []; - for (let i = 0; i < str.length; i++) { - let code = str.charCodeAt(i); - if (code < 0x80) { - utf8.push(code); - } else if (code < 0x800) { - utf8.push(0xc0 | (code >> 6), 0x80 | (code & 0x3f)); - } else if (code >= 0xd800 && code < 0xdc00 && i + 1 < str.length) { - // Surrogate pair - check bounds before accessing next character - const low = str.charCodeAt(i + 1); - if (low >= 0xdc00 && low < 0xe000) { - i++; - code = 0x10000 + ((code - 0xd800) << 10) + (low - 0xdc00); - utf8.push( - 0xf0 | (code >> 18), - 0x80 | ((code >> 12) & 0x3f), - 0x80 | ((code >> 6) & 0x3f), - 0x80 | (code & 0x3f) - ); - } else { - // Unpaired high surrogate - encode as-is (will produce invalid UTF-8, but matches typical behavior) - utf8.push(0xe0 | (code >> 12), 0x80 | ((code >> 6) & 0x3f), 0x80 | (code & 0x3f)); - } - } else { - utf8.push(0xe0 | (code >> 12), 0x80 | ((code >> 6) & 0x3f), 0x80 | (code & 0x3f)); - } +/** + * Base64-decodes a string + * Handles UTF-8 decoding properly using atob + */ +export function base64Decode(str: string | undefined): string | undefined { + if (str === undefined) { + return undefined; } - - // Encode bytes to base64 - let result = ''; - for (let i = 0; i < utf8.length; i += 3) { - const b1 = utf8[i]; - const b2 = utf8[i + 1]; - const b3 = utf8[i + 2]; - - result += base64Chars[b1 >> 2]; - result += base64Chars[((b1 & 0x03) << 4) | ((b2 ?? 0) >> 4)]; - result += b2 !== undefined ? base64Chars[((b2 & 0x0f) << 2) | ((b3 ?? 0) >> 6)] : '='; - result += b3 !== undefined ? base64Chars[b3 & 0x3f] : '='; + if (typeof str !== 'string') { + throw new Error('Argument to base64Decode must be a string'); + } + try { + // Decode base64 to binary string, then decode UTF-8 + const binaryStr = atob(str); + return decodeURIComponent(escape(binaryStr)); + } catch { + throw new Error('Invalid base64 string'); } - - return result; } /** diff --git a/src/language-service/language-service.documentation.ts b/src/language-service/language-service.documentation.ts index 30d0052..dc5eb3b 100644 --- a/src/language-service/language-service.documentation.ts +++ b/src/language-service/language-service.documentation.ts @@ -249,6 +249,13 @@ export const BUILTIN_FUNCTION_DOCS: Record = { { name: 'str', description: 'String to encode.' } ] }, + base64Decode: { + name: 'base64Decode', + description: 'Base64-decode a string with UTF-8 support.', + params: [ + { name: 'str', description: 'Base64 string to decode.' } + ] + }, coalesce: { name: 'coalesce', description: 'Return the first non-null and non-empty string value from the arguments.', diff --git a/src/parsing/parser.ts b/src/parsing/parser.ts index cf6ebd0..f629af2 100644 --- a/src/parsing/parser.ts +++ b/src/parsing/parser.ts @@ -6,7 +6,7 @@ import { Expression } from '../core/expression.js'; import type { Value, VariableResolveResult, Values } from '../types/values.js'; import type { Instruction } from './instruction.js'; import type { OperatorFunction } from '../types/parser.js'; -import { atan2, condition, fac, filter, fold, gamma, hypot, indexOf, join, map, max, min, random, roundTo, sum, json, stringLength, isEmpty, stringContains, startsWith, endsWith, searchCount, trim, toUpper, toLower, toTitle, split, repeat, reverse, left, right, replace, replaceFirst, naturalSort, toNumber, toBoolean, padLeft, padRight, padBoth, slice, urlEncode, base64Encode, coalesceString } from '../functions/index.js'; +import { atan2, condition, fac, filter, fold, gamma, hypot, indexOf, join, map, max, min, random, roundTo, sum, json, stringLength, isEmpty, stringContains, startsWith, endsWith, searchCount, trim, toUpper, toLower, toTitle, split, repeat, reverse, left, right, replace, replaceFirst, naturalSort, toNumber, toBoolean, padLeft, padRight, padBoth, slice, urlEncode, base64Encode, base64Decode, coalesceString } from '../functions/index.js'; import { add, sub, @@ -223,6 +223,7 @@ export class Parser { slice: slice, urlEncode: urlEncode, base64Encode: base64Encode, + base64Decode: base64Decode, coalesce: coalesceString }; diff --git a/test/functions/functions-string.ts b/test/functions/functions-string.ts index 947ef04..d975a6e 100644 --- a/test/functions/functions-string.ts +++ b/test/functions/functions-string.ts @@ -763,6 +763,47 @@ describe('String Functions TypeScript Test', function () { }); }); + describe('base64Decode(str)', function () { + it('should Base64-decode a string', function () { + const parser = new Parser(); + assert.strictEqual(parser.evaluate('base64Decode("aGVsbG8=")'), 'hello'); + assert.strictEqual(parser.evaluate('base64Decode("SGVsbG8gV29ybGQ=")'), 'Hello World'); + assert.strictEqual(parser.evaluate('base64Decode("dGVzdA==")'), 'test'); + }); + + it('should handle empty string', function () { + const parser = new Parser(); + assert.strictEqual(parser.evaluate('base64Decode("")'), ''); + }); + + it('should handle UTF-8 characters', function () { + const parser = new Parser(); + assert.strictEqual(parser.evaluate('base64Decode("aMOpbGxv")'), 'héllo'); + assert.strictEqual(parser.evaluate('base64Decode("5pel5pys6Kqe")'), '日本語'); + }); + + it('should return undefined if argument is undefined', function () { + const parser = new Parser(); + assert.strictEqual(parser.evaluate('base64Decode(undefined)'), undefined); + }); + + it('should throw error for non-string argument', function () { + const parser = new Parser(); + assert.throws(() => parser.evaluate('base64Decode(123)'), /must be a string/); + }); + + it('should throw error for invalid base64 string', function () { + const parser = new Parser(); + assert.throws(() => parser.evaluate('base64Decode("!!invalid!!")'), /Invalid base64 string/); + }); + + it('should roundtrip with base64Encode', function () { + const parser = new Parser(); + assert.strictEqual(parser.evaluate('base64Decode(base64Encode("hello"))'), 'hello'); + assert.strictEqual(parser.evaluate('base64Decode(base64Encode("日本語"))'), '日本語'); + }); + }); + describe('coalesce(a, b, ...)', function () { it('should return the first non-null, non-empty string value', function () { const parser = new Parser();