/*
 * Provenance
 * ----------
 * Reconstructed from a two-commit patch series by Willem (2025-10-12):
 *   c80f6b1d0fecc8c2f05fdf47790ae21d8e8cddbe  [PATCH 1/2] Update allowanylanguage.js
 *   0aa9598c0dd8ddcf396a4f22d49327b4837dd989  [PATCH 2/2] Update README.md
 * Target: Specialized Areas/Regular Expressions/AllowAnyLanguage/
 *
 * The previous implementation was the one-liner
 *   module.exports.trimNonCharacters = (str) => str.replace(/[^\p{L}\d()\s]+/ug, '');
 * which did not work when tested on a ServiceNow PDI. The previous README read:
 *   "Allow Any Language Character Remove Special Characters, Can be used to
 *    verify valid names. Sorry Elon Musk's First born."
 *
 * trimNonCharacters
 * -----------------
 * Removes special characters from a string while preserving letters from
 * several language groups. Useful for validating names or other user input in
 * internationalized applications.
 *
 *   Sorry, Elon Musk's firstborn: "X Æ A-12" does not make it through
 *   unscathed (the hyphen is removed, giving "X Æ A12").
 *
 * Features
 *   - Removes punctuation, emojis, and symbols.
 *   - Preserves:
 *       - Basic Latin letters (A-Z, a-z), digits (0-9), whitespace, parentheses
 *       - Accented Western European characters (e.g. é, ñ, ü)
 *       - Central/Eastern European letters (Latin Extended-A)
 *       - Cyrillic (Russian, Ukrainian, Bulgarian)
 *       - Greek
 *       - Arabic
 *       - Hindi/Sanskrit (Devanagari)
 *       - CJK unified ideographs (Chinese, Japanese Kanji, Korean Hanja).
 *         Kana and Hangul are NOT in the allow-list and are removed.
 *
 * Compatibility
 *   Written for ServiceNow background scripts, so it deliberately avoids
 *   Unicode property escapes (\p{L}), multi-line regex literals, and inline
 *   comments inside a regex. Only ES5 syntax is used.
 *
 * Usage
 *   var input = "Hello, мир!";
 *   var cleaned = trimNonCharacters(input);
 *   gs.info("Cleaned: " + cleaned);
 *
 * Customization
 *   Language support is modular: each entry of `allowedRanges` below can be
 *   commented out or edited independently.
 */

/**
 * Strip every character that is not a letter, digit, whitespace, or
 * parenthesis in one of the supported scripts.
 *
 * @param {*} str Value to clean. Non-string values are coerced with String();
 *                null and undefined yield an empty string.
 * @returns {string} The input with all disallowed characters removed.
 */
function trimNonCharacters(str) {
    // Nothing to clean; also avoids turning null into the text "null".
    if (str === null || str === undefined) {
        return "";
    }

    // Unicode blocks that are allowed, one entry per language group.
    var allowedRanges = [
        "a-zA-Z0-9()",      // Basic Latin letters, digits, parentheses
        "\\s",              // Whitespace
        "\\u00C0-\\u00FF",  // Western European (Latin-1 Supplement), e.g. é, ñ, ü
        "\\u0100-\\u017F",  // Central/Eastern European (Latin Extended-A), e.g. Ą, Č, Ő
        "\\u0400-\\u04FF",  // Cyrillic (Russian, Ukrainian, Bulgarian)
        "\\u0370-\\u03FF",  // Greek
        "\\u0600-\\u06FF",  // Arabic
        "\\u0900-\\u097F",  // Devanagari (Hindi, Sanskrit)
        "\\u4E00-\\u9FFF"   // CJK Unified Ideographs (Chinese, Japanese Kanji, Korean Hanja)
    ];

    // The blocks above are not letters-only: they also contain punctuation and
    // symbols. A negated character class cannot subtract, so those code points
    // are removed in a second pass.
    var excludedCodePoints = [
        "\\u00D7\\u00F7",                           // Latin-1: multiplication and division signs
        "\\u0375\\u037E\\u0384\\u0385\\u0387\\u03F6", // Greek: numeral sign, question mark, tonos, ano teleia, lunate symbol
        "\\u0482",                                  // Cyrillic: thousands sign
        "\\u0600-\\u060F\\u061B-\\u061F",           // Arabic: number signs, comma, semicolon, question mark
        "\\u066A-\\u066D\\u06D4",                   // Arabic: percent sign, separators, star, full stop
        "\\u06DD\\u06DE\\u06E9\\u06FD\\u06FE",      // Arabic: end of ayah and other symbols
        "\\u0964\\u0965\\u0970"                     // Devanagari: danda, double danda, abbreviation sign
    ];

    var disallowed = new RegExp("[^" + allowedRanges.join("") + "]+", "g");
    var excluded = new RegExp("[" + excludedCodePoints.join("") + "]+", "g");

    // String() guarantees the JavaScript String.prototype.replace is used even
    // when the caller passes a number or a platform (non-JS) string object.
    return String(str).replace(disallowed, "").replace(excluded, "");
}

// Example input, one fragment per language
var input =
    "Hello, " +             // English: "Hello, "
    "мир! " +               // Russian: "world!"
    "Γειά σου κόσμε! " +    // Greek: "Hello world!"
    "مرحبا بالعالم! " +     // Arabic: "Hello world!"
    "नमस्ते दुनिया! " +       // Hindi: "Hello world!"
    "你好,世界!";           // Chinese: "Hello, world!"

var cleaned = trimNonCharacters(input);

// `gs` is the ServiceNow GlideSystem global; skip logging when the script is
// loaded anywhere else so the function stays usable.
if (typeof gs !== "undefined") {
    gs.info("Original: " + input);
    gs.info("Cleaned: " + cleaned);
}