feat: add sanitizeTextForRender method (#58)

scmmishra · web-flow · commit 097e00a1e60f · 2025-09-04T11:42:12.000+05:30
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -7,7 +7,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        node: ['16.x', '20.x', '23.x']
+        node: ['20.x', '23.x']
         os: ['ubuntu-latest']
 
     steps:
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@chatwoot/utils",
-  "version": "0.0.50",
+  "version": "0.0.51",
   "description": "Chatwoot utils",
   "private": false,
   "license": "MIT",
diff --git a/src/index.ts b/src/index.ts
@@ -16,7 +16,7 @@ import { toURL, isSameHost, isValidDomain } from './url';
 
 import { getRecipients } from './email';
 
-import { parseBoolean } from './string';
+import { parseBoolean, sanitizeTextForRender } from './string';
 import {
   sortAsc,
   quantile,
@@ -62,6 +62,7 @@ export {
   parseBoolean,
   quantile,
   replaceVariablesInMessage,
+  sanitizeTextForRender,
   sortAsc,
   splitName,
   toURL,
diff --git a/src/string.ts b/src/string.ts
@@ -3,7 +3,6 @@
  * @param {string | number} candidate - The string boolean value to be parsed
  * @return {boolean} - The parsed boolean value
  */
-
 export function parseBoolean(candidate: string | number) {
   try {
     // lowercase the string, so TRUE becomes true
@@ -16,3 +15,65 @@ export function parseBoolean(candidate: string | number) {
     return false;
   }
 }
+
+/**
+ * Sanitizes text for safe HTML rendering by escaping potentially dangerous characters
+ * while preserving valid HTML tags.
+ *
+ * This function performs the following transformations:
+ * - Converts newline characters (\n) to HTML line breaks (<br>)
+ * - Escapes stray '<' characters that are not part of valid HTML tags (e.g., "x < 5" → "x &lt; 5")
+ * - Escapes stray '>' characters that are not part of valid HTML tags (e.g., "x > 5" → "x &gt; 5")
+ * - Preserves valid HTML tags and their attributes (e.g., <div>, <span class="test">, </p>)
+ *
+ * LIMITATIONS: This regex-based approach has known limitations:
+ * - Cannot properly handle '>' characters inside HTML attributes (e.g., <div title="x > 5"> may not work correctly)
+ * - Complex nested quotes or edge cases may not be handled perfectly
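+ * - Anything shaped like a tag (including <script> and tags with inline event-handler attributes) is preserved as-is, so this is not an XSS sanitizer; for more complex HTML sanitization needs, consider using a proper HTML parser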
+ *
+ * @param {string | null | undefined} text - The text to sanitize. Can be null or undefined.
+ * @returns {string} The sanitized text safe for HTML rendering, or an empty string if the input is null, undefined, or empty.
+ *
+ * @example
+ * sanitizeTextForRender('Hello\nWorld') // 'Hello<br>World'
+ * sanitizeTextForRender('if x < 5') // 'if x &lt; 5'
+ * sanitizeTextForRender('<div>Hello</div>') // '<div>Hello</div>'
+ * sanitizeTextForRender('Price < $100 <strong>Sale!</strong>') // 'Price &lt; $100 <strong>Sale!</strong>'
+ */
+export function sanitizeTextForRender(text: string | null | undefined) {
+  if (!text) return '';
+
+  return (
+    text
+      .replace(/\n/g, '<br>')
+
+      // Escape < that doesn't start a valid HTML tag
+      // Regex breakdown:
+      // <          - matches '<'
+      // (?!        - negative lookahead (not followed by)
+      //   \/?      - optional forward slash for closing tags
+      //   \w+      - one or more word characters (tag name)
+      //   (?:      - non-capturing group for attributes
+      //     \s+    - whitespace before attributes
+      //     [^>]*  - any characters except '>' (attribute content)
+      //   )?       - attributes are optional
+      //   \/?>     - optional self-closing slash, then '>'
+      // )          - end lookahead
+      .replace(/<(?!\/?\w+(?:\s+[^>]*)?\/?>)/g, '&lt;')
+
+      // Escape > that isn't part of an HTML tag
+      // Regex breakdown:
+      // (?<!       - negative lookbehind (not preceded by)
+      //   <        - opening '<'
+      //   \/?      - optional forward slash for closing tags
+      //   \w+      - one or more word characters (tag name)
+      //   (?:      - non-capturing group for attributes
+      //     \s+    - whitespace before attributes
+      //     [^>]*  - any characters except '>' (attribute content)
+      //   )?       - attributes are optional
+      //   \/?      - optional self-closing slash before >
+      // )          - end lookbehind
+      // >          - matches '>'
+      .replace(/(?<!<\/?\w+(?:\s+[^>]*)?\/?)>/g, '&gt;')
+  );
+}
diff --git a/test/string.test.ts b/test/string.test.ts
@@ -1,4 +1,4 @@
-import { parseBoolean } from '../src';
+import { parseBoolean, sanitizeTextForRender } from '../src';
 
 describe('#parseBoolean', () => {
   test('returns true for input "true"', () => {
@@ -37,3 +37,156 @@ describe('#parseBoolean', () => {
     expect(parseBoolean(undefined)).toBe(false);
   });
 });
+
+describe('#sanitizeTextForRender', () => {
+  it('should handle null and undefined values', () => {
+    expect(sanitizeTextForRender(null)).toBe('');
+    expect(sanitizeTextForRender(undefined)).toBe('');
+    expect(sanitizeTextForRender('')).toBe('');
+  });
+
+  it('should convert newlines to <br> tags', () => {
+    expect(sanitizeTextForRender('Line 1\nLine 2')).toBe('Line 1<br>Line 2');
+    expect(sanitizeTextForRender('Multiple\n\nNewlines')).toBe(
+      'Multiple<br><br>Newlines'
+    );
+  });
+
+  it('should escape stray < characters', () => {
+    expect(sanitizeTextForRender('if x < 5')).toBe('if x &lt; 5');
+    expect(sanitizeTextForRender('< this is not a tag')).toBe(
+      '&lt; this is not a tag'
+    );
+    expect(sanitizeTextForRender('price < $100')).toBe('price &lt; $100');
+  });
+
+  it('should escape stray > characters', () => {
+    expect(sanitizeTextForRender('if x > 5')).toBe('if x &gt; 5');
+    expect(sanitizeTextForRender('this is not a tag >')).toBe(
+      'this is not a tag &gt;'
+    );
+    expect(sanitizeTextForRender('score > 90%')).toBe('score &gt; 90%');
+  });
+
+  it('should escape both stray < and > characters', () => {
+    expect(sanitizeTextForRender('5 < x < 10')).toBe('5 &lt; x &lt; 10');
+    expect(sanitizeTextForRender('x > 5 && y < 10')).toBe(
+      'x &gt; 5 && y &lt; 10'
+    );
+  });
+
+  it('should preserve valid HTML tags', () => {
+    expect(sanitizeTextForRender('<div>Hello</div>')).toBe('<div>Hello</div>');
+    expect(sanitizeTextForRender('<span class="test">World</span>')).toBe(
+      '<span class="test">World</span>'
+    );
+    expect(sanitizeTextForRender('<br>')).toBe('<br>');
+    expect(sanitizeTextForRender('<img src="test.jpg" />')).toBe(
+      '<img src="test.jpg" />'
+    );
+  });
+
+  it('should preserve nested HTML tags', () => {
+    expect(sanitizeTextForRender('<div><span>Nested</span></div>')).toBe(
+      '<div><span>Nested</span></div>'
+    );
+    expect(
+      sanitizeTextForRender('<ul><li>Item 1</li><li>Item 2</li></ul>')
+    ).toBe('<ul><li>Item 1</li><li>Item 2</li></ul>');
+  });
+
+  it('should handle mixed content with valid tags and stray characters', () => {
+    expect(sanitizeTextForRender('Price < $100 <strong>on sale</strong>')).toBe(
+      'Price &lt; $100 <strong>on sale</strong>'
+    );
+    expect(sanitizeTextForRender('<p>x > 5</p> and y < 10')).toBe(
+      '<p>x &gt; 5</p> and y &lt; 10'
+    );
+  });
+
+  it('should handle edge cases with malformed HTML-like content', () => {
+    expect(sanitizeTextForRender('<<invalid>>')).toBe('&lt;<invalid>&gt;');
+    expect(sanitizeTextForRender('<not a tag')).toBe('&lt;not a tag');
+    expect(sanitizeTextForRender('not a tag>')).toBe('not a tag&gt;');
+  });
+
+  it('should handle email addresses and URLs with angle brackets', () => {
+    expect(sanitizeTextForRender('Contact: <user@example.com>')).toBe(
+      'Contact: &lt;user@example.com&gt;'
+    );
+    expect(sanitizeTextForRender('Email me at < user@example.com >')).toBe(
+      'Email me at &lt; user@example.com &gt;'
+    );
+  });
+
+  it('should handle mathematical expressions', () => {
+    expect(sanitizeTextForRender('if (x < y && y > z)')).toBe(
+      'if (x &lt; y && y &gt; z)'
+    );
+    expect(sanitizeTextForRender('array[i] < array[j]')).toBe(
+      'array[i] &lt; array[j]'
+    );
+  });
+
+  it('should handle HTML entities within valid tags', () => {
+    expect(sanitizeTextForRender('<div>&lt;escaped&gt;</div>')).toBe(
+      '<div>&lt;escaped&gt;</div>'
+    );
+    expect(sanitizeTextForRender('<span>already &amp; escaped</span>')).toBe(
+      '<span>already &amp; escaped</span>'
+    );
+  });
+
+  it('should handle complex real-world email content', () => {
+    const emailContent = `Hello,\n\nThe price is < $50 for items where quantity > 10.\n<p>Best regards,</p>\n<strong>Sales Team</strong>`;
+    const expected = `Hello,<br><br>The price is &lt; $50 for items where quantity &gt; 10.<br><p>Best regards,</p><br><strong>Sales Team</strong>`;
+    expect(sanitizeTextForRender(emailContent)).toBe(expected);
+  });
+
+  it('should handle quoted email content', () => {
+    const quoted = `Original message:\n> User wrote: x < 5\n<blockquote>Previous reply</blockquote>`;
+    const expected = `Original message:<br>&gt; User wrote: x &lt; 5<br><blockquote>Previous reply</blockquote>`;
+    expect(sanitizeTextForRender(quoted)).toBe(expected);
+  });
+
+  it('should handle self-closing tags correctly', () => {
+    expect(sanitizeTextForRender('<br />')).toBe('<br />');
+    expect(sanitizeTextForRender('<img src="test.jpg" />')).toBe(
+      '<img src="test.jpg" />'
+    );
+    expect(sanitizeTextForRender('<input type="text" value="test" />')).toBe(
+      '<input type="text" value="test" />'
+    );
+    expect(sanitizeTextForRender('<hr/>')).toBe('<hr/>');
+    expect(sanitizeTextForRender('Text before <br /> text after')).toBe(
+      'Text before <br /> text after'
+    );
+    expect(sanitizeTextForRender('<meta charset="UTF-8" />')).toBe(
+      '<meta charset="UTF-8" />'
+    );
+  });
+
+  it('should handle complex URLs in attributes', () => {
+    expect(
+      sanitizeTextForRender(
+        '<img src="https://example.com/image.jpg?width=100&height=200&format=webp" />'
+      )
+    ).toBe(
+      '<img src="https://example.com/image.jpg?width=100&height=200&format=webp" />'
+    );
+    expect(
+      sanitizeTextForRender(
+        '<a href="https://api.example.com/v2/users/123/profile?include=posts&sort=desc">Profile</a>'
+      )
+    ).toBe(
+      '<a href="https://api.example.com/v2/users/123/profile?include=posts&sort=desc">Profile</a>'
+    );
+    expect(
+      sanitizeTextForRender(
+        '<iframe src="//cdn.example.com/embed/video/12345?autoplay=1&loop=0" />'
+      )
+    ).toBe(
+      '<iframe src="//cdn.example.com/embed/video/12345?autoplay=1&loop=0" />'
+    );
+  });
+});

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "@chatwoot/utils",`
`3`		`- "version": "0.0.50",`
	`3`	`+ "version": "0.0.51",`
`4`	`4`	`"description": "Chatwoot utils",`
`5`	`5`	`"private": false,`
`6`	`6`	`"license": "MIT",`