From 0df146a5957d10df4f4b35b2ce375151ca0d0da1 Mon Sep 17 00:00:00 2001
From: louzt <179385168+louzt@users.noreply.github.com>
Date: Wed, 22 Apr 2026 16:03:31 -0600
Subject: [PATCH] fix: harden IRCv3 tag parsing and multiline protocol helpers

---
 src/lib/ircUtils.tsx              | 62 ++++++++++++++++++++++++++-----
 src/lib/messageProtocol.ts        | 46 +++++++++++++++++++----
 tests/lib/ircUtils.test.ts        | 48 ++++++++++++++++++++++++
 tests/lib/messageProtocol.test.ts | 35 +++++++++++++++++
 4 files changed, 175 insertions(+), 16 deletions(-)
 create mode 100644 tests/lib/ircUtils.test.ts
 create mode 100644 tests/lib/messageProtocol.test.ts

diff --git a/src/lib/ircUtils.tsx b/src/lib/ircUtils.tsx
index eae3eca8..6577ba45 100644
--- a/src/lib/ircUtils.tsx
+++ b/src/lib/ircUtils.tsx
@@ -55,13 +55,57 @@ export function parseMessageTags(tags: string): Record<string, string> {
   const tagPairs = tags.substring(1).split(";");
 
   for (const tag of tagPairs) {
-    const [key, value] = tag.split("=");
-    parsedTags[key] = value?.trim() ?? ""; // empty string fallback
+    const separatorIndex = tag.indexOf("=");
+    const key = separatorIndex === -1 ? tag : tag.slice(0, separatorIndex);
+    const rawValue = separatorIndex === -1 ? "" : tag.slice(separatorIndex + 1);
+
+    parsedTags[key] = unescapeIrcMessageTagValue(rawValue);
   }
 
   return parsedTags;
 }
 
+/**
+ * Decode an IRCv3 message tag value by undoing its backslash escapes.
+ * Unknown escapes lose the backslash; a trailing lone backslash is dropped.
+ */
+function unescapeIrcMessageTagValue(value: string): string {
+  let unescaped = "";
+
+  for (let index = 0; index < value.length; index += 1) {
+    const current = value[index];
+    if (current !== "\\") {
+      unescaped += current;
+      continue;
+    }
+
+    const next = value[index + 1];
+    if (next === undefined) break; // lone trailing backslash: emit nothing
+    index += 1; // the escaped character is consumed together with the backslash
+    switch (next) {
+      case ":": // "\:" encodes ";", which would otherwise end the tag
+        unescaped += ";";
+        break;
+      case "s":
+        unescaped += " ";
+        break;
+      case "\\":
+        unescaped += "\\";
+        break;
+      case "r":
+        unescaped += "\r";
+        break;
+      case "n":
+        unescaped += "\n";
+        break;
+      default: // not a defined escape: keep the character, drop the backslash
+        unescaped += next;
+        break;
+    }
+  }
+  return unescaped;
+}
+
 /**
  * Check if a user is verified based on the account tag matching their nickname.
  * According to IRCv3 account-tag spec, if the account tag matches the sender's nick
@@ -84,13 +128,13 @@ export function parseIsupport(tokens: string): Record<string, string> {
   const tokenPairs = tokens.split(" ");
 
   for (const token of tokenPairs) {
-    const [key, value] = token.split("=");
-    if (value) {
-      // Replace \x20 with actual space character
-      tokenMap[key] = value.replace(/\\x20/g, " ");
-    } else {
-      tokenMap[key] = ""; // empty string fallback
-    }
+    const separatorIndex = token.indexOf("=");
+    const key = separatorIndex === -1 ? token : token.slice(0, separatorIndex);
+    const rawValue =
+      separatorIndex === -1 ? "" : token.slice(separatorIndex + 1);
+
+    // Replace \x20 with actual space character
+    tokenMap[key] = rawValue.replace(/\\x20/g, " ");
   }
 
   return tokenMap;
diff --git a/src/lib/messageProtocol.ts b/src/lib/messageProtocol.ts
index e2837dcd..c552a2ae 100644
--- a/src/lib/messageProtocol.ts
+++ b/src/lib/messageProtocol.ts
@@ -2,6 +2,37 @@
  * IRC protocol utilities for message handling
  */
 
+const utf8Encoder = new TextEncoder();
+/** Returns how many bytes `value` occupies once encoded as UTF-8. */
+function getUtf8ByteLength(value: string): number {
+  return utf8Encoder.encode(value).length;
+}
+
+/**
+ * Split `token` into chunks of at most `maxBytes` UTF-8 bytes without cutting
+ * a code point; a code point wider than `maxBytes` becomes its own chunk.
+ */
+function splitTokenByUtf8Bytes(token: string, maxBytes: number): string[] {
+  const chunks: string[] = [];
+  let currentChunk = "";
+  let currentBytes = 0; // running size, so each code point is encoded once
+
+  // for...of walks code points, so surrogate pairs (emoji) stay together.
+  for (const character of token) {
+    const characterBytes = getUtf8ByteLength(character);
+    if (currentChunk && currentBytes + characterBytes > maxBytes) {
+      chunks.push(currentChunk);
+      currentChunk = "";
+      currentBytes = 0;
+    }
+    currentChunk += character;
+    currentBytes += characterBytes;
+  }
+
+  if (currentChunk) chunks.push(currentChunk);
+  return chunks;
+}
+
 /**
  * Helper function to split long messages while respecting IRC protocol limits
  * @param message - The message to split
@@ -17,7 +48,7 @@ export const splitLongMessage = (
   // Available space for the actual message content
   const maxMessageLength = 512 - protocolOverhead;
 
-  if (message.length <= maxMessageLength) {
+  if (getUtf8ByteLength(message) <= maxMessageLength) {
     return [message];
   }
 
@@ -26,7 +57,7 @@ export const splitLongMessage = (
   const words = message.split(" ");
 
   for (const word of words) {
-    if (word.length > maxMessageLength) {
+    if (getUtf8ByteLength(word) > maxMessageLength) {
       // If a single word is too long, we have to break it
       if (currentLine) {
         lines.push(currentLine);
@@ -34,10 +65,11 @@ export const splitLongMessage = (
       }
 
       // Split the long word
-      for (let i = 0; i < word.length; i += maxMessageLength) {
-        lines.push(word.slice(i, i + maxMessageLength));
-      }
-    } else if (`${currentLine} ${word}`.length > maxMessageLength) {
+      lines.push(...splitTokenByUtf8Bytes(word, maxMessageLength));
+    } else if (
+      getUtf8ByteLength(currentLine ? `${currentLine} ${word}` : word) >
+      maxMessageLength
+    ) {
       // Adding this word would exceed the limit
       if (currentLine) {
         lines.push(currentLine);
@@ -87,5 +119,5 @@ export const calculateProtocolOverhead = (target: string): number => {
  * @returns A unique batch identifier
  */
 export const createBatchId = (): string => {
-  return `ml-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
+  return `ml-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`;
 };
diff --git a/tests/lib/ircUtils.test.ts b/tests/lib/ircUtils.test.ts
new file mode 100644
index 00000000..6a908763
--- /dev/null
+++ b/tests/lib/ircUtils.test.ts
@@ -0,0 +1,48 @@
+import { describe, expect, test } from "vitest";
+import { parseIsupport, parseMessageTags } from "../../src/lib/ircUtils";
+
+describe("parseMessageTags", () => {
+  test("unescapes IRCv3 message tag values without trimming meaningful data", () => {
+    expect(parseMessageTags("@+example=raw+:=,escaped\\:\\s\\\\")).toEqual({
+      "+example": "raw+:=,escaped; \\", // ends in one literal backslash
+    });
+  });
+  // Only the first "=" separates key from value; later ones are data.
+  test("preserves everything after the first equals sign in a tag value", () => {
+    expect(parseMessageTags("@foo=a=b=c")).toEqual({
+      foo: "a=b=c",
+    });
+  });
+  // "foo" carries no "=" at all, while "bar" has "=" with nothing after it.
+  test("treats empty and missing values as empty strings", () => {
+    expect(parseMessageTags("@foo;bar=")).toEqual({
+      foo: "",
+      bar: "",
+    });
+  });
+  // Wire form: @foo=\b;bar=test\ (the JS literals double each backslash).
+  test("drops invalid escape backslashes and trailing lone backslashes per spec", () => {
+    expect(parseMessageTags("@foo=\\b;bar=test\\")).toEqual({
+      foo: "b",
+      bar: "test",
+    });
+  });
+});
+
+describe("parseIsupport", () => {
+  test("preserves everything after the first equals sign in token values", () => {
+    expect(
+      parseIsupport("EXAMPLE=foo=bar CLIENTTAGDENY=*,-draft/react"),
+    ).toEqual({
+      EXAMPLE: "foo=bar",
+      CLIENTTAGDENY: "*,-draft/react",
+    });
+  });
+  // CASEMAPPING has no "=", so it is expected to map to an empty string.
+  test("unescapes spaces encoded as \\x20 in token values", () => {
+    expect(parseIsupport("NETWORK=Test\\x20Network CASEMAPPING")).toEqual({
+      NETWORK: "Test Network",
+      CASEMAPPING: "",
+    });
+  });
+});
diff --git a/tests/lib/messageProtocol.test.ts b/tests/lib/messageProtocol.test.ts
new file mode 100644
index 00000000..69305bb8
--- /dev/null
+++ b/tests/lib/messageProtocol.test.ts
@@ -0,0 +1,35 @@
+import { describe, expect, test } from "vitest";
+import { createBatchId, splitLongMessage } from "../../src/lib/messageProtocol";
+
+const utf8Encoder = new TextEncoder();
+
+describe("messageProtocol", () => {
+  test("creates batch IDs with only ASCII letters, numbers, and hyphen", () => {
+    const batchId = createBatchId();
+
+    expect(batchId).not.toContain("_");
+    expect(batchId).toMatch(/^[A-Za-z0-9-]+$/);
+  });
+
+  test("preserves trailing whitespace on the final split chunk", () => {
+    const lines = splitLongMessage("hello world again ", "x".repeat(370));
+    // NOTE(review): presumably a 370-char target leaves 13 payload bytes — confirm.
+    expect(lines.at(-1)).toBe("again ");
+  });
+
+  test("does not trim existing whitespace before pushing an earlier chunk", () => {
+    const lines = splitLongMessage("hello   again final", "x".repeat(371));
+    // NOTE(review): presumably a 371-char target leaves 12 payload bytes — confirm.
+    expect(lines[0]).toBe("hello  ");
+  });
+
+  test("splits multiline payloads using UTF-8 byte length without breaking emoji code points", () => {
+    const lines = splitLongMessage("🙂🙂🙂🙂", "x".repeat(370));
+    // Each "🙂" is 4 UTF-8 bytes, so only three fit in the 13-byte cap below.
+    expect(lines).toEqual(["🙂🙂🙂", "🙂"]);
+    expect(lines.join("")).toBe("🙂🙂🙂🙂");
+    expect(lines.every((line) => utf8Encoder.encode(line).length <= 13)).toBe(
+      true,
+    );
+  });
+});