From 0df146a5957d10df4f4b35b2ce375151ca0d0da1 Mon Sep 17 00:00:00 2001 From: louzt <179385168+louzt@users.noreply.github.com> Date: Wed, 22 Apr 2026 16:03:31 -0600 Subject: [PATCH] fix: harden IRCv3 tag parsing and multiline protocol helpers --- src/lib/ircUtils.tsx | 62 ++++++++++++++++++++++++++----- src/lib/messageProtocol.ts | 46 +++++++++++++++++++---- tests/lib/ircUtils.test.ts | 48 ++++++++++++++++++++++++ tests/lib/messageProtocol.test.ts | 35 +++++++++++++++++ 4 files changed, 175 insertions(+), 16 deletions(-) create mode 100644 tests/lib/ircUtils.test.ts create mode 100644 tests/lib/messageProtocol.test.ts diff --git a/src/lib/ircUtils.tsx b/src/lib/ircUtils.tsx index eae3eca8..6577ba45 100644 --- a/src/lib/ircUtils.tsx +++ b/src/lib/ircUtils.tsx @@ -55,13 +55,57 @@ export function parseMessageTags(tags: string): Record { const tagPairs = tags.substring(1).split(";"); for (const tag of tagPairs) { - const [key, value] = tag.split("="); - parsedTags[key] = value?.trim() ?? ""; // empty string fallback + const separatorIndex = tag.indexOf("="); + const key = separatorIndex === -1 ? tag : tag.slice(0, separatorIndex); + const rawValue = separatorIndex === -1 ? "" : tag.slice(separatorIndex + 1); + + parsedTags[key] = unescapeIrcMessageTagValue(rawValue); } return parsedTags; } +function unescapeIrcMessageTagValue(value: string): string { + let unescaped = ""; + + for (let index = 0; index < value.length; index += 1) { + const current = value[index]; + + if (current !== "\\") { + unescaped += current; + continue; + } + + const next = value[index + 1]; + if (next === undefined) break; + + index += 1; + + switch (next) { + case ":": + unescaped += ";"; + break; + case "s": + unescaped += " "; + break; + case "\\": + unescaped += "\\"; + break; + case "r": + unescaped += "\r"; + break; + case "n": + unescaped += "\n"; + break; + default: + unescaped += next; + break; + } + } + + return unescaped; +} + /** * Check if a user is verified based on the account tag matching their nickname. * According to IRCv3 account-tag spec, if the account tag matches the sender's nick @@ -84,13 +128,13 @@ export function parseIsupport(tokens: string): Record { const tokenPairs = tokens.split(" "); for (const token of tokenPairs) { - const [key, value] = token.split("="); - if (value) { - // Replace \x20 with actual space character - tokenMap[key] = value.replace(/\\x20/g, " "); - } else { - tokenMap[key] = ""; // empty string fallback - } + const separatorIndex = token.indexOf("="); + const key = separatorIndex === -1 ? token : token.slice(0, separatorIndex); + const rawValue = + separatorIndex === -1 ? "" : token.slice(separatorIndex + 1); + + // Replace \x20 with actual space character + tokenMap[key] = rawValue.replace(/\\x20/g, " "); } return tokenMap; diff --git a/src/lib/messageProtocol.ts b/src/lib/messageProtocol.ts index e2837dcd..c552a2ae 100644 --- a/src/lib/messageProtocol.ts +++ b/src/lib/messageProtocol.ts @@ -2,6 +2,37 @@ * IRC protocol utilities for message handling */ +const utf8Encoder = new TextEncoder(); + +function getUtf8ByteLength(value: string): number { + return utf8Encoder.encode(value).length; +} + +function splitTokenByUtf8Bytes(token: string, maxBytes: number): string[] { + const chunks: string[] = []; + let currentChunk = ""; + + for (const character of token) { + const candidateChunk = `${currentChunk}${character}`; + + if (getUtf8ByteLength(candidateChunk) > maxBytes) { + if (currentChunk) { + chunks.push(currentChunk); + } + currentChunk = character; + continue; + } + + currentChunk = candidateChunk; + } + + if (currentChunk) { + chunks.push(currentChunk); + } + + return chunks; +} + /** * Helper function to split long messages while respecting IRC protocol limits * @param message - The message to split @@ -17,7 +48,7 @@ export const splitLongMessage = ( // Available space for the actual message content const maxMessageLength = 512 - protocolOverhead; - if (message.length <= maxMessageLength) { + if (getUtf8ByteLength(message) <= maxMessageLength) { return [message]; } @@ -26,7 +57,7 @@ export const splitLongMessage = ( const words = message.split(" "); for (const word of words) { - if (word.length > maxMessageLength) { + if (getUtf8ByteLength(word) > maxMessageLength) { // If a single word is too long, we have to break it if (currentLine) { lines.push(currentLine); @@ -34,10 +65,11 @@ export const splitLongMessage = ( } // Split the long word - for (let i = 0; i < word.length; i += maxMessageLength) { - lines.push(word.slice(i, i + maxMessageLength)); - } - } else if (`${currentLine} ${word}`.length > maxMessageLength) { + lines.push(...splitTokenByUtf8Bytes(word, maxMessageLength)); + } else if ( + getUtf8ByteLength(currentLine ? `${currentLine} ${word}` : word) > + maxMessageLength + ) { // Adding this word would exceed the limit if (currentLine) { lines.push(currentLine); @@ -87,5 +119,5 @@ export const calculateProtocolOverhead = (target: string): number => { * @returns A unique batch identifier */ export const createBatchId = (): string => { - return `ml-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`; + return `ml-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`; }; diff --git a/tests/lib/ircUtils.test.ts b/tests/lib/ircUtils.test.ts new file mode 100644 index 00000000..6a908763 --- /dev/null +++ b/tests/lib/ircUtils.test.ts @@ -0,0 +1,48 @@ +import { describe, expect, test } from "vitest"; +import { parseIsupport, parseMessageTags } from "../../src/lib/ircUtils"; + +describe("parseMessageTags", () => { + test("unescapes IRCv3 message tag values without trimming meaningful data", () => { + expect(parseMessageTags("@+example=raw+:=,escaped\\:\\s\\\\")).toEqual({ + "+example": "raw+:=,escaped; \\", + }); + }); + + test("preserves everything after the first equals sign in a tag value", () => { + expect(parseMessageTags("@foo=a=b=c")).toEqual({ + foo: "a=b=c", + }); + }); + + test("treats empty and missing values as empty strings", () => { + expect(parseMessageTags("@foo;bar=")).toEqual({ + foo: "", + bar: "", + }); + }); + + test("drops invalid escape backslashes and trailing lone backslashes per spec", () => { + expect(parseMessageTags("@foo=\\b;bar=test\\")).toEqual({ + foo: "b", + bar: "test", + }); + }); +}); + +describe("parseIsupport", () => { + test("preserves everything after the first equals sign in token values", () => { + expect( + parseIsupport("EXAMPLE=foo=bar CLIENTTAGDENY=*,-draft/react"), + ).toEqual({ + EXAMPLE: "foo=bar", + CLIENTTAGDENY: "*,-draft/react", + }); + }); + + test("unescapes spaces encoded as \\x20 in token values", () => { + expect(parseIsupport("NETWORK=Test\\x20Network CASEMAPPING")).toEqual({ + NETWORK: "Test Network", + CASEMAPPING: "", + }); + }); +}); diff --git a/tests/lib/messageProtocol.test.ts b/tests/lib/messageProtocol.test.ts new file mode 100644 index 00000000..69305bb8 --- /dev/null +++ b/tests/lib/messageProtocol.test.ts @@ -0,0 +1,35 @@ +import { describe, expect, test } from "vitest"; +import { createBatchId, splitLongMessage } from "../../src/lib/messageProtocol"; + +const utf8Encoder = new TextEncoder(); + +describe("messageProtocol", () => { + test("creates batch IDs with only ASCII letters, numbers, and hyphen", () => { + const batchId = createBatchId(); + + expect(batchId).not.toContain("_"); + expect(batchId).toMatch(/^[A-Za-z0-9-]+$/); + }); + + test("preserves trailing whitespace on the final split chunk", () => { + const lines = splitLongMessage("hello world again ", "x".repeat(370)); + + expect(lines.at(-1)).toBe("again "); + }); + + test("does not trim existing whitespace before pushing an earlier chunk", () => { + const lines = splitLongMessage("hello again final", "x".repeat(371)); + + expect(lines[0]).toBe("hello "); + }); + + test("splits multiline payloads using UTF-8 byte length without breaking emoji code points", () => { + const lines = splitLongMessage("🙂🙂🙂🙂", "x".repeat(370)); + + expect(lines).toEqual(["🙂🙂🙂", "🙂"]); + expect(lines.join("")).toBe("🙂🙂🙂🙂"); + expect(lines.every((line) => utf8Encoder.encode(line).length <= 13)).toBe( + true, + ); + }); +});