Skip to content

Commit 4768a67

Browse files
fix(security): remove remaining regex redos paths (#111)
1 parent c695283 commit 4768a67

3 files changed

Lines changed: 279 additions & 65 deletions

File tree

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import { describe, expect, it } from "bun:test";
2+
import {
3+
collectNormalizedConnectors,
4+
extractConnectorPairFromText,
5+
} from "./connector-text";
6+
7+
// Covers pair extraction from product titles: the "<connector> to <connector>"
// phrase is located and both ends are reported as normalized connector names.
describe("extractConnectorPairFromText", () => {
  it("parses USB-C to USB-C titles without regex backtracking", () => {
    const pair = extractConnectorPairFromText("USB-C to USB-C Cable");

    expect(pair).toEqual({
      from: "USB-C",
      to: "USB-C",
      matchedText: "usb c to usb c",
    });
  });

  it("normalizes Thunderbolt connectors to USB-C physical endpoints", () => {
    const pair = extractConnectorPairFromText(
      "Thunderbolt 5 to USB-C Pro Cable"
    );

    // Thunderbolt is reported as the USB-C connector; the version number is
    // kept only in the matched text.
    expect(pair).toEqual({
      from: "USB-C",
      to: "USB-C",
      matchedText: "thunderbolt 5 to usb c",
    });
  });
});
26+
27+
// Covers free-text scanning: every connector mention is normalized and each
// normalized name is reported once, in order of first appearance.
describe("collectNormalizedConnectors", () => {
  it("deduplicates normalized connectors from free text", () => {
    const description =
      "Works with USB-C, Thunderbolt 4, and Lightning accessories.";

    // "USB-C" and "Thunderbolt 4" both normalize to "USB-C".
    expect(collectNormalizedConnectors(description)).toEqual([
      "USB-C",
      "Lightning",
    ]);
  });
});
Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
/**
 * Result of locating a "<connector> to <connector>" phrase in text.
 */
interface ConnectorPairMatch {
  /** Normalized connector name found before the word "to". */
  from: string;
  /** Normalized connector name found after the word "to". */
  to: string;
  /** Normalized, lowercase rendering of the matched phrase. */
  matchedText: string;
}
6+
7+
/**
 * A single connector recognized at some position in a tokenized word list.
 */
interface ConnectorTokenMatch {
  /** Normalized connector name, e.g. "USB-C". */
  connector: string;
  /** Normalized, lowercase text describing what was matched. */
  matchedText: string;
  /** Index of the first word after the matched connector. */
  nextIndex: number;
}

/**
 * Builds a {@link ConnectorTokenMatch} from its three fields.
 *
 * @param connector - Normalized connector name.
 * @param matchedText - Normalized text of the matched words.
 * @param nextIndex - Index of the word following the match.
 * @returns A new match object holding exactly the given values.
 */
const createConnectorTokenMatch = (
  connector: string,
  matchedText: string,
  nextIndex: number
): ConnectorTokenMatch => ({
  connector: connector,
  matchedText: matchedText,
  nextIndex: nextIndex,
});
24+
25+
/**
 * Splits text into lowercase words for connector matching, without using
 * regular expressions.
 *
 * A word is a run of ASCII digits, ASCII lowercase letters (after lowercasing
 * the input) and periods; every other character acts as a separator. Periods
 * are kept inside a word so version-like tokens such as "3.0" survive, but
 * leading and trailing periods are trimmed so that sentence punctuation
 * ("USB-C.", "Lightning.") does not change the word. Words that consist only
 * of periods are dropped.
 *
 * @param text - Arbitrary input text.
 * @returns The words in order of appearance; empty when none are found.
 */
const tokenizeConnectorWords = (text: string): string[] => {
  const words: string[] = [];
  let current = "";

  // Emits the pending word (minus boundary periods) and resets the buffer.
  const flushCurrent = (): void => {
    let start = 0;
    let end = current.length;

    while (start < end && current[start] === ".") {
      start += 1;
    }
    while (end > start && current[end - 1] === ".") {
      end -= 1;
    }

    if (end > start) {
      words.push(current.slice(start, end));
    }
    current = "";
  };

  for (const character of text.toLowerCase()) {
    const isDigit = character >= "0" && character <= "9";
    const isLowercaseLetter = character >= "a" && character <= "z";

    if (isDigit || isLowercaseLetter || character === ".") {
      current += character;
      continue;
    }

    flushCurrent();
  }

  flushCurrent();

  return words;
};
50+
51+
/**
 * Reports whether a string is non-empty and made up solely of the ASCII
 * digits "0" through "9".
 *
 * @param value - String to inspect.
 * @returns `true` for strings such as "4" or "2024"; `false` for the empty
 * string and for any string containing another character.
 */
const isDigitsOnly = (value: string): boolean => {
  const length = value.length;
  if (length === 0) {
    return false;
  }

  const zero = "0".charCodeAt(0);
  const nine = "9".charCodeAt(0);

  for (let position = 0; position < length; position += 1) {
    const code = value.charCodeAt(position);
    const isAsciiDigit = code >= zero && code <= nine;
    if (!isAsciiDigit) {
      return false;
    }
  }

  return true;
};
64+
65+
/**
 * Recognizes the single word "lightning" at `startIndex`.
 *
 * @param words - Lowercase words being scanned.
 * @param startIndex - Position to inspect.
 * @returns A "Lightning" match that consumes one word, or `null` when the
 * word at `startIndex` is anything else (including out of range).
 */
const readLightningToken = (
  words: string[],
  startIndex: number
): ConnectorTokenMatch | null =>
  words[startIndex] === "lightning"
    ? createConnectorTokenMatch("Lightning", "lightning", startIndex + 1)
    : null;
75+
76+
/**
 * Recognizes Micro-USB written either as one word ("microusb") or as the two
 * consecutive words "micro" and "usb".
 *
 * @param words - Lowercase words being scanned.
 * @param startIndex - Position to inspect.
 * @returns A "Micro-USB" match consuming one or two words, or `null` when
 * neither spelling starts at `startIndex`.
 */
const readMicroUsbToken = (
  words: string[],
  startIndex: number
): ConnectorTokenMatch | null => {
  const head = words[startIndex];
  const isCollapsed = head === "microusb";
  const isSplit = head === "micro" && words[startIndex + 1] === "usb";

  if (!isCollapsed && !isSplit) {
    return null;
  }

  // The collapsed spelling occupies one word, the split spelling two.
  const consumed = isCollapsed ? 1 : 2;
  return createConnectorTokenMatch(
    "Micro-USB",
    "micro usb",
    startIndex + consumed
  );
};
91+
92+
/**
 * Recognizes USB connectors spelled as a single word: "usbc", "usba", "usb3"
 * or "usb3.0".
 *
 * @param words - Lowercase words being scanned.
 * @param startIndex - Position to inspect.
 * @returns A match consuming one word ("USB-C" for "usbc"; "USB-A" for
 * "usba", "usb3" and "usb3.0"), or `null` for any other word.
 */
const readCollapsedUsbToken = (
  words: string[],
  startIndex: number
): ConnectorTokenMatch | null => {
  const word = words[startIndex];
  const following = startIndex + 1;

  if (word === "usbc") {
    return createConnectorTokenMatch("USB-C", "usb c", following);
  }

  if (word === "usba") {
    return createConnectorTokenMatch("USB-A", "usb a", following);
  }

  // "usb3" and "usb3.0" are both reported as USB-A.
  if (word === "usb3" || word === "usb3.0") {
    return createConnectorTokenMatch("USB-A", "usb 3.0", following);
  }

  return null;
};
109+
110+
/**
 * Recognizes USB connectors spelled as two words: "usb" followed by "c", "a",
 * "3" or "3.0".
 *
 * @param words - Lowercase words being scanned.
 * @param startIndex - Position of the expected "usb" word.
 * @returns A match consuming two words ("USB-C" for "usb c"; "USB-A" for
 * "usb a", "usb 3" and "usb 3.0"), or `null` when the word at `startIndex`
 * is not "usb" or the following word is not one of the known qualifiers.
 */
const readSplitUsbToken = (
  words: string[],
  startIndex: number
): ConnectorTokenMatch | null => {
  // Only look at the qualifier when the current word is exactly "usb".
  const qualifier =
    words[startIndex] === "usb" ? words[startIndex + 1] : undefined;
  const afterPair = startIndex + 2;

  if (qualifier === "c") {
    return createConnectorTokenMatch("USB-C", "usb c", afterPair);
  }

  if (qualifier === "a") {
    return createConnectorTokenMatch("USB-A", "usb a", afterPair);
  }

  if (qualifier === "3" || qualifier === "3.0") {
    return createConnectorTokenMatch("USB-A", "usb 3.0", afterPair);
  }

  return null;
};
131+
132+
/**
 * Recognizes a Thunderbolt mention and reports it as a "USB-C" connector.
 *
 * Any word starting with "thunderbolt" matches. A version number is picked up
 * from digits glued to the word ("thunderbolt4") or, failing that, from a
 * following all-digit word ("thunderbolt", "4"); in the latter case both
 * words are consumed. Without a version only the first word is consumed.
 *
 * @param words - Lowercase words being scanned.
 * @param startIndex - Position to inspect.
 * @returns A "USB-C" match whose `matchedText` is "thunderbolt" or
 * "thunderbolt <digits>", or `null` when the word at `startIndex` does not
 * start with "thunderbolt".
 */
const readThunderboltToken = (
  words: string[],
  startIndex: number
): ConnectorTokenMatch | null => {
  const prefix = "thunderbolt";
  const head = words[startIndex];

  if (head === undefined || !head.startsWith(prefix)) {
    return null;
  }

  const attached = head.slice(prefix.length);
  const following = words[startIndex + 1];

  let version: string | null = null;
  let consumed = 1;

  if (isDigitsOnly(attached)) {
    // Version glued to the word, e.g. "thunderbolt4".
    version = attached;
  } else if (following !== undefined && isDigitsOnly(following)) {
    // Version given as its own word, e.g. "thunderbolt 4".
    version = following;
    consumed = 2;
  }

  const matchedText =
    version === null ? "thunderbolt" : `thunderbolt ${version}`;

  return createConnectorTokenMatch(
    "USB-C",
    matchedText,
    startIndex + consumed
  );
};
161+
162+
/**
 * Tries each connector reader at `startIndex` and returns the first match.
 *
 * Readers are tried in this order: Lightning, Micro-USB, collapsed USB
 * ("usbc"), split USB ("usb c"), Thunderbolt.
 *
 * @param words - Lowercase words being scanned.
 * @param startIndex - Position to inspect.
 * @returns The first reader's match, or `null` when no reader recognizes a
 * connector at `startIndex`.
 */
const readConnectorToken = (
  words: string[],
  startIndex: number
): ConnectorTokenMatch | null => {
  const readers = [
    readLightningToken,
    readMicroUsbToken,
    readCollapsedUsbToken,
    readSplitUsbToken,
    readThunderboltToken,
  ];

  for (const read of readers) {
    const match = read(words, startIndex);
    if (match !== null) {
      return match;
    }
  }

  return null;
};
174+
175+
/**
 * Finds every connector mentioned in free text and returns the distinct
 * normalized names in order of first appearance.
 *
 * @param text - Arbitrary text such as a product description.
 * @returns Unique normalized connector names; empty when none are found.
 */
export const collectNormalizedConnectors = (text: string): string[] => {
  const words = tokenizeConnectorWords(text);
  const seen = new Set<string>();

  let cursor = 0;
  while (cursor < words.length) {
    const match = readConnectorToken(words, cursor);

    if (match) {
      seen.add(match.connector);
      // Resume right after the words the match consumed.
      cursor = match.nextIndex;
    } else {
      cursor += 1;
    }
  }

  return Array.from(seen);
};
191+
192+
/**
 * Finds the first "<connector> to <connector>" phrase in text.
 *
 * The text is tokenized into lowercase words and scanned left to right; a
 * pair is reported when a connector is immediately followed by the word "to"
 * and then by another connector.
 *
 * @param text - Arbitrary text such as a product title.
 * @returns The normalized connector names on both sides plus a normalized
 * rendering of the matched phrase, or `null` when no such phrase exists.
 */
export const extractConnectorPairFromText = (
  text: string
): ConnectorPairMatch | null => {
  const words = tokenizeConnectorWords(text);

  for (let start = 0; start < words.length; start += 1) {
    const source = readConnectorToken(words, start);

    if (source !== null && words[source.nextIndex] === "to") {
      // Skip over the "to" word and look for the second connector.
      const target = readConnectorToken(words, source.nextIndex + 1);

      if (target !== null) {
        return {
          from: source.connector,
          matchedText: `${source.matchedText} to ${target.matchedText}`,
          to: target.connector,
        };
      }
    }
  }

  return null;
};

packages/shopify-cable-source/src/source.ts

Lines changed: 28 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
import {
2+
collectNormalizedConnectors,
3+
extractConnectorPairFromText,
4+
} from "./connector-text";
15
import { cleanText, normalizeWhitespace } from "./text";
26
import type {
37
ShopifyCableSourceTemplate,
@@ -156,10 +160,6 @@ class HttpError extends Error {
156160

157161
// Matches a `<script id="__NEXT_DATA__" type="application/json">` element and
// lazily captures everything up to the first closing `</script>` (newlines
// included) in group 1. The attributes must appear exactly in this order and
// spelling for the pattern to match.
const NEXT_DATA_SCRIPT_REGEX =
158162
/<script id="__NEXT_DATA__" type="application\/json">([\s\S]*?)<\/script>/;
159-
const CONNECTOR_TOKEN_REGEX =
160-
/(Thunderbolt\s*\d*|USB[-\s]?C|USB[-\s]?A|USB\s*3\.0|Lightning|Micro[-\s]?USB)/i;
161-
const CONNECTOR_PAIR_REGEX =
162-
/(Thunderbolt\s*\d*|USB[-\s]?C|USB[-\s]?A|USB\s*3\.0|Lightning|Micro[-\s]?USB)\s*to\s*(Thunderbolt\s*\d*|USB[-\s]?C|USB[-\s]?A|USB\s*3\.0|Lightning|Micro[-\s]?USB)/i;
163163
// Case-insensitive match for "thunderbolt" anywhere in a string (no word
// boundaries, so it also matches inside longer words).
const THUNDERBOLT_WORD_REGEX = /thunderbolt/i;
164164
// Case-insensitive match for "cable" anywhere in a string (no word
// boundaries, so "cables" matches too).
const CABLE_WORD_REGEX = /cable/i;
165165
// Matches a wattage: 1-3 digits with an optional decimal part (captured in
// group 1), optional whitespace, then "W"/"w" ending at a word boundary.
// NOTE(review): the `g` flag makes this regex stateful (`lastIndex`) when
// used with `exec`/`test`; usages are not visible here, so confirm callers
// reset it or use `matchAll`.
const POWER_REGEX = /(\d{1,3}(?:\.\d+)?)\s*W\b/gi;
@@ -213,10 +213,30 @@ const combineUniqueText = (...segments: Array<string | undefined>): string => {
213213
};
214214

215215
/**
 * Converts a string to a URL-style slug without using regular expressions.
 *
 * The input is lowercased; each run of ASCII letters and digits becomes one
 * segment, and segments are joined with single hyphens. All other characters
 * act as separators, so the result never starts or ends with a hyphen and
 * never contains consecutive hyphens.
 *
 * @param value - Text to slugify.
 * @returns The slug, or an empty string when the input contains no ASCII
 * letters or digits.
 */
const slugify = (value: string): string => {
  const segments: string[] = [];
  let segment = "";

  for (const character of value.toLowerCase()) {
    const isAlphanumeric =
      (character >= "a" && character <= "z") ||
      (character >= "0" && character <= "9");

    if (isAlphanumeric) {
      segment += character;
    } else if (segment !== "") {
      // A separator closes the segment being built; repeated separators
      // find nothing pending and are ignored.
      segments.push(segment);
      segment = "";
    }
  }

  if (segment !== "") {
    segments.push(segment);
  }

  return segments.join("-");
};
221241

222242
const normalizeBrand = (vendor: string, fallbackBrand: string): string => {
@@ -501,63 +521,6 @@ const mapProductJsonToShopifyProduct = (
501521
};
502522
};
503523

504-
const normalizeConnectorToken = (token: string): string | null => {
505-
const normalized = token.toLowerCase().replace(/\s+/g, "");
506-
if (normalized.includes("thunderbolt")) {
507-
return "USB-C";
508-
}
509-
if (normalized.includes("lightning")) {
510-
return "Lightning";
511-
}
512-
if (normalized.includes("micro")) {
513-
return "Micro-USB";
514-
}
515-
if (normalized.includes("usb3.0") || normalized.includes("usb-a")) {
516-
return "USB-A";
517-
}
518-
if (normalized.includes("usb-c") || normalized.includes("usbc")) {
519-
return "USB-C";
520-
}
521-
return null;
522-
};
523-
524-
const extractConnectorPairFromText = (
525-
text: string
526-
): { from: string; matchedText: string; to: string } | null => {
527-
const pairMatch = text.match(CONNECTOR_PAIR_REGEX);
528-
if (!(pairMatch?.[1] && pairMatch[2])) {
529-
return null;
530-
}
531-
532-
const from = normalizeConnectorToken(pairMatch[1]);
533-
const to = normalizeConnectorToken(pairMatch[2]);
534-
if (!(from && to)) {
535-
return null;
536-
}
537-
538-
return {
539-
from,
540-
to,
541-
matchedText: pairMatch[0],
542-
};
543-
};
544-
545-
const collectNormalizedConnectors = (text: string): string[] => {
546-
const connectorMatcher = new RegExp(CONNECTOR_TOKEN_REGEX.source, "gi");
547-
const connectors = new Set<string>();
548-
let connectorMatch = connectorMatcher.exec(text);
549-
while (connectorMatch) {
550-
const normalized = normalizeConnectorToken(
551-
connectorMatch[1] ?? connectorMatch[0]
552-
);
553-
if (normalized) {
554-
connectors.add(normalized);
555-
}
556-
connectorMatch = connectorMatcher.exec(text);
557-
}
558-
return [...connectors];
559-
};
560-
561524
const parseConnectorPair = (
562525
title: string,
563526
contextText: string

0 commit comments

Comments
 (0)