Skip to content

Commit baa745f

Browse files
committed
url: optimize path resolution with single-pass algorithm
1 parent cab20f2 commit baa745f

1 file changed

Lines changed: 187 additions & 73 deletions

File tree

lib/url.js

Lines changed: 187 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,15 @@
2323

2424
const {
2525
ArrayPrototypeJoin,
26+
ArrayPrototypePush,
2627
Boolean,
2728
Int8Array,
2829
ObjectAssign,
2930
ObjectKeys,
3031
StringPrototypeAt,
3132
StringPrototypeCharCodeAt,
3233
StringPrototypeIndexOf,
34+
StringPrototypeLastIndexOf,
3335
StringPrototypeReplaceAll,
3436
StringPrototypeSlice,
3537
decodeURIComponent,
@@ -52,7 +54,6 @@ const {
5254

5355
// This ensures setURLConstructor() is called before the native
5456
// URL::ToObject() method is used.
55-
const { spliceOne } = require('internal/util');
5657
const { isInsideNodeModules } = internalBinding('util');
5758

5859
// WHATWG URL implementation provided by internal/url
@@ -91,7 +92,70 @@ function Url() {
9192

9293
// Reference: RFC 3986, RFC 1808, RFC 2396
9394

94-
// define these here so at least they only have to be
95+
/**
 * Normalize URL path segments by resolving `.` and `..` in a single
 * left-to-right pass over `path`, instead of repeatedly splicing an array.
 *
 * Segment rules:
 * - The leading `/` of `path` (index 0) contributes no entry.
 * - Every other empty segment (`//`, or a trailing `/`) is kept as `''`.
 *   Consequently a path starting with `//` yields a leading `''` entry and
 *   a path ending in `/` yields a trailing `''` entry.
 * - `.` is dropped.
 * - `..` removes the most recently collected entry (an `''` entry counts),
 *   unless there is none or that entry is itself `..`. In that case it is
 *   kept as a literal `..` entry when `allowAboveRoot` is true, otherwise
 *   it is discarded and counted in `up`.
 * @param {string} path - Combined pathname to normalize
 * @param {boolean} allowAboveRoot - Whether `..` may climb above the start
 *   of the path (kept as leading `..` entries) rather than being dropped
 * @returns {{ segments: string[], up: number, trailingSlash: boolean }}
 *   `segments` is the normalized list; `up` is the number of discarded
 *   `..` segments (always 0 when `allowAboveRoot` is true); `trailingSlash`
 *   is true when the last raw segment of `path` was `''`, `.` or `..`.
 */
function normalizePathSegments(path, allowAboveRoot) {
  if (!path) return { segments: [], up: 0, trailingSlash: false };

  const len = path.length;
  const segments = [];
  let up = 0;
  // Index of the first character of the segment currently being scanned.
  let segStart = 0;
  // Raw text of the most recently terminated segment, before normalization.
  let lastSeg = '';

  // `i === len` acts as a virtual trailing '/' so that the final segment is
  // flushed through the same code path as every other segment.
  for (let i = 0; i <= len; i++) {
    const code = i < len ?
      StringPrototypeCharCodeAt(path, i) :
      CHAR_FORWARD_SLASH;
    if (code !== CHAR_FORWARD_SLASH) continue;

    if (segStart === i) {
      // Empty segment. Preserve it for `//` and for a trailing `/`, but not
      // for the path's own leading `/`.
      if (i > 0) {
        ArrayPrototypePush(segments, '');
      }
      lastSeg = '';
    } else {
      const segment = StringPrototypeSlice(path, segStart, i);
      lastSeg = segment;

      if (segment === '..') {
        const count = segments.length;
        if (count > 0 && segments[count - 1] !== '..') {
          // Drop the last entry. Shrinking `length` avoids calling a
          // user-patchable prototype method.
          segments.length--;
        } else if (allowAboveRoot) {
          ArrayPrototypePush(segments, '..');
        } else {
          up++;
        }
      } else if (segment !== '.') {
        // Regular segment; a lone `.` (current directory) is skipped.
        ArrayPrototypePush(segments, segment);
      }
    }
    segStart = i + 1;
  }

  // A path that ends in `/`, `.` or `..` needs a trailing slash once the
  // dot segments have been removed.
  const trailingSlash = lastSeg === '' || lastSeg === '.' || lastSeg === '..';

  return { segments, up, trailingSlash };
}
157+
158+
// Define these here so at least they only have to be
95159
// compiled once on the first module load.
96160
const protocolPattern = /^[a-z0-9.+-]+:/i;
97161
const portPattern = /:[0-9]*$/;
@@ -127,6 +191,7 @@ const {
127191
CHAR_VERTICAL_LINE,
128192
CHAR_AT,
129193
CHAR_COLON,
194+
CHAR_DOT,
130195
} = require('internal/constants');
131196

132197
let urlParseWarned = false;
@@ -824,11 +889,14 @@ Url.prototype.resolveObject = function resolveObject(relative) {
824889
let mustEndAbs = (isRelAbs || isSourceAbs ||
825890
(result.host && relative.pathname));
826891
const removeAllDots = mustEndAbs;
827-
let srcPath = (result.pathname && result.pathname.split('/')) || [];
828-
const relPath = (relative.pathname && relative.pathname.split('/')) || [];
829892
const noLeadingSlashes = result.protocol &&
830893
!slashedProtocol.has(result.protocol);
831894

895+
// Build the combined path string for normalization
896+
let combinedPath = '';
897+
let srcHost = ''; // For noLeadingSlashes protocols
898+
let relHost = ''; // For noLeadingSlashes protocols
899+
832900
// If the url is a non-slashed url, then relative
833901
// links like ../.. should be able
834902
// to crawl up to the hostname, as well. This is strange.
@@ -837,22 +905,15 @@ Url.prototype.resolveObject = function resolveObject(relative) {
837905
if (noLeadingSlashes) {
838906
result.hostname = '';
839907
result.port = null;
840-
if (result.host) {
841-
if (srcPath[0] === '') srcPath[0] = result.host;
842-
else srcPath.unshift(result.host);
843-
}
908+
srcHost = result.host || '';
844909
result.host = '';
845910
if (relative.protocol) {
846911
relative.hostname = null;
847912
relative.port = null;
848913
result.auth = null;
849-
if (relative.host) {
850-
if (relPath[0] === '') relPath[0] = relative.host;
851-
else relPath.unshift(relative.host);
852-
}
914+
relHost = relative.host || '';
853915
relative.host = null;
854916
}
855-
mustEndAbs &&= (relPath[0] === '' || srcPath[0] === '');
856917
}
857918

858919
if (isRelAbs) {
@@ -868,30 +929,65 @@ Url.prototype.resolveObject = function resolveObject(relative) {
868929
}
869930
result.search = relative.search;
870931
result.query = relative.query;
871-
srcPath = relPath;
872-
// Fall through to the dot-handling below.
873-
} else if (relPath.length) {
932+
// Use relative path directly
933+
if (noLeadingSlashes && relHost) {
934+
combinedPath = (relative.pathname && relative.pathname.charAt(0) === '/' ?
935+
relHost + relative.pathname : relHost + '/' + (relative.pathname || ''));
936+
} else {
937+
combinedPath = relative.pathname || '';
938+
}
939+
} else if (relative.pathname) {
874940
// it's relative
875941
// throw away the existing file, and take the new path instead.
876-
srcPath ||= [];
877-
srcPath.pop();
878-
srcPath = srcPath.concat(relPath);
879942
result.search = relative.search;
880943
result.query = relative.query;
944+
945+
// Build combined path: source path (minus last segment) + relative path
946+
let srcPathname = result.pathname || '';
947+
if (noLeadingSlashes && srcHost) {
948+
srcPathname = (srcPathname && srcPathname.charAt(0) === '/' ?
949+
srcHost + srcPathname : srcHost + '/' + srcPathname);
950+
}
951+
952+
// Remove the last segment from source (the "file" part)
953+
const lastSlashIndex = StringPrototypeLastIndexOf(srcPathname, '/');
954+
if (lastSlashIndex >= 0) {
955+
srcPathname = StringPrototypeSlice(srcPathname, 0, lastSlashIndex + 1);
956+
} else {
957+
srcPathname = '';
958+
}
959+
960+
// Append relative pathname
961+
let relPathname = relative.pathname;
962+
if (noLeadingSlashes && relHost) {
963+
relPathname = (relPathname && relPathname.charAt(0) === '/' ?
964+
relHost + relPathname : relHost + '/' + relPathname);
965+
}
966+
combinedPath = srcPathname + relPathname;
881967
} else if (relative.search !== null && relative.search !== undefined) {
882968
// Just pull out the search.
883969
// like href='?foo'.
884970
// Put this after the other two cases because it simplifies the booleans
885971
if (noLeadingSlashes) {
886-
result.hostname = result.host = srcPath.shift();
972+
// Extract host from first segment of source path
973+
const srcPathname = result.pathname || '';
974+
const firstSlashIdx = StringPrototypeIndexOf(srcPathname, '/');
975+
if (firstSlashIdx > 0) {
976+
result.hostname = result.host = StringPrototypeSlice(srcPathname, 0, firstSlashIdx);
977+
} else if (firstSlashIdx === -1 && srcPathname) {
978+
result.hostname = result.host = srcPathname;
979+
} else if (srcHost) {
980+
result.hostname = result.host = srcHost;
981+
} else {
982+
result.hostname = result.host = '';
983+
}
887984
// Occasionally the auth can get stuck only in host.
888-
// This especially happens in cases like
889-
// url.resolveObject('mailto:local1@domain1', 'local2@domain2')
890985
const authInHost =
891-
result.host && result.host.indexOf('@') > 0 && result.host.split('@');
986+
result.host && StringPrototypeIndexOf(result.host, '@') > 0;
892987
if (authInHost) {
893-
result.auth = authInHost.shift();
894-
result.host = result.hostname = authInHost.shift();
988+
const atIdx = StringPrototypeIndexOf(result.host, '@');
989+
result.auth = StringPrototypeSlice(result.host, 0, atIdx);
990+
result.host = result.hostname = StringPrototypeSlice(result.host, atIdx + 1);
895991
}
896992
}
897993
result.search = relative.search;
@@ -903,9 +999,24 @@ Url.prototype.resolveObject = function resolveObject(relative) {
903999
}
9041000
result.href = result.format();
9051001
return result;
1002+
} else {
1003+
// No relative path at all, use source path
1004+
if (noLeadingSlashes && srcHost) {
1005+
const srcPathname = result.pathname || '';
1006+
combinedPath = (srcPathname && srcPathname.charAt(0) === '/' ?
1007+
srcHost + srcPathname : srcHost + '/' + srcPathname);
1008+
} else {
1009+
combinedPath = result.pathname || '';
1010+
}
9061011
}
9071012

908-
if (!srcPath.length) {
1013+
// Check if we need to handle noLeadingSlashes mustEndAbs
1014+
if (noLeadingSlashes) {
1015+
const startsWithSlash = combinedPath && combinedPath.charAt(0) === '/';
1016+
mustEndAbs &&= startsWithSlash;
1017+
}
1018+
1019+
if (!combinedPath) {
9091020
// No path at all. All other things were already handled above.
9101021
result.pathname = null;
9111022
// To support http.request
@@ -918,75 +1029,78 @@ Url.prototype.resolveObject = function resolveObject(relative) {
9181029
return result;
9191030
}
9201031

921-
// If a url ENDs in . or .., then it must get a trailing slash.
922-
// however, if it ends in anything else non-slashy,
923-
// then it must NOT get a trailing slash.
924-
let last = srcPath[srcPath.length - 1];
925-
const hasTrailingSlash = (
926-
((result.host || relative.host || srcPath.length > 1) &&
927-
(last === '.' || last === '..')) || last === '');
928-
929-
// Strip single dots, resolve double dots to parent dir
930-
// if the path tries to go above the root, `up` ends up > 0
931-
let up = 0;
932-
for (let i = srcPath.length - 1; i >= 0; i--) {
933-
last = srcPath[i];
934-
if (last === '.') {
935-
spliceOne(srcPath, i);
936-
} else if (last === '..') {
937-
spliceOne(srcPath, i);
938-
up++;
939-
} else if (up) {
940-
spliceOne(srcPath, i);
941-
up--;
1032+
// Use optimized single-pass normalization (O(n) instead of O(n²))
1033+
const allowAboveRoot = !mustEndAbs && !removeAllDots;
1034+
const { segments, up, trailingSlash } = normalizePathSegments(combinedPath, allowAboveRoot);
1035+
1036+
const pathHadMultipleSegments = combinedPath && StringPrototypeIndexOf(combinedPath, '/') !== -1;
1037+
const hasTrailingSlash = trailingSlash &&
1038+
(result.host || relative.host || pathHadMultipleSegments);
1039+
1040+
// Handle remaining 'up' count - add leading .. if allowed
1041+
let srcPath = segments;
1042+
if (up > 0 && allowAboveRoot) {
1043+
// Prepend '..' segments for remaining up count
1044+
const newPath = [];
1045+
for (let i = 0; i < up; i++) {
1046+
ArrayPrototypePush(newPath, '..');
9421047
}
943-
}
944-
945-
// If the path is allowed to go above the root, restore leading ..s
946-
if (!mustEndAbs && !removeAllDots) {
947-
while (up--) {
948-
srcPath.unshift('..');
1048+
for (let i = 0; i < srcPath.length; i++) {
1049+
ArrayPrototypePush(newPath, srcPath[i]);
9491050
}
1051+
srcPath = newPath;
9501052
}
9511053

952-
if (mustEndAbs && srcPath[0] !== '' &&
953-
(!srcPath[0] || srcPath[0].charAt(0) !== '/')) {
954-
srcPath.unshift('');
1054+
// Handle mustEndAbs - ensure path starts with /
1055+
let isAbsolute = srcPath.length > 0 && srcPath[0] === '';
1056+
if (!isAbsolute && srcPath.length > 0 && srcPath[0] &&
1057+
srcPath[0].charAt(0) === '/') {
1058+
isAbsolute = true;
9551059
}
9561060

957-
if (hasTrailingSlash && StringPrototypeAt(ArrayPrototypeJoin(srcPath, '/'), -1) !== '/') {
958-
srcPath.push('');
959-
}
960-
961-
const isAbsolute = srcPath[0] === '' ||
962-
(srcPath[0] && srcPath[0].charAt(0) === '/');
963-
964-
// put the host back
1061+
// Put the host back for noLeadingSlashes protocols
9651062
if (noLeadingSlashes) {
9661063
result.hostname =
967-
result.host = isAbsolute ? '' : srcPath.length ? srcPath.shift() : '';
1064+
result.host = isAbsolute ? '' : srcPath.length ? srcPath[0] : '';
1065+
if (result.host) {
1066+
// Remove the host from srcPath (first element)
1067+
srcPath = srcPath.length > 1 ?
1068+
ArrayPrototypeJoin(srcPath, '/').slice(result.host.length + 1).split('/') :
1069+
[];
1070+
if (srcPath.length === 1 && srcPath[0] === '') srcPath = [];
1071+
}
9681072
// Occasionally the auth can get stuck only in host.
969-
// This especially happens in cases like
970-
// url.resolveObject('mailto:local1@domain1', 'local2@domain2')
971-
const authInHost = result.host && result.host.indexOf('@') > 0 ?
972-
result.host.split('@') : false;
1073+
const authInHost = result.host && StringPrototypeIndexOf(result.host, '@') > 0;
9731074
if (authInHost) {
974-
result.auth = authInHost.shift();
975-
result.host = result.hostname = authInHost.shift();
1075+
const atIdx = StringPrototypeIndexOf(result.host, '@');
1076+
result.auth = StringPrototypeSlice(result.host, 0, atIdx);
1077+
result.host = result.hostname = StringPrototypeSlice(result.host, atIdx + 1);
9761078
}
9771079
}
9781080

9791081
mustEndAbs ||= (result.host && srcPath.length);
9801082

9811083
if (mustEndAbs && !isAbsolute) {
982-
srcPath.unshift('');
1084+
// Need to add leading empty string for absolute path
1085+
const newPath = [''];
1086+
for (let i = 0; i < srcPath.length; i++) {
1087+
ArrayPrototypePush(newPath, srcPath[i]);
1088+
}
1089+
srcPath = newPath;
1090+
isAbsolute = true;
1091+
}
1092+
1093+
if (hasTrailingSlash &&
1094+
(srcPath.length === 0 ||
1095+
StringPrototypeAt(ArrayPrototypeJoin(srcPath, '/'), -1) !== '/')) {
1096+
ArrayPrototypePush(srcPath, '');
9831097
}
9841098

9851099
if (!srcPath.length) {
9861100
result.pathname = null;
9871101
result.path = null;
9881102
} else {
989-
result.pathname = srcPath.join('/');
1103+
result.pathname = ArrayPrototypeJoin(srcPath, '/');
9901104
}
9911105

9921106
// To support http.request

0 commit comments

Comments
 (0)