Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
221 changes: 84 additions & 137 deletions lib/internal/encoding.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ const {
ERR_INVALID_THIS,
ERR_NO_ICU,
} = require('internal/errors').codes;
const kMethod = Symbol('method');
const kSingleByte = Symbol('single-byte');
const kHandle = Symbol('handle');
const kFlags = Symbol('flags');
const kEncoding = Symbol('encoding');
Expand Down Expand Up @@ -52,6 +52,8 @@ const {
validateObject,
kValidateObjectAllowObjectsAndNull,
} = require('internal/validators');

const { hasIntl } = internalBinding('config');
const binding = internalBinding('encoding_binding');
const {
encodeInto,
Expand Down Expand Up @@ -405,166 +407,111 @@ function parseInput(input) {
}
}

const TextDecoder =
internalBinding('config').hasIntl ?
makeTextDecoderICU() :
makeTextDecoderJS();

function makeTextDecoderICU() {
const {
decode: _decode,
getConverter,
} = internalBinding('icu');

class TextDecoder {
constructor(encoding = 'utf-8', options = kEmptyObject) {
encoding = `${encoding}`;
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);

const enc = getEncodingFromLabel(encoding);
if (enc === undefined)
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);

let flags = 0;
if (options !== null) {
flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
}

this[kDecoder] = true;
this[kFlags] = flags;
this[kEncoding] = enc;
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
this[kFatal] = Boolean(options?.fatal);
// Only support fast path for UTF-8.
this[kUTF8FastPath] = enc === 'utf-8';
this[kHandle] = undefined;
this[kMethod] = undefined;

if (isSinglebyteEncoding(this.encoding)) {
this[kMethod] = createSinglebyteDecoder(this.encoding, this[kFatal]);
} else if (!this[kUTF8FastPath]) {
this.#prepareConverter();
}
}

#prepareConverter() {
if (this[kHandle] !== undefined) return;
let icuEncoding = this[kEncoding];
if (icuEncoding === 'gbk') icuEncoding = 'gb18030'; // 10.1.1. GBK's decoder is gb18030's decoder
const handle = getConverter(icuEncoding, this[kFlags]);
if (handle === undefined)
throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
this[kHandle] = handle;
}
let icuDecode, icuGetConverter;
if (hasIntl) {
;({
decode: icuDecode,
getConverter: icuGetConverter,
} = internalBinding('icu'));
}

decode(input = empty, options = kEmptyObject) {
validateDecoder(this);
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
const kBOMSeen = Symbol('BOM seen');

if (this[kMethod]) return this[kMethod](parseInput(input));
let StringDecoder;
function lazyStringDecoder() {
if (StringDecoder === undefined)
({ StringDecoder } = require('string_decoder'));
return StringDecoder;
}

this[kUTF8FastPath] &&= !(options?.stream);
class TextDecoder {
constructor(encoding = 'utf-8', options = kEmptyObject) {
encoding = `${encoding}`;
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);

if (this[kUTF8FastPath]) {
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
}
const enc = getEncodingFromLabel(encoding);
if (enc === undefined)
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);

this.#prepareConverter();

let flags = 0;
if (options !== null)
flags |= options.stream ? 0 : CONVERTER_FLAGS_FLUSH;
let flags = 0;
if (options !== null) {
flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
}

return _decode(this[kHandle], input, flags, this.encoding);
this[kDecoder] = true;
this[kFlags] = flags;
this[kEncoding] = enc;
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
this[kFatal] = Boolean(options?.fatal);
this[kUTF8FastPath] = false;
this[kHandle] = undefined;
this[kSingleByte] = undefined; // Does not care about streaming or BOM

if (enc === 'utf-8') {
this[kUTF8FastPath] = true;
} else if (isSinglebyteEncoding(enc)) {
this[kSingleByte] = createSinglebyteDecoder(enc, this[kFatal]);
} else {
this.#prepareConverter(); // Need to throw early if we don't support the encoding
}
}

return TextDecoder;
}

function makeTextDecoderJS() {
let StringDecoder;
function lazyStringDecoder() {
if (StringDecoder === undefined)
({ StringDecoder } = require('string_decoder'));
return StringDecoder;
#prepareConverter() {
if (this[kHandle] !== undefined) return;
if (hasIntl) {
let icuEncoding = this[kEncoding];
if (icuEncoding === 'gbk') icuEncoding = 'gb18030'; // 10.1.1. GBK's decoder is gb18030's decoder
const handle = icuGetConverter(icuEncoding, this[kFlags]);
if (handle === undefined)
throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
this[kHandle] = handle;
} else if (this[kEncoding] === 'utf-8' || this[kEncoding] === 'utf-16le') {
if (this[kFatal]) throw new ERR_NO_ICU('"fatal" option');
this[kHandle] = new (lazyStringDecoder())(this[kEncoding]);
this[kBOMSeen] = false;
} else {
throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
}
}

const kBOMSeen = Symbol('BOM seen');

function hasConverter(encoding) {
return encoding === 'utf-8' || encoding === 'utf-16le';
}
decode(input = empty, options = kEmptyObject) {
validateDecoder(this);
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);

class TextDecoder {
constructor(encoding = 'utf-8', options = kEmptyObject) {
encoding = `${encoding}`;
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
if (this[kSingleByte]) return this[kSingleByte](parseInput(input));

const enc = getEncodingFromLabel(encoding);
if (enc === undefined)
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
const stream = options?.stream;
if (this[kUTF8FastPath]) {
if (!stream) return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
this[kUTF8FastPath] = false;
}

let flags = 0;
if (options !== null) {
flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
}
this.#prepareConverter();

this[kDecoder] = true;
this[kFlags] = flags;
this[kEncoding] = enc;
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
this[kFatal] = Boolean(options?.fatal);
this[kBOMSeen] = false;
this[kMethod] = undefined;

if (isSinglebyteEncoding(enc)) {
this[kMethod] = createSinglebyteDecoder(enc, this[kFatal]);
} else {
if (!hasConverter(enc)) throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
if (this[kFatal]) throw new ERR_NO_ICU('"fatal" option');
// StringDecoder will normalize WHATWG encoding to Node.js encoding.
this[kHandle] = new (lazyStringDecoder())(enc);
}
if (hasIntl) {
const flags = stream ? 0 : CONVERTER_FLAGS_FLUSH;
return icuDecode(this[kHandle], input, flags, this[kEncoding]);
}

decode(input = empty, options = kEmptyObject) {
validateDecoder(this);
input = parseInput(input);
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
input = parseInput(input);

if (this[kMethod]) return this[kMethod](input);
let result = stream ? this[kHandle].write(input) : this[kHandle].end(input);

if (this[kFlags] & CONVERTER_FLAGS_FLUSH) {
this[kBOMSeen] = false;
if (result.length > 0 && !this[kBOMSeen] && !this[kIgnoreBOM]) {
// If the very first result in the stream is a BOM, and we are not
// explicitly told to ignore it, then we discard it.
if (result[0] === '\ufeff') {
result = StringPrototypeSlice(result, 1);
}
this[kBOMSeen] = true;
}

if (options !== null && options.stream) {
this[kFlags] &= ~CONVERTER_FLAGS_FLUSH;
} else {
this[kFlags] |= CONVERTER_FLAGS_FLUSH;
}
if (!stream) this[kBOMSeen] = false;

let result = this[kFlags] & CONVERTER_FLAGS_FLUSH ?
this[kHandle].end(input) :
this[kHandle].write(input);

if (result.length > 0 && !this[kBOMSeen] && !this[kIgnoreBOM]) {
// If the very first result in the stream is a BOM, and we are not
// explicitly told to ignore it, then we discard it.
if (result[0] === '\ufeff') {
result = StringPrototypeSlice(result, 1);
}
this[kBOMSeen] = true;
}
return result;

return result;
}
}

return TextDecoder;
}

// Mix in some shared properties.
Expand Down
Loading