diff --git a/README.md b/README.md index 2d5a00f..084dfb1 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,8 @@ microcbor is a minimal JavaScript [CBOR](https://cbor.io/) implementation featur - small footprint - fast performance - `Iterable` and `AsyncIterable` streaming APIs with "chunk recycling" encoding option -- [Web Streams API](https://developer.mozilla.org/en-US/docs/Web/API/Streams_API)-compatible [TransformStream](https://developer.mozilla.org/en-US/docs/Web/API/TransformStream) classes +- [Web Streams API](https://developer.mozilla.org/en-US/docs/Web/API/Streams_API)-compatible [TransformStream](https://developer.mozilla.org/en-US/docs/Web/API/TransformStream) classes with proper backpressure +- key mapping and value validation with `onKey()`, `onValue()` callbacks. microcbor follows the [deterministic CBOR encoding requirements](https://www.rfc-editor.org/rfc/rfc8949.html#core-det) - all floating-point numbers are serialized in the smallest possible size without losing precision, and object entries are always sorted by key in byte-wise utf-8 lexicographic order. `NaN` is always serialized as `0xf97e00`. 
**microcbor doesn't support tags, bigints, typed arrays, non-string keys, or indefinite-length collections.** @@ -29,8 +30,10 @@ This library is TypeScript-native, ESM-only, and has just **one dependency** [jo - [`encodeAsyncIterable`](#encodeasynciterable) - [`CBOREncoderStream`](#cborencoderstream) - [Decoding](#decoding) + - [`DecodeOptions`](#decodeoptions) - [`decode`](#decode) - [`decodeIterable`](#decodeiterable) + - [`AsyncDecodeOptions`](#asyncdecodeoptions) - [`decodeAsyncIterable`](#decodeasynciterable) - [`CBORDecoderStream`](#cbordecoderstream) - [Value mapping](#value-mapping) @@ -41,9 +44,13 @@ This library is TypeScript-native, ESM-only, and has just **one dependency** [jo ## Install -``` +```bash npm i microcbor ``` +or +```bash +bun add microcbor +``` ## Usage @@ -89,7 +96,7 @@ interface CBORMap { #### `EncodeOptions` ```ts -export interface EncodeOptions { +export interface EncodeOptions { /** * Allow `undefined` * @default true @@ -107,16 +114,33 @@ export interface EncodeOptions { chunkRecycling?: boolean /** - * Maximum chunk size. + * Maximum chunk size * @default 4096 */ chunkSize?: number /** - * Minimum bitsize for floating-point numbers: 16, 32, or 64. 
+ * Minimum bitsize for floating-point numbers: 16, 32, or 64 * @default 16 */ minFloatSize?: (typeof FloatSize)[keyof typeof FloatSize] + + /** + * Function to remap/validate object keys while encoding + * @param key Original object key + * @throws Error if key is invalid + * @returns An optional replacement key string + */ + onKey?: (key: string) => string|void + + /** + * Function to validate/transform/replace values while encoding + * @param value Value to validate/transform/replace + * @param keyPath Array of keys describing the access path to this value + * @throws Error if value is invalid + * @returns An optional replacement value to use + */ + onValue?: (value: T, keyPath: (string|number)[]) => CBORValue|void } ``` @@ -127,7 +151,7 @@ export interface EncodeOptions { * Calculate the byte length that a value will encode into * without actually allocating anything. */ -declare function encodingLength( +export function encodingLength( value: CBORValue, options?: EncodeOptions, ): number @@ -138,31 +162,46 @@ declare function encodingLength( ```ts /** * Encode a single CBOR value. + * @param value Value to encode + * @param options Encode options * options.chunkRecycling has no effect here. 
*/ -export function encode(value: CBORValue, options?: EncodeOptions): Uint8Array +export function encode( + value: T, + options?: EncodeOptions +): Uint8Array +export function encode( + value: T, + options: WithRequired>>, "onValue"> +): Uint8Array ``` #### `encodeIterable` ```ts /** Encode an iterable of CBOR values into an iterable of Uint8Array chunks */ -export function* encodeIterable( - source: Iterable, - options?: EncodeOptions, +export function* encodeIterable( + source: Iterable, + options?: EncodeOptions +): IterableIterator +export function* encodeIterable( + source: Iterable, + options: WithRequired>>, "onValue"> ): IterableIterator - ``` #### `encodeAsyncIterable` ```ts /** Encode an async iterable of CBOR values into an async iterable of Uint8Array chunks */ -export async function* encodeAsyncIterable( - source: AsyncIterable, - options?: EncodeOptions, +export async function* encodeAsyncIterable( + source: AsyncIterable, + options?: EncodeOptions +): AsyncIterableIterator +export async function* encodeAsyncIterable( + source: AsyncIterable, + options: WithRequired>>, "onValue"> ): AsyncIterableIterator - ``` #### `CBOREncoderStream` @@ -172,8 +211,14 @@ export async function* encodeAsyncIterable( * Encode a Web Streams API ReadableStream. * options.chunkRecycling has no effect here. */ -export class CBOREncoderStream extends TransformStream { - public constructor(options?: EncodeOptions) +export class CBOREncoderStream { + readable: ReadableStream + writable: WritableStream + + public constructor(...[options]: T extends CBORValue + ? 
[]|[EncodeOptions] + : [WithRequired>>, "onValue">] + ) } ``` @@ -182,7 +227,7 @@ export class CBOREncoderStream extends TransformStream { #### `DecodeOptions` ```ts -export interface DecodeOptions { +export interface DecodeOptions { /** * Allow `undefined` * @default true @@ -194,47 +239,127 @@ export interface DecodeOptions { * @default 16 */ minFloatSize?: (typeof FloatSize)[keyof typeof FloatSize] + + /** + * Function to remap/validate object keys while decoding + * @param decodeKey Function to decode original object key + * @param length Key length to validate pre-decoding + * @throws Error if length/key is invalid + * @returns An optional replacement key string + */ + onKey?: (decodeKey: () => string, length: number) => string|void + + /** + * Function to validate/transform/replace values while decoding + * @param decodeValue Function to decode value + * @param length Value length/size to validate pre-decoding + * @param type Value type (e.g. 'number', 'string', 'Uint8Array'...) + * @param keyPath Array of keys describing the access path to this value + * @throws Error if length/value is invalid + * @returns An optional replacement value to use + */ + onValue?: ( + decodeValue: () => CBORValue, + length: number, + type: string, + keyPath: (string|number)[] + ) => T|void } ``` #### `decode` ```ts -/** Decode a single CBOR value. */ -export function decode( - data: Uint8Array, - options?: DecodeOptions, +/* + * Decode a single CBOR value + * @param data Data to decode + * @param options Decode options + */ +export function decode(...[data, options]: T extends CBORValue + ? 
([Uint8Array]|[Uint8Array, DecodeOptions]) + : [Uint8Array, WithRequired>>, "onValue">] ): T ``` #### `decodeIterable` ```ts -/** Decode an iterable of Uint8Array chunks into an iterable of CBOR values */ -export function* decodeIterable( - source: Iterable, - options?: DecodeOptions, +/** + * Decode an iterable of Uint8Array chunks into an iterable of CBOR values + * @param source Iterable of Uint8Array chunks + * @param options Decode options + */ +export function* decodeIterable(...[source, options]: T extends CBORValue + ? ([Iterable]|[Iterable, DecodeOptions]) + : [Iterable, WithRequired>>, "onValue">] ): IterableIterator ``` +#### `AsyncDecodeOptions` + +```ts +export interface AsyncDecodeOptions extends Omit { + /** + * Function to remap/validate object keys while decoding + * (async version that works with AsyncIterable and streams) + * @param decodeKey Function to decode original object key (async) + * @param length Key length to validate pre-decoding + * @throws Error if length/key is invalid + * @returns An optional replacement key string + */ + onKey?: ( + decodeKey: () => Awaitable, + length: number + ) => Awaitable + + /** + * Function to validate/transform/replace values while decoding + * (async version that works with AsyncIterable and streams) + * @param decodeValue Function to decode value (async) + * @param length Value length/size to validate pre-decoding + * @param type Value type (e.g. 'number', 'string', 'Uint8Array'...) 
+ * @param keyPath Array of keys describing the access path to this value + * @throws Error if length/value is invalid + * @returns An optional replacement value to use + */ + onValue?: ( + decodeValue: () => Awaitable, + length: number, + type: string, + keyPath: (string|number)[] + ) => Awaitable + + /** Callback function when the decoder requires more data */ + onPull?: () => void +} +``` + #### `decodeAsyncIterable` ```ts -/** Decode an async iterable of Uint8Array chunks into an async iterable of CBOR values */ -export async function* decodeAsyncIterable( - source: AsyncIterable, - options?: DecodeOptions, -): AsyncIterable +/** + * Decode an async iterable of Uint8Array chunks into an async iterable of CBOR values + * @param source Async iterable of Uint8Array chunks + * @param options Decode options + */ +export async function* decodeAsyncIterable(...[source, options]: T extends CBORValue + ? ([AsyncIterable]|[AsyncIterable, AsyncDecodeOptions]) + : [AsyncIterable, WithRequired>>, "onValue">] +): AsyncIterableIterator ``` #### `CBORDecoderStream` ```ts -/** Decode a Web Streams API ReadableStream. */ -export class CBORDecoderStream< - T extends CBORValue = CBORValue, -> extends TransformStream { - public constructor() +/** Decode a Web Streams API ReadableStream */ +export class CBORDecoderStream { + readable: ReadableStream + writable: WritableStream + + constructor(...[options]: T extends CBORValue + ? []|[AsyncDecodeOptions] + : [WithRequired>>, "onValue">] + ) } ``` @@ -265,6 +390,130 @@ declare class UnsafeIntegerError extends RangeError { | `7` (null) | `null` | | | `7` (undefined) | `undefined` | | +## Key mapping + +`onKey()` provides a way to remap object keys during encoding/decoding, allowing you to reduce payload size or add a layer of obfuscation. It can also be used to check for abnormal key lengths, aborting the operation early and saving resources. 
+ +Example: + +```ts +import { encode, decode } from "microcbor" + +const keys = ["firstName", "lastName", "emailAddress", ...] + +const encoded = encode( + { firstName: "John", lastName: "Doe", emailAddress: "john@example.com" }, + { onKey: (key) => keys.indexOf(key) + "" } +) + +const decoded = decode(encoded, { + onKey: (decodeKey, length) => { + if (length > 30) throw new Error("Key length is too long") + return keys[+decodeKey()] + } +}) +``` + +## Value validation and transformation + +`onValue()` enables the validation of values and their lengths during decoding, allowing invalid data to be caught early in the process. Here is a very basic example of how it could be done using Zod: + +```ts +import { encode, decode, type CBORMap } from "microcbor" +import { z, type ZodObject, type ZodOptional, type ZodString } from "zod" + +const UserSchema = z.object({ + id: z.number().int().positive(), + email: z.email().min(5).max(19), + profile: z.object({ + age: z.number().int().min(0).max(150).optional(), + createdAt: z.number().positive() + }) +}) + +type User = z.infer<typeof UserSchema> + +const user: User = { + id: 1234, + email: "dummy@example.com", + profile: { + age: 30, + createdAt: Date.now() + } +} + +const encoded = encode(user) +console.log(encoded) + +const decoded = decode(encoded, { + onValue: (decodeValue, length, type, keyPath) => { + let zodVal: unknown = UserSchema + for (const key of keyPath) { + zodVal = (zodVal as ZodObject)?.shape?.[key] + while ((zodVal as ZodOptional)?.unwrap) + zodVal = (zodVal as ZodOptional).unwrap() + if (!zodVal) throw new Error("Unknown property") + } + + switch(type) { + case "string": { + const zodStr = zodVal as ZodString + if (zodStr.type !== type) throw new Error("Incorrect value type") + const { minimum, maximum } = zodStr._zod.bag + if (minimum !== undefined && length < minimum) throw new Error("String too short") + if (maximum !== undefined && length > maximum) throw new Error("String too long") + zodStr.parse(decodeValue() as string) + 
break + } + case "object": { + const zodObj = zodVal as ZodObject + const keys = Object.entries(zodObj.shape) + .filter(([_, val]) => !val.isOptional()) + .map(([key]) => key) + if (length < keys.length) throw new Error("Missing properties") + const object = decodeValue() as CBORMap + for (const key of keys) + if (!(key in object)) throw new Error("Missing properties") + break + } + default: + (zodVal as ZodObject).parse(decodeValue()) + } + } +}) +console.log(decoded) +``` + +`onValue()` can also be used to transform values on the fly. Here is an example for converting between Dates and timestamps: + +```ts +import { encode, decode } from "microcbor" + +const user = { + id: 1234, + email: "dummy@example.com", + profile: { + age: 30, + createdAt: new Date() + } +} + +const encoded = encode(user, { + onValue: (value, _keyPath) => { + if (value instanceof Date) return +value + } +}) +console.log(encoded) + +const decoded = decode(encoded, { + onValue: (decodeValue, _length, type, keyPath) => { + if (type === "number" && keyPath.join(".") === "profile.createdAt") + return new Date(+decodeValue()) + } +}) +console.log(decoded) +``` + ## Testing Tests use [AVA](https://github.com/avajs/ava) and live in the [test](./test/) directory. Tests use [node-cbor](https://github.com/hildjj/node-cbor/) to validate encoding results. More tests are always welcome! 
diff --git a/src/CBORDecoderStream.ts b/src/CBORDecoderStream.ts index dfafe0e..8acce84 100644 --- a/src/CBORDecoderStream.ts +++ b/src/CBORDecoderStream.ts @@ -1,48 +1,50 @@ -import { Decoder } from "./decodeAsyncIterable.js" -import { DecodeOptions } from "./options.js" -import { CBORValue } from "./types.js" +import { Decoder, type AsyncDecodeOptions } from "./decodeAsyncIterable.js" +import { createTransformWithBackpressure } from "./utils.js" +import type { WithRequired, Flatten, NoInfer } from "./utils.js" +import type { CBORValue } from "./types.js" /** Decode a Web Streams API ReadableStream */ -export class CBORDecoderStream extends TransformStream { - constructor(options: DecodeOptions = {}) { - let readableController: ReadableStreamDefaultController - - const readable = new ReadableStream({ - start(controller) { - readableController = controller +export class CBORDecoderStream { + readable!: ReadableStream + writable!: WritableStream + + constructor(...[options = {}]: T extends CBORValue + ? []|[AsyncDecodeOptions] + : [WithRequired>>, "onValue">] + ) { + let transformResolve: (() => void) | null = null + const { writable, readable: readable_ } = new TransformStream( + { + transform(chunk, controller) { + return new Promise((resolve) => { + transformResolve = () => { + transformResolve = null + resolve() + } + controller.enqueue(chunk) + }) + } }, - }) - - // We need to track whick chunks have been "processed" and only resolve each - // .transform() promise once all data from each chunk has been enqueued. 
- const chunks = new WeakMap void }>() - - async function pipe(controller: TransformStreamDefaultController) { - const decoder = new Decoder(readable.values(), { - ...options, - onFree: (chunk) => chunks.get(chunk)?.resolve(), - }) - - for await (const value of decoder) { - controller.enqueue(value) + { highWaterMark: 1 }, + { highWaterMark: 1 } + ) + + const { readable, writable: writable_ } = createTransformWithBackpressure( + async function pipe(_, enqueue) { + const decoder = new Decoder(readable_.values(), { + ...options, + onPull: () => transformResolve?.() + } as AsyncDecodeOptions) + for await (const value of decoder) { + await enqueue(value as T) + } + writer.close() // Close stream } - } + ) - super({ - start(controller) { - pipe(controller).catch((err) => controller.error(err)) - }, - - transform(chunk) { - return new Promise((resolve) => { - chunks.set(chunk, { resolve }) - readableController.enqueue(chunk) - }) - }, + const writer = writable_.getWriter() + writer.write(undefined) // Jump-start stream - flush() { - readableController.close() - }, - }) + return { readable, writable } } } diff --git a/src/CBOREncoderStream.ts b/src/CBOREncoderStream.ts index 4944e2f..a6e84bf 100644 --- a/src/CBOREncoderStream.ts +++ b/src/CBOREncoderStream.ts @@ -1,29 +1,36 @@ import { Encoder } from "./Encoder.js" -import { CBORValue } from "./types.js" -import { EncodeOptions } from "./options.js" +import { createTransformWithBackpressure } from "./utils.js" +import type { CBORValue } from "./types.js" +import type { EncodeOptions } from "./options.js" +import type { Flatten, WithRequired, NoInfer } from "./utils.js" /** * Encode a Web Streams API ReadableStream. * options.chunkRecycling has no effect here. 
*/ -export class CBOREncoderStream extends TransformStream { - constructor(options: EncodeOptions = {}) { - const encoder = new Encoder({ ...options, chunkRecycling: false }) +export class CBOREncoderStream { + readable!: ReadableStream + writable!: WritableStream - super({ - transform(value: CBORValue, controller: TransformStreamDefaultController) { + constructor(...[options = {}]: T extends CBORValue + ? []|[EncodeOptions] + : [WithRequired>>, "onValue">] + ) { + const encoder = new Encoder({ ...options, chunkRecycling: false } as EncodeOptions) + + return createTransformWithBackpressure( + async (value, enqueue) => { // Encode the incoming value and push all resulting chunks - for (const chunk of encoder.encodeValue(value)) { - controller.enqueue(chunk) + for (const chunk of encoder.encodeValue(value as CBORValue)) { + await enqueue(chunk) } }, - - flush(controller: TransformStreamDefaultController) { - // Push any remaining chunks when the stream is closing + async (enqueue) => { + // Flush any remaining chunks for (const chunk of encoder.flush()) { - controller.enqueue(chunk) + await enqueue(chunk) } - }, - }) + } + ) } } diff --git a/src/Decoder.ts b/src/Decoder.ts index bdadd2d..728ad7b 100644 --- a/src/Decoder.ts +++ b/src/Decoder.ts @@ -1,30 +1,57 @@ import { getFloat16 } from "fp16" -import type { CBORValue } from "./types.js" +import type { CBORValue, CBORArray, CBORMap } from "./types.js" import type { DecodeOptions, FloatSize } from "./options.js" +import type { WithRequired, Flatten, NoInfer } from "./utils.js" import { UnsafeIntegerError, maxSafeInteger, minSafeInteger } from "./utils.js" -export class Decoder { +export class Decoder { public readonly allowUndefined: boolean public readonly minFloatSize: (typeof FloatSize)[keyof typeof FloatSize] + private readonly decoder = new TextDecoder() + private readonly onKey?: (decodeKey: () => string, length: number) => string|void + private readonly onValue?: ( + decodeValue: () => CBORValue, + length: 
number, + type: string, + keyPath: (string|number)[] + ) => CBORValue|void + + private data: Uint8Array #offset: number #view: DataView + #env: { + isKey: boolean + keyPath: (string|number)[] + } - public constructor( - private readonly data: Uint8Array, - options: DecodeOptions = {}, + public constructor(...[data, options = {}]: T extends CBORValue + ? ([Uint8Array]|[Uint8Array, DecodeOptions]) + : [Uint8Array, WithRequired>>, "onValue">] ) { + this.data = data this.#offset = 0 this.#view = new DataView(data.buffer, data.byteOffset, data.byteLength) + this.#env = { isKey: false, keyPath: [] } this.allowUndefined = options.allowUndefined ?? true this.minFloatSize = options.minFloatSize ?? 16 + this.onKey = options.onKey + this.onValue = (options as DecodeOptions).onValue } public getOffset(): number { return this.#offset } + private pushKey(key: string|number) { + this.#env.keyPath.push(key) + } + + private popKey() { + this.#env.keyPath.pop() + } + private constant = (size: number, f: () => T) => () => { @@ -49,7 +76,7 @@ export class Decoder { } private decodeString(length: number): string { - const value = new TextDecoder().decode(this.data.subarray(this.#offset, this.#offset + length)) + const value = this.decoder.decode(this.data.subarray(this.#offset, this.#offset + length)) this.#offset += length return value } @@ -57,19 +84,20 @@ export class Decoder { private getArgument(additionalInformation: number): { value: number uint64?: bigint + size: number } { if (additionalInformation < 24) { - return { value: additionalInformation } + return { value: additionalInformation, size: 1 } } else if (additionalInformation === 24) { - return { value: this.uint8() } + return { value: this.uint8(), size: 1 } } else if (additionalInformation === 25) { - return { value: this.uint16() } + return { value: this.uint16(), size: 2 } } else if (additionalInformation === 26) { - return { value: this.uint32() } + return { value: this.uint32(), size: 4 } } else if 
(additionalInformation === 27) { const uint64 = this.uint64() const value = maxSafeInteger < uint64 ? Infinity : Number(uint64) - return { value, uint64 } + return { value, uint64, size: 8 } } else if (additionalInformation === 31) { throw new Error("microcbor does not support decoding indefinite-length items") } else { @@ -77,81 +105,140 @@ export class Decoder { } } + public decodeValue(): R public decodeValue(): CBORValue { const initialByte = this.uint8() const majorType = initialByte >> 5 const additionalInformation = initialByte & 0x1f + const { isKey, keyPath } = this.#env if (majorType === 0) { - const { value, uint64 } = this.getArgument(additionalInformation) + const { value, uint64, size } = this.getArgument(additionalInformation) if (uint64 !== undefined && maxSafeInteger < uint64) { throw new UnsafeIntegerError("cannot decode integers greater than 2^53-1", uint64) - } else { - return value } + const val = this.onValue?.(() => value, size, "number", keyPath) + return val === undefined ? value : val } else if (majorType === 1) { - const { value, uint64 } = this.getArgument(additionalInformation) + const { value, uint64, size } = this.getArgument(additionalInformation) if (uint64 !== undefined && -1n - uint64 < minSafeInteger) { throw new UnsafeIntegerError("cannot decode integers less than -2^53+1", -1n - uint64) - } else { - return -1 - value } + const val = this.onValue?.(() => (-1 - value), size, "number", keyPath) + return val === undefined ? (-1 - value) : val } else if (majorType === 2) { const { value: length } = this.getArgument(additionalInformation) - return this.decodeBytes(length) + let value: CBORValue + const callback = () => ( + value = (value === undefined ? 
this.decodeBytes(length) : value) as Uint8Array + ) + const val = this.onValue?.(callback, length, "Uint8Array", keyPath) + if (val !== undefined) { + if (value === undefined) this.#offset += length + return val + } + return callback() } else if (majorType === 3) { const { value: length } = this.getArgument(additionalInformation) - return this.decodeString(length) + let value: CBORValue, val + const callback = () => ( + value = (value === undefined ? this.decodeString(length) : value) as string + ) + if (isKey) val = this.onKey?.(callback, length) + else val = this.onValue?.(callback, length, "string", keyPath) + if (val !== undefined) { + if (value === undefined) this.#offset += length + return val + } + return callback() } else if (majorType === 4) { const { value: length } = this.getArgument(additionalInformation) - const value = new Array(length) - for (let i = 0; i < length; i++) { - value[i] = this.decodeValue() + let value: CBORValue + const callback = () => { + if (value !== undefined) return value as CBORArray + value = new Array(length) + for (let i = 0; i < length; i++) { + this.pushKey(i) + value[i] = this.decodeValue() + this.popKey() + } + return value } - return value + const val = this.onValue?.(callback, length, "array", keyPath) + if (val !== undefined) { + if (value !== undefined) return val + for (let i = 0; i < length; i++) this.skipValue() + return val + } + return callback() } else if (majorType === 5) { const { value: length } = this.getArgument(additionalInformation) - const value: Record = {} - for (let i = 0; i < length; i++) { - const key = this.decodeValue() - if (typeof key !== "string") { - throw new Error("microcbor only supports string keys in objects") + let value: CBORValue|void + const callback = () => { + if (value !== undefined) return value as CBORMap + value = {} + for (let i = 0; i < length; i++) { + this.#env.isKey = true + const key = this.decodeValue() + this.#env.isKey = false + if (typeof key !== "string") { + throw new 
Error("microcbor only supports string keys in objects") + } + if (key in value) { + throw new Error("duplicate object key") + } + this.pushKey(key) + value[key] = this.decodeValue() + this.popKey() } - value[key] = this.decodeValue() + return value } - return value + const val = this.onValue?.(callback, length, "object", keyPath) + if (val !== undefined) { + if (value !== undefined) return val + for (let i = 0; i < length * 2; i++) this.skipValue() + return val + } + return callback() } else if (majorType === 6) { throw new Error("microcbor does not support tagged data items") } else if (majorType === 7) { + let val switch (additionalInformation) { case 20: - return false + val = this.onValue?.(() => false, 1, "boolean", keyPath) + return val === undefined ? false : val case 21: - return true + val = this.onValue?.(() => true, 1, "boolean", keyPath) + return val === undefined ? true : val case 22: - return null + val = this.onValue?.(() => null, 1, "null", keyPath) + return val === undefined ? null : val case 23: - if (this.allowUndefined) { - return undefined - } else { - throw new TypeError("`undefined` not allowed") - } + if (!this.allowUndefined) throw new TypeError("`undefined` not allowed") + return this.onValue?.(() => undefined, 1, "undefined", keyPath) as CBORValue case 24: throw new Error("microcbor does not support decoding unassigned simple values") case 25: if (this.minFloatSize <= 16) { - return this.float16() + const value = this.float16() + val = this.onValue?.(() => value, 2, "number", keyPath) + return val === undefined ? value : val } else { throw new Error("cannot decode float16 type - below provided minFloatSize") } case 26: if (this.minFloatSize <= 32) { - return this.float32() + const value = this.float32() + val = this.onValue?.(() => value, 4, "number", keyPath) + return val === undefined ? 
value : val } else { throw new Error("cannot decode float32 type - below provided minFloatSize") } case 27: - return this.float64() + const value = this.float64() + val = this.onValue?.(() => value, 8, "number", keyPath) + return val === undefined ? value : val case 31: throw new Error("microcbor does not support decoding indefinite-length items") default: @@ -161,9 +248,60 @@ export class Decoder { throw new Error("invalid major type") } } + + private skipValue() { + const initialByte = this.uint8() + const majorType = initialByte >> 5 + const additionalInformation = initialByte & 0x1f + + if (majorType === 0 || majorType === 1) { + this.getArgument(additionalInformation) + } else if (majorType === 2 || majorType === 3) { + const { value: length } = this.getArgument(additionalInformation) + this.#offset += length + } else if (majorType === 4) { + const { value: length } = this.getArgument(additionalInformation) + for (let i = 0; i < length; i++) this.skipValue() + } else if (majorType === 5) { + const { value: length } = this.getArgument(additionalInformation) + for (let i = 0; i < length * 2; i++) this.skipValue() + } else if (majorType === 6) { + throw new Error("microcbor does not support tagged data items") + } else if (majorType === 7) { + switch (additionalInformation) { + case 20: case 21: case 22: + break + case 23: + if (!this.allowUndefined) throw new TypeError("`undefined` not allowed") + break + case 24: + throw new Error("microcbor does not support decoding unassigned simple values") + case 25: + this.#offset += 2 + break + case 26: + this.#offset += 4 + break + case 27: + this.#offset += 8 + break + case 31: + throw new Error("microcbor does not support decoding indefinite-length items") + default: + throw new Error("invalid simple value") + } + } + } } -/** Decode a single CBOR value */ -export function decode(data: Uint8Array, options: DecodeOptions = {}): T { - return new Decoder(data, options).decodeValue() as T +/** + * Decode a single CBOR 
value + * @param data Data to decode + * @param options Decode options + */ +export function decode(...[data, options]: T extends CBORValue + ? ([Uint8Array]|[Uint8Array, DecodeOptions]) + : [Uint8Array, WithRequired>>, "onValue">] +) { + return new Decoder(data, options as DecodeOptions).decodeValue() as T } diff --git a/src/Encoder.ts b/src/Encoder.ts index 08d3ab8..41330b5 100644 --- a/src/Encoder.ts +++ b/src/Encoder.ts @@ -1,10 +1,10 @@ import { Precision, getFloat16Precision, getFloat32Precision, setFloat16 } from "fp16" import type { CBORValue } from "./types.js" -import { EncodeOptions, FloatSize } from "./options.js" -import { assert } from "./utils.js" +import { FloatSize, type EncodeOptions } from "./options.js" +import { assert, type WithRequired, type Flatten, type NoInfer } from "./utils.js" -export class Encoder { +export class Encoder { public static defaultChunkSize = 4096 #closed: boolean @@ -13,30 +13,49 @@ export class Encoder { public readonly chunkSize: number public readonly minFloatSize: (typeof FloatSize)[keyof typeof FloatSize] + private readonly onKey?: (key: string) => string|void + private readonly onValue?: (value: CBORValue, keyPath: (string|number)[]) => CBORValue|void private readonly encoder = new TextEncoder() private readonly buffer: ArrayBuffer private readonly view: DataView private readonly array: Uint8Array private offset: number + #env: { + keyPath: (string|number)[] + } - constructor(options: EncodeOptions = {}) { + constructor(...[options = {}]: T extends CBORValue + ? []|[EncodeOptions] + : [WithRequired>>, "onValue">] + ) { this.allowUndefined = options.allowUndefined ?? true this.minFloatSize = options.minFloatSize ?? 16 this.chunkRecycling = options.chunkRecycling ?? false this.chunkSize = options.chunkSize ?? 
Encoder.defaultChunkSize assert(this.chunkSize >= 8, "expected chunkSize >= 8") + this.onKey = options.onKey + this.onValue = (options as EncodeOptions).onValue this.buffer = new ArrayBuffer(this.chunkSize) this.view = new DataView(this.buffer) this.array = new Uint8Array(this.buffer, 0, this.chunkSize) this.offset = 0 this.#closed = false + this.#env = { keyPath: [] } } public get closed() { return this.#closed } + private pushKey(key: string|number) { + this.#env.keyPath.push(key) + } + + private popKey() { + this.#env.keyPath.pop() + } + #flush(): Uint8Array { if (this.chunkRecycling) { const chunk = new Uint8Array(this.buffer, 0, this.offset) @@ -189,11 +208,16 @@ export class Encoder { } } - public *encodeValue(value: CBORValue): Iterable { + public *encodeValue(value: T|CBORValue): Iterable { if (this.#closed) { return } + if (this.onValue) { + const val = this.onValue(value as CBORValue, this.#env.keyPath) + if (val !== undefined) value = val + } + if (value === false) { yield* this.uint8(0xf4) } else if (value === true) { @@ -214,19 +238,26 @@ export class Encoder { yield* this.encodeBytes(value) } else if (Array.isArray(value)) { yield* this.encodeTypeAndArgument(4, value.length) - for (const element of value) { - yield* this.encodeValue(element) + for (let i = 0; i < value.length; i++) { + this.pushKey(i) + yield* this.encodeValue(value[i]) + this.popKey() } } else { const entries = Object.entries(value) - .map<[Uint8Array, CBORValue]>(([key, value]) => [this.encoder.encode(key), value]) + .map<[Uint8Array, CBORValue, string]>(([ogKey, value]) => { + let key = this.onKey?.(ogKey) + return [this.encoder.encode(key === undefined ? 
ogKey : key + ""), value, ogKey] + }) .sort(Encoder.compareEntries) yield* this.encodeTypeAndArgument(5, entries.length) - for (const [key, value] of entries) { + for (const [key, value, ogKey] of entries) { yield* this.encodeTypeAndArgument(3, key.byteLength) yield* this.writeBytes(key) + this.pushKey(ogKey) yield* this.encodeValue(value) + this.popKey() } } } @@ -249,7 +280,10 @@ export class Encoder { // with a longer length, since strings are encoded with a length // prefix (either in the additionalInformation bits, if < 24, or // in the next serveral bytes, but in all cases the order holds). - private static compareEntries([a]: [key: Uint8Array, value: CBORValue], [b]: [key: Uint8Array, value: CBORValue]) { + private static compareEntries( + [a]: [key: Uint8Array, value: CBORValue, ogKey: string], + [b]: [key: Uint8Array, value: CBORValue, ogKey: string] + ) { if (a.byteLength < b.byteLength) return -1 if (b.byteLength < a.byteLength) return 1 @@ -278,8 +312,12 @@ export class Encoder { /** * Encode a single CBOR value. + * @param value Value to encode + * @param options Encode options * options.chunkRecycling has no effect here. 
*/ +export function encode(value: T, options?: EncodeOptions): Uint8Array +export function encode(value: T, options: WithRequired>>, "onValue">): Uint8Array export function encode(value: CBORValue, options: EncodeOptions = {}): Uint8Array { const encoder = new Encoder({ ...options, chunkRecycling: false }) diff --git a/src/decodeAsyncIterable.ts b/src/decodeAsyncIterable.ts index 769ed46..bd3b94b 100644 --- a/src/decodeAsyncIterable.ts +++ b/src/decodeAsyncIterable.ts @@ -1,94 +1,120 @@ import { getFloat16 } from "fp16" -import type { CBORValue } from "./types.js" - +import type { CBORValue, CBORArray, CBORMap } from "./types.js" +import type { DecodeOptions, FloatSize } from "./options.js" +import type { WithRequired, Flatten, NoInfer, Awaitable } from "./utils.js" import { UnsafeIntegerError, maxSafeInteger, minSafeInteger } from "./utils.js" -import { DecodeOptions, FloatSize } from "./options.js" -export interface AsyncDecodeOptions extends DecodeOptions { - onFree?: (chunk: Uint8Array) => void +export interface AsyncDecodeOptions extends Omit { + /** + * Function to remap/validate object keys while decoding + * (async version that works with AsyncIterable and streams) + * @param decodeKey Function to decode original object key (async) + * @param length Key length to validate pre-decoding + * @throws Error if length/key is invalid + * @returns An optional replacement key string + */ + onKey?: ( + decodeKey: () => Awaitable, + length: number + ) => Awaitable + + /** + * Function to validate/transform/replace values while decoding + * (async version that works with AsyncIterable and streams) + * @param decodeValue Function to decode value (async) + * @param length Value length/size to validate pre-decoding + * @param type Value type (e.g. 'number', 'string', 'Uint8Array'...) 
+ * @param keyPath Array of keys describing the access path to this value + * @throws Error if length/value is invalid + * @returns An optional replacement value to use + */ + onValue?: ( + decodeValue: () => Awaitable, + length: number, + type: string, + keyPath: (string|number)[] + ) => Awaitable + + /** Callback function when the decoder requires more data */ + onPull?: () => void } -export class Decoder implements AsyncIterableIterator { +export class Decoder implements AsyncIterableIterator { public readonly allowUndefined: boolean public readonly minFloatSize: (typeof FloatSize)[keyof typeof FloatSize] private offset = 0 private byteLength = 0 + private readonly decoder = new TextDecoder() private readonly chunks: Uint8Array[] = [] private readonly constantBuffer = new ArrayBuffer(8) private readonly constantView = new DataView(this.constantBuffer) private readonly iter: AsyncIterator - private readonly onFree?: (chunk: Uint8Array) => void - private readonly touchedChunks = new WeakSet() - private readonly freedChunks = new WeakSet() + private readonly onPull?: () => void + private readonly onKey?: ( + decodeKey: () => Awaitable, + length: number + ) => Awaitable + private readonly onValue?: ( + decodeValue: () => Awaitable, + length: number, + type: string, + keyPath: (string|number)[] + ) => Awaitable + private env: { + isKey: boolean + keyPath: (string|number)[] + } = { isKey: false, keyPath: [] } - public constructor(source: AsyncIterable, options: AsyncDecodeOptions = {}) { - this.onFree = options.onFree + public constructor(...[source, options = {}]: T extends CBORValue + ? ([AsyncIterable]|[AsyncIterable, AsyncDecodeOptions]) + : [AsyncIterable, WithRequired>>, "onValue">] + ) { + this.onPull = options.onPull this.allowUndefined = options.allowUndefined ?? true this.minFloatSize = options.minFloatSize ?? 
16 this.iter = source[Symbol.asyncIterator]() + this.onKey = options.onKey + this.onValue = (options as AsyncDecodeOptions).onValue } [Symbol.asyncIterator] = () => this private async allocate(size: number) { - // If we need more data, first call onFree for all touched chunks - // This allows the transform stream to provide more chunks - if (this.byteLength < size && this.onFree !== undefined) { - for (const chunk of this.chunks) { - if (this.touchedChunks.has(chunk) && !this.freedChunks.has(chunk)) { - this.freedChunks.add(chunk) - this.onFree(chunk) - } - } - } - while (this.byteLength < size) { + this.onPull?.() const { done, value } = await this.iter.next() if (done) { throw new Error("stream ended prematurely") } else { this.chunks.push(value) this.byteLength += value.byteLength - - // If we still need more data after adding this chunk, - // immediately call onFree to allow the next chunk to flow - if (this.byteLength < size && this.onFree !== undefined && !this.freedChunks.has(value)) { - this.freedChunks.add(value) - this.onFree(value) - } } } } - private fill(target: Uint8Array) { - if (this.byteLength < target.byteLength) { + private advance(length: number, target?: Uint8Array) { + if (this.byteLength < length) { throw new Error("internal error - please file a bug report!") } let byteLength = 0 let deleteCount = 0 - for (let i = 0; byteLength < target.byteLength; i++) { + for (let i = 0; byteLength < length; i++) { const chunk = this.chunks[i] - - // Track which chunks we touched - if (!this.touchedChunks.has(chunk)) { - this.touchedChunks.add(chunk) - } - - const capacity = target.byteLength - byteLength - const length = chunk.byteLength - this.offset - if (length <= capacity) { + const capacity = length - byteLength + const available = chunk.byteLength - this.offset + if (available <= capacity) { // copy the entire remainder of the chunk - target.set(chunk.subarray(this.offset), byteLength) - byteLength += length + 
target?.set(chunk.subarray(this.offset), byteLength) + byteLength += available deleteCount += 1 this.offset = 0 - this.byteLength -= length + this.byteLength -= available } else { // fill the remainder of the target - target.set(chunk.subarray(this.offset, this.offset + capacity), byteLength) + target?.set(chunk.subarray(this.offset, this.offset + capacity), byteLength) byteLength += capacity // equivalent to break this.offset += capacity @@ -96,20 +122,21 @@ export class Decoder implements AsyncIterableIt } } - // Call onFree for chunks that are being removed (fully consumed) - if (this.onFree !== undefined) { - for (let i = 0; i < deleteCount; i++) { - const chunk = this.chunks[i] - if (!this.freedChunks.has(chunk)) { - this.freedChunks.add(chunk) - this.onFree(chunk) - } - } - } - this.chunks.splice(0, deleteCount) } + private fill(target: Uint8Array) { + this.advance(target.byteLength, target) + } + + private pushKey(key: string|number) { + this.env.keyPath.push(key) + } + + private popKey() { + this.env.keyPath.pop() + } + private constant = (size: number, f: (view: DataView) => T) => { return async () => { await this.allocate(size) @@ -138,25 +165,26 @@ export class Decoder implements AsyncIterableIt await this.allocate(length) const data = new Uint8Array(length) this.fill(data) - return new TextDecoder().decode(data) + return this.decoder.decode(data) } private async getArgument(additionalInformation: number): Promise<{ value: number uint64?: bigint + size: number }> { if (additionalInformation < 24) { - return { value: additionalInformation } + return { value: additionalInformation, size: 1 } } else if (additionalInformation === 24) { - return { value: await this.uint8() } + return { value: await this.uint8(), size: 1 } } else if (additionalInformation === 25) { - return { value: await this.uint16() } + return { value: await this.uint16(), size: 2 } } else if (additionalInformation === 26) { - return { value: await this.uint32() } + return { value: await 
this.uint32(), size: 4 } } else if (additionalInformation === 27) { const uint64 = await this.uint64() const value = maxSafeInteger < uint64 ? Infinity : Number(uint64) - return { value, uint64 } + return { value, uint64, size: 8 } } else if (additionalInformation === 31) { throw new Error("microcbor does not support decoding indefinite-length items") } else { @@ -166,6 +194,7 @@ export class Decoder implements AsyncIterableIt public async next(): Promise<{ done: true; value: undefined } | { done: false; value: T }> { while (this.byteLength === 0) { + this.onPull?.() const { done, value } = await this.iter.next() if (done) { return { done: true, value: undefined } @@ -183,77 +212,139 @@ export class Decoder implements AsyncIterableIt const initialByte = await this.uint8() const majorType = initialByte >> 5 const additionalInformation = initialByte & 0x1f + const { isKey, keyPath } = this.env if (majorType === 0) { - const { value, uint64 } = await this.getArgument(additionalInformation) + const { value, uint64, size } = await this.getArgument(additionalInformation) if (uint64 !== undefined && maxSafeInteger < uint64) { throw new UnsafeIntegerError("cannot decode integers greater than 2^53-1", uint64) - } else { - return value } + const val = await this.onValue?.(() => value, size, "number", keyPath) + return val === undefined ? value : val } else if (majorType === 1) { - const { value, uint64 } = await this.getArgument(additionalInformation) + const { value, uint64, size } = await this.getArgument(additionalInformation) if (uint64 !== undefined && -1n - uint64 < minSafeInteger) { throw new UnsafeIntegerError("cannot decode integers less than -2^53+1", -1n - uint64) - } else { - return -1 - value } + const val = await this.onValue?.(() => (-1 - value), size, "number", keyPath) + return val === undefined ? 
(-1 - value) : val } else if (majorType === 2) { const { value: length } = await this.getArgument(additionalInformation) - return await this.decodeBytes(length) + let value: CBORValue + const callback = async () => ( + value = (value === undefined ? await this.decodeBytes(length) : value) as Uint8Array + ) + const val = await this.onValue?.(callback, length, "Uint8Array", keyPath) + if (val !== undefined) { + if (value !== undefined) return val + await this.allocate(length) + this.advance(length) + return val + } + return callback() } else if (majorType === 3) { const { value: length } = await this.getArgument(additionalInformation) - return await this.decodeString(length) + let value: CBORValue, val + const callback = async () => ( + value = (value === undefined ? await this.decodeString(length) : value) as string + ) + if (isKey) val = await this.onKey?.(callback, length) + else val = await this.onValue?.(callback, length, "string", keyPath) + if (val !== undefined) { + if (value !== undefined) return val + await this.allocate(length) + this.advance(length) + return val + } + return callback() } else if (majorType === 4) { const { value: length } = await this.getArgument(additionalInformation) - const value = new Array(length) - for (let i = 0; i < length; i++) { - value[i] = await this.decodeValue() + let value: CBORValue + const callback = async () => { + if (value !== undefined) return value as CBORArray + value = new Array(length) + for (let i = 0; i < length; i++) { + this.pushKey(i) + value[i] = await this.decodeValue() + this.popKey() + } + return value } - return value + const val = await this.onValue?.(callback, length, "array", keyPath) + if (val !== undefined) { + if (value !== undefined) return val + for (let i = 0; i < length; i++) await this.skipValue() + return val + } + return callback() } else if (majorType === 5) { const { value: length } = await this.getArgument(additionalInformation) - const value: Record = {} - for (let i = 0; i < length; 
i++) { - const key = await this.decodeValue() - if (typeof key !== "string") { - throw new Error("microcbor only supports string keys in objects") + let value: CBORValue|void + const callback = async () => { + if (value !== undefined) return value as CBORMap + value = {} + for (let i = 0; i < length; i++) { + this.env.isKey = true + const key = await this.decodeValue() + this.env.isKey = false + if (typeof key !== "string") { + throw new Error("microcbor only supports string keys in objects") + } + if (key in value) { + throw new Error("duplicate object key") + } + this.pushKey(key) + value[key] = await this.decodeValue() + this.popKey() } - value[key] = await this.decodeValue() + return value } - return value + const val = await this.onValue?.(callback, length, "object", keyPath) + if (val !== undefined) { + if (value !== undefined) return val + for (let i = 0; i < length * 2; i++) await this.skipValue() + return val + } + return callback() } else if (majorType === 6) { throw new Error("microcbor does not support tagged data items") } else if (majorType === 7) { + let val switch (additionalInformation) { case 20: - return false + val = await this.onValue?.(() => false, 1, "boolean", keyPath) + return val === undefined ? false : val case 21: - return true + val = await this.onValue?.(() => true, 1, "boolean", keyPath) + return val === undefined ? true : val case 22: - return null + val = await this.onValue?.(() => null, 1, "null", keyPath) + return val === undefined ? 
null : val case 23: - if (this.allowUndefined) { - return undefined - } else { - throw new TypeError("`undefined` not allowed") - } + if (!this.allowUndefined) throw new TypeError("`undefined` not allowed") + return await this.onValue?.(() => undefined, 1, "undefined", keyPath) as CBORValue case 24: throw new Error("microcbor does not support decoding unassigned simple values") case 25: if (this.minFloatSize <= 16) { - return this.float16() + const value = await this.float16() + val = await this.onValue?.(() => value, 2, "number", keyPath) + return val === undefined ? value : val } else { throw new Error("cannot decode float16 type - below provided minFloatSize") } case 26: if (this.minFloatSize <= 32) { - return this.float32() + const value = await this.float32() + val = await this.onValue?.(() => value, 4, "number", keyPath) + return val === undefined ? value : val } else { throw new Error("cannot decode float32 type - below provided minFloatSize") } case 27: - return await this.float64() + const value = await this.float64() + val = await this.onValue?.(() => value, 8, "number", keyPath) + return val === undefined ? 
value : val case 31: throw new Error("microcbor does not support decoding indefinite-length items") default: @@ -263,12 +354,64 @@ export class Decoder implements AsyncIterableIt throw new Error("invalid major type") } } + + private async skipValue() { + const initialByte = await this.uint8() + const majorType = initialByte >> 5 + const additionalInformation = initialByte & 0x1f + + if (majorType === 0 || majorType === 1) { + await this.getArgument(additionalInformation) + } else if (majorType === 2 || majorType === 3) { + const { value: length } = await this.getArgument(additionalInformation) + await this.allocate(length) + this.advance(length) + } else if (majorType === 4) { + const { value: length } = await this.getArgument(additionalInformation) + for (let i = 0; i < length; i++) await this.skipValue() + } else if (majorType === 5) { + const { value: length } = await this.getArgument(additionalInformation) + for (let i = 0; i < length * 2; i++) await this.skipValue() + } else if (majorType === 6) { + throw new Error("microcbor does not support tagged data items") + } else if (majorType === 7) { + switch (additionalInformation) { + case 20: case 21: case 22: + break + case 23: + if (!this.allowUndefined) throw new TypeError("`undefined` not allowed") + break + case 24: + throw new Error("microcbor does not support decoding unassigned simple values") + case 25: + await this.allocate(2) + this.advance(2) + break + case 26: + await this.allocate(4) + this.advance(4) + break + case 27: + await this.allocate(8) + this.advance(8) + break + case 31: + throw new Error("microcbor does not support decoding indefinite-length items") + default: + throw new Error("invalid simple value") + } + } + } } -/** Decode an async iterable of Uint8Array chunks into an async iterable of CBOR values */ -export async function* decodeAsyncIterable( - source: AsyncIterable, - options: AsyncDecodeOptions = {}, -): AsyncIterableIterator { - yield* new Decoder(source, options) +/** + * Decode 
an async iterable of Uint8Array chunks into an async iterable of CBOR values + * @param source Async iterable of Uint8Array chunks + * @param options Decode options + */ +export async function* decodeAsyncIterable(...args: T extends CBORValue + ? ([AsyncIterable]|[AsyncIterable, AsyncDecodeOptions]) + : [AsyncIterable, WithRequired>>, "onValue">] +): AsyncIterableIterator { + yield* new Decoder(...args) } diff --git a/src/decodeIterable.ts b/src/decodeIterable.ts index 0099d1e..cc7e163 100644 --- a/src/decodeIterable.ts +++ b/src/decodeIterable.ts @@ -1,24 +1,42 @@ import { getFloat16 } from "fp16" -import type { CBORValue } from "./types.js" +import type { CBORValue, CBORArray, CBORMap } from "./types.js" import type { DecodeOptions, FloatSize } from "./options.js" +import type { WithRequired, Flatten, NoInfer } from "./utils.js" import { UnsafeIntegerError, maxSafeInteger, minSafeInteger } from "./utils.js" -export class Decoder implements IterableIterator { +export class Decoder implements IterableIterator { public readonly allowUndefined: boolean public readonly minFloatSize: (typeof FloatSize)[keyof typeof FloatSize] private offset = 0 private byteLength = 0 + private readonly decoder = new TextDecoder() private readonly chunks: Uint8Array[] = [] private readonly constantBuffer = new ArrayBuffer(8) private readonly constantView = new DataView(this.constantBuffer) private readonly iter: Iterator + private readonly onKey?: (decodeKey: () => string, length: number) => string|void + private readonly onValue?: ( + decodeValue: () => CBORValue, + length: number, + type: string, + keyPath: (string|number)[] + ) => CBORValue|void + private env: { + isKey: boolean + keyPath: (string|number)[] + } = { isKey: false, keyPath: [] } - public constructor(source: Iterable, options: DecodeOptions = {}) { + public constructor(...[source, options = {}]: T extends CBORValue + ? 
([Iterable]|[Iterable, DecodeOptions]) + : [Iterable, WithRequired>>, "onValue">] + ) { this.allowUndefined = options.allowUndefined ?? true this.minFloatSize = options.minFloatSize ?? 16 this.iter = source[Symbol.iterator]() + this.onKey = options.onKey + this.onValue = (options as DecodeOptions).onValue } [Symbol.iterator] = () => this @@ -35,27 +53,27 @@ export class Decoder implements IterableIterato } } - private fill(target: Uint8Array) { - if (this.byteLength < target.byteLength) { + private advance(length: number, target?: Uint8Array) { + if (this.byteLength < length) { throw new Error("internal error - please file a bug report!") } let byteLength = 0 let deleteCount = 0 - for (let i = 0; byteLength < target.byteLength; i++) { + for (let i = 0; byteLength < length; i++) { const chunk = this.chunks[i] - const capacity = target.byteLength - byteLength - const length = chunk.byteLength - this.offset - if (length <= capacity) { + const capacity = length - byteLength + const available = chunk.byteLength - this.offset + if (available <= capacity) { // copy the entire remainder of the chunk - target.set(chunk.subarray(this.offset), byteLength) - byteLength += length + target?.set(chunk.subarray(this.offset), byteLength) + byteLength += available deleteCount += 1 this.offset = 0 - this.byteLength -= length + this.byteLength -= available } else { // fill the remainder of the target - target.set(chunk.subarray(this.offset, this.offset + capacity), byteLength) + target?.set(chunk.subarray(this.offset, this.offset + capacity), byteLength) byteLength += capacity // equivalent to break this.offset += capacity @@ -66,6 +84,18 @@ export class Decoder implements IterableIterato this.chunks.splice(0, deleteCount) } + private fill(target: Uint8Array) { + this.advance(target.byteLength, target) + } + + private pushKey(key: string|number) { + this.env.keyPath.push(key) + } + + private popKey() { + this.env.keyPath.pop() + } + private constant = (size: number, f: (view: 
DataView) => T) => { return () => { this.allocate(size) @@ -94,22 +124,26 @@ export class Decoder implements IterableIterato this.allocate(length) const data = new Uint8Array(length) this.fill(data) - return new TextDecoder().decode(data) + return this.decoder.decode(data) } - private getArgument(additionalInformation: number): { value: number; uint64?: bigint } { + private getArgument(additionalInformation: number): { + value: number + uint64?: bigint + size: number + } { if (additionalInformation < 24) { - return { value: additionalInformation } + return { value: additionalInformation, size: 1 } } else if (additionalInformation === 24) { - return { value: this.uint8() } + return { value: this.uint8(), size: 1 } } else if (additionalInformation === 25) { - return { value: this.uint16() } + return { value: this.uint16(), size: 2 } } else if (additionalInformation === 26) { - return { value: this.uint32() } + return { value: this.uint32(), size: 4 } } else if (additionalInformation === 27) { const uint64 = this.uint64() const value = maxSafeInteger < uint64 ? Infinity : Number(uint64) - return { value, uint64 } + return { value, uint64, size: 8 } } else if (additionalInformation === 31) { throw new Error("microcbor does not support decoding indefinite-length items") } else { @@ -136,77 +170,139 @@ export class Decoder implements IterableIterato const initialByte = this.uint8() const majorType = initialByte >> 5 const additionalInformation = initialByte & 0x1f + const { isKey, keyPath } = this.env if (majorType === 0) { - const { value, uint64 } = this.getArgument(additionalInformation) + const { value, uint64, size } = this.getArgument(additionalInformation) if (uint64 !== undefined && maxSafeInteger < uint64) { throw new UnsafeIntegerError("cannot decode integers greater than 2^53-1", uint64) - } else { - return value } + const val = this.onValue?.(() => value, size, "number", keyPath) + return val === undefined ? 
value : val } else if (majorType === 1) { - const { value, uint64 } = this.getArgument(additionalInformation) + const { value, uint64, size } = this.getArgument(additionalInformation) if (uint64 !== undefined && -1n - uint64 < minSafeInteger) { throw new UnsafeIntegerError("cannot decode integers less than -2^53+1", -1n - uint64) - } else { - return -1 - value } + const val = this.onValue?.(() => (-1 - value), size, "number", keyPath) + return val === undefined ? (-1 - value) : val } else if (majorType === 2) { const { value: length } = this.getArgument(additionalInformation) - return this.decodeBytes(length) + let value: CBORValue + const callback = () => ( + value = (value === undefined ? this.decodeBytes(length) : value) as Uint8Array + ) + const val = this.onValue?.(callback, length, "Uint8Array", keyPath) + if (val !== undefined) { + if (value !== undefined) return val + this.allocate(length) + this.advance(length) + return val + } + return callback() } else if (majorType === 3) { const { value: length } = this.getArgument(additionalInformation) - return this.decodeString(length) + let value: CBORValue, val + const callback = () => ( + value = (value === undefined ? 
this.decodeString(length) : value) as string + ) + if (isKey) val = this.onKey?.(callback, length) + else val = this.onValue?.(callback, length, "string", keyPath) + if (val !== undefined) { + if (value !== undefined) return val + this.allocate(length) + this.advance(length) + return val + } + return callback() } else if (majorType === 4) { const { value: length } = this.getArgument(additionalInformation) - const value = new Array(length) - for (let i = 0; i < length; i++) { - value[i] = this.decodeValue() + let value: CBORValue + const callback = () => { + if (value !== undefined) return value as CBORArray + value = new Array(length) + for (let i = 0; i < length; i++) { + this.pushKey(i) + value[i] = this.decodeValue() + this.popKey() + } + return value + } + const val = this.onValue?.(callback, length, "array", keyPath) + if (val !== undefined) { + if (value !== undefined) return val + for (let i = 0; i < length; i++) this.skipValue() + return val } - return value + return callback() } else if (majorType === 5) { const { value: length } = this.getArgument(additionalInformation) - const value: Record = {} - for (let i = 0; i < length; i++) { - const key = this.decodeValue() - if (typeof key !== "string") { - throw new Error("microcbor only supports string keys in objects") + let value: CBORValue|void + const callback = () => { + if (value !== undefined) return value as CBORMap + value = {} + for (let i = 0; i < length; i++) { + this.env.isKey = true + const key = this.decodeValue() + this.env.isKey = false + if (typeof key !== "string") { + throw new Error("microcbor only supports string keys in objects") + } + if (key in value) { + throw new Error("duplicate object key") + } + this.pushKey(key) + value[key] = this.decodeValue() + this.popKey() } - value[key] = this.decodeValue() + return value } - return value + const val = this.onValue?.(callback, length, "object", keyPath) + if (val !== undefined) { + if (value !== undefined) return val + for (let i = 0; i < 
length * 2; i++) this.skipValue() + return val + } + return callback() } else if (majorType === 6) { throw new Error("microcbor does not support tagged data items") } else if (majorType === 7) { + let val switch (additionalInformation) { case 20: - return false + val = this.onValue?.(() => false, 1, "boolean", keyPath) + return val === undefined ? false : val case 21: - return true + val = this.onValue?.(() => true, 1, "boolean", keyPath) + return val === undefined ? true : val case 22: - return null + val = this.onValue?.(() => null, 1, "null", keyPath) + return val === undefined ? null : val case 23: - if (this.allowUndefined) { - return undefined - } else { - throw new TypeError("`undefined` not allowed") - } + if (!this.allowUndefined) throw new TypeError("`undefined` not allowed") + return this.onValue?.(() => undefined, 1, "undefined", keyPath) as CBORValue case 24: throw new Error("microcbor does not support decoding unassigned simple values") case 25: if (this.minFloatSize <= 16) { - return this.float16() + const value = this.float16() + val = this.onValue?.(() => value, 2, "number", keyPath) + return val === undefined ? value : val } else { throw new Error("cannot decode float16 type - below provided minFloatSize") } case 26: if (this.minFloatSize <= 32) { - return this.float32() + const value = this.float32() + val = this.onValue?.(() => value, 4, "number", keyPath) + return val === undefined ? value : val } else { throw new Error("cannot decode float32 type - below provided minFloatSize") } case 27: - return this.float64() + const value = this.float64() + val = this.onValue?.(() => value, 8, "number", keyPath) + return val === undefined ? 
value : val case 31: throw new Error("microcbor does not support decoding indefinite-length items") default: @@ -216,9 +312,64 @@ export class Decoder implements IterableIterato throw new Error("invalid major type") } } + + private skipValue() { + const initialByte = this.uint8() + const majorType = initialByte >> 5 + const additionalInformation = initialByte & 0x1f + + if (majorType === 0 || majorType === 1) { + this.getArgument(additionalInformation) + } else if (majorType === 2 || majorType === 3) { + const { value: length } = this.getArgument(additionalInformation) + this.allocate(length) + this.advance(length) + } else if (majorType === 4) { + const { value: length } = this.getArgument(additionalInformation) + for (let i = 0; i < length; i++) this.skipValue() + } else if (majorType === 5) { + const { value: length } = this.getArgument(additionalInformation) + for (let i = 0; i < length * 2; i++) this.skipValue() + } else if (majorType === 6) { + throw new Error("microcbor does not support tagged data items") + } else if (majorType === 7) { + switch (additionalInformation) { + case 20: case 21: case 22: + break + case 23: + if (!this.allowUndefined) throw new TypeError("`undefined` not allowed") + break + case 24: + throw new Error("microcbor does not support decoding unassigned simple values") + case 25: + this.allocate(2) + this.advance(2) + break + case 26: + this.allocate(4) + this.advance(4) + break + case 27: + this.allocate(8) + this.advance(8) + break + case 31: + throw new Error("microcbor does not support decoding indefinite-length items") + default: + throw new Error("invalid simple value") + } + } + } } -/** Decode an iterable of Uint8Array chunks into an iterable of CBOR values */ -export function* decodeIterable(source: Iterable): IterableIterator { - yield* new Decoder(source) +/** + * Decode an iterable of Uint8Array chunks into an iterable of CBOR values + * @param source Iterable of Uint8Array chunks + * @param options Decode options + */ 
+export function* decodeIterable(...args: T extends CBORValue + ? ([Iterable]|[Iterable, DecodeOptions]) + : [Iterable, WithRequired>>, "onValue">] +): IterableIterator { + yield* new Decoder(...args) } diff --git a/src/encodeAsyncIterable.ts b/src/encodeAsyncIterable.ts index 7fe5736..1fc02d5 100644 --- a/src/encodeAsyncIterable.ts +++ b/src/encodeAsyncIterable.ts @@ -1,9 +1,19 @@ -import type { CBORValue } from "./types.js" - import { Encoder } from "./Encoder.js" -import { EncodeOptions } from "./options.js" +import type { CBORValue } from "./types.js" +import type { EncodeOptions } from "./options.js" +import type { Flatten, WithRequired, NoInfer } from "./utils.js" /** Encode an async iterable of CBOR values into an async iterable of Uint8Array chunks */ +export function encodeAsyncIterable( + source: AsyncIterable, + options?: EncodeOptions +): AsyncIterableIterator + +export function encodeAsyncIterable( + source: AsyncIterable, + options: WithRequired>>, "onValue"> +): AsyncIterableIterator + export async function* encodeAsyncIterable( source: AsyncIterable, options: EncodeOptions = {}, diff --git a/src/encodeIterable.ts b/src/encodeIterable.ts index e602ff0..22fba8b 100644 --- a/src/encodeIterable.ts +++ b/src/encodeIterable.ts @@ -1,9 +1,19 @@ -import type { CBORValue } from "./types.js" - import { Encoder } from "./Encoder.js" -import { EncodeOptions } from "./options.js" +import type { CBORValue } from "./types.js" +import type { EncodeOptions } from "./options.js" +import type { Flatten, WithRequired, NoInfer } from "./utils.js" /** Encode an iterable of CBOR values into an iterable of Uint8Array chunks */ +export function encodeIterable( + source: Iterable, + options?: EncodeOptions +): IterableIterator + +export function encodeIterable( + source: Iterable, + options: WithRequired>>, "onValue"> +): IterableIterator + export function* encodeIterable( source: Iterable, options: EncodeOptions = {}, diff --git a/src/index.ts b/src/index.ts index 
df79e68..746c757 100644 --- a/src/index.ts +++ b/src/index.ts @@ -5,7 +5,7 @@ export { Decoder, decode } from "./Decoder.js" export { encodeIterable } from "./encodeIterable.js" export { decodeIterable } from "./decodeIterable.js" export { encodeAsyncIterable } from "./encodeAsyncIterable.js" -export { decodeAsyncIterable } from "./decodeAsyncIterable.js" +export { decodeAsyncIterable, type AsyncDecodeOptions } from "./decodeAsyncIterable.js" export { CBORDecoderStream } from "./CBORDecoderStream.js" export { CBOREncoderStream } from "./CBOREncoderStream.js" export { UnsafeIntegerError } from "./utils.js" diff --git a/src/options.ts b/src/options.ts index 40d5571..75c4988 100644 --- a/src/options.ts +++ b/src/options.ts @@ -1,10 +1,12 @@ +import type { CBORValue } from "./types.js" + export const FloatSize = { f16: 16, f32: 32, f64: 64, } -export interface EncodeOptions { +export interface EncodeOptions { /** * Allow `undefined` * @default true @@ -32,9 +34,26 @@ export interface EncodeOptions { * @default 16 */ minFloatSize?: (typeof FloatSize)[keyof typeof FloatSize] + + /** + * Function to remap/validate object keys while encoding + * @param key Original object key + * @throws Error if key is invalid + * @returns An optional replacement key string + */ + onKey?: (key: string) => string|void + + /** + * Function to validate/transform/replace values while encoding + * @param value Value to validate/transform/replace + * @param keyPath Array of keys describing the access path to this value + * @throws Error if value is invalid + * @returns An optional replacement value to use + */ + onValue?: (value: T, keyPath: (string|number)[]) => CBORValue|void } -export interface DecodeOptions { +export interface DecodeOptions { /** * Allow `undefined` * @default true @@ -46,4 +65,29 @@ export interface DecodeOptions { * @default 16 */ minFloatSize?: (typeof FloatSize)[keyof typeof FloatSize] + + /** + * Function to remap/validate object keys while decoding + * @param 
decodeKey Function to decode original object key + * @param length Key length to validate pre-decoding + * @throws Error if length/key is invalid + * @returns An optional replacement key string + */ + onKey?: (decodeKey: () => string, length: number) => string|void + + /** + * Function to validate/transform/replace values while decoding + * @param decodeValue Function to decode value + * @param length Value length/size to validate pre-decoding + * @param type Value type (e.g. 'number', 'string', 'Uint8Array'...) + * @param keyPath Array of keys describing the access path to this value + * @throws Error if length/value is invalid + * @returns An optional replacement value to use + */ + onValue?: ( + decodeValue: () => CBORValue, + length: number, + type: string, + keyPath: (string|number)[] + ) => T|void } diff --git a/src/utils.ts b/src/utils.ts index 1491a5a..09efd8b 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -90,3 +90,74 @@ export function getByteLength(string: string): number { return bytes } + +export function createTransformWithBackpressure( + transform: (chunk: I, enqueue: (out: O) => Promise) => Awaitable, + flush?: (enqueue: (out: O) => Promise) => Awaitable +): ReadableWritablePair { + let readableController: ReadableStreamDefaultController + let pullResolve: (() => void) | null = null + let closed = false + + const enqueue = async (out: O) => { + if (closed) throw new Error('cannot enqueue - stream closed') + readableController.enqueue(out) + await new Promise(res => { + pullResolve = () => { + pullResolve = null + res() + } + }) + } + + const readable = new ReadableStream({ + start(controller) { + readableController = controller + }, + pull() { + pullResolve?.() + }, + cancel() { + closed = true + pullResolve?.() + } + }, { highWaterMark: 1 }) + + const writable = new WritableStream({ + async write(chunk) { + await new Promise(res => setImmediate(res)) // yield + return transform(chunk, enqueue) + }, + async close() { + pullResolve?.() + if (flush) 
await flush(enqueue) + closed = true + readableController.close() + }, + abort(e) { + closed = true + pullResolve?.() + readableController.error(e) + } + }, { highWaterMark: 1 }) + + return { readable, writable } +} + +export type WithRequired = T & { [P in K]-?: T[P] }; + +export type DeepValueUnion = + T extends readonly (infer E)[] + ? DeepValueUnion + : T extends Record + ? { [K in keyof T]: DeepValueUnion }[keyof T] + : T + +export type Flatten = + | DeepValueUnion + | Flatten[] + | { [K: string]: Flatten } + +export type NoInfer = [T][T extends any ? 0 : never] + +export type Awaitable = T | PromiseLike