diff --git a/.gitkeep b/.gitkeep new file mode 100644 index 0000000..a6e2948 --- /dev/null +++ b/.gitkeep @@ -0,0 +1 @@ +# .gitkeep file auto-generated at 2026-05-10T19:22:27.543Z for PR creation at branch issue-35-03946ff48852 for issue https://github.com/link-foundation/lino-objects-codec/issues/35 \ No newline at end of file diff --git a/README.md b/README.md index d9383eb..3a6c9ba 100644 --- a/README.md +++ b/README.md @@ -73,13 +73,13 @@ npm install lino-objects-codec ``` ```javascript -import { encode, decode } from 'lino-objects-codec'; +import { formatIndented, parseIndented } from "lino-objects-codec"; -// Encode and decode -const data = { name: 'Alice', age: 30, active: true }; -const encoded = encode(data); -const decoded = decode(encoded); -console.log(JSON.stringify(decoded) === JSON.stringify(data)); // true +// Readable indented Links Notation for repository data +const data = { name: "Alice", age: 30, active: true }; +const text = formatIndented({ id: "obj_root", obj: data }); +const { obj } = parseIndented({ text }); +console.log(JSON.stringify(obj) === JSON.stringify(data)); // true ``` ### Rust @@ -166,6 +166,7 @@ All implementations support the same features with language-appropriate syntax: ### Circular References **Python:** + ```python from link_notation_objects_codec import encode, decode @@ -177,8 +178,9 @@ assert decoded[3] is decoded # Reference preserved ``` **JavaScript:** + ```javascript -import { encode, decode } from 'lino-objects-codec'; +import { encode, decode } from "lino-objects-codec"; // Self-referencing array const arr = [1, 2, 3]; @@ -188,6 +190,7 @@ console.log(decoded[3] === decoded); // true - Reference preserved ``` **Rust:** + ```rust use lino_objects_codec::{encode, decode, LinoValue}; @@ -199,6 +202,7 @@ let decoded = decode(&encoded).unwrap(); ``` **C#:** + ```csharp using Lino.Objects.Codec; @@ -212,6 +216,7 @@ Console.WriteLine(ReferenceEquals(decoded, decoded?[0])); // True - Reference pr ### Complex Nested Structures **Python:** + ```python data = { "users": [ @@ -224,18 +229,20 @@ assert decode(encode(data)) == data ``` **JavaScript:** + ```javascript const data = { users: [ - { id: 1, name: 'Alice' }, - { id: 2, name: 'Bob' } + { id: 1, name: "Alice" }, + { id: 2, name: "Bob" }, ], - metadata: { version: 1, count: 2 } + metadata: { version: 1, count: 2 }, }; console.log(JSON.stringify(decode(encode(data))) === JSON.stringify(data)); ``` **Rust:** + ```rust use lino_objects_codec::{encode, decode, LinoValue}; @@ -253,6 +260,7 @@ assert_eq!(decode(&encode(&data)).unwrap(), data); ``` **C#:** + ```csharp var data = new Dictionary { @@ -273,27 +281,34 @@ var decoded = Codec.Decode(Codec.Encode(data)); The indented format provides a human-readable representation for displaying objects: **JavaScript:** + ```javascript -import { formatIndented, parseIndented } from 'lino-objects-codec'; +import { formatIndented, parseIndented } from "lino-objects-codec"; // Format an object with an identifier const formatted = formatIndented({ - id: '6dcf4c1b-ff3f-482c-95ab-711ea7d1b019', - obj: { uuid: '6dcf4c1b-ff3f-482c-95ab-711ea7d1b019', status: 'executed', command: 'echo test', exitCode: '0' } + id: "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019", + obj: { + uuid: "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019", + status: "executed", + command: "echo test", + exitCode: "0", + }, }); console.log(formatted); // Output: -// 6dcf4c1b-ff3f-482c-95ab-711ea7d1b019 -// uuid "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019" -// status "executed" -// command "echo test" -// exitCode "0" +// 6dcf4c1b-ff3f-482c-95ab-711ea7d1b019: +// uuid '6dcf4c1b-ff3f-482c-95ab-711ea7d1b019' +// status executed +// command 'echo test' +// exitCode '0' // Parse it back const { id, obj } = parseIndented({ text: formatted }); ``` **Python:** + ```python from link_notation_objects_codec import format_indented, parse_indented @@ -308,6 +323,7 @@ id, obj = parse_indented(formatted) ``` **Rust:** + ```rust use lino_objects_codec::format::{format_indented_ordered, parse_indented}; @@ -320,6 +336,7 @@ let (id, obj) = parse_indented(&formatted).unwrap(); ``` **C#:** + ```csharp using Lino.Objects.Codec; @@ -345,6 +362,7 @@ The library uses the [links-notation](https://github.com/link-foundation/links-n - Circular references use direct object ID references: `obj_0` (without the `ref` keyword) This approach allows for: + - Universal representation of object graphs - Preservation of object identity - Natural handling of circular references using built-in links notation syntax diff --git a/js/.changeset/readable-indented-mode.md b/js/.changeset/readable-indented-mode.md new file mode 100644 index 0000000..962a68d --- /dev/null +++ b/js/.changeset/readable-indented-mode.md @@ -0,0 +1,5 @@ +--- +'lino-objects-codec': minor +--- + +Add recursive readable indented object formatting and parsing for untyped repository data. diff --git a/js/README.md b/js/README.md index 77ea9dc..9b4e07f 100644 --- a/js/README.md +++ b/js/README.md @@ -5,10 +5,11 @@ [![npm downloads](https://img.shields.io/npm/dm/lino-objects-codec.svg)](https://www.npmjs.com/package/lino-objects-codec) [![License: Unlicense](https://img.shields.io/badge/license-Unlicense-blue.svg)](https://unlicense.org/) -A JavaScript library for working with Links Notation format. This library provides: +A JavaScript library for working with Links Notation format. The default documented path is readable recursive indented Links Notation for repository data, with a typed codec available when exact JavaScript type preservation or object identity is required. This library provides: -- Universal serialization/deserialization for JavaScript objects with circular reference support -- JSON to Links Notation conversion utilities +- Readable recursive indented Links Notation for JSON-style objects +- Typed serialization/deserialization for JavaScript object graphs with circular reference support +- Compact JSON to Links Notation conversion utilities - Fuzzy matching utilities for string comparison These tools enable easy implementation of higher-level features like: @@ -18,17 +19,18 @@ These tools enable easy implementation of higher-level features like: ## Features -- **Universal Serialization**: Encode JavaScript objects to Links Notation format -- **Type Support**: Handle all common JavaScript types: +- **Readable Indented Format**: Write nested objects and arrays as reviewable recursive Links Notation definitions with `formatIndented({ id, obj })` +- **Dynamic Parsing**: Parse readable indented data back with `parseIndented({ text })`; quoted references stay strings and unquoted numbers, booleans, and `null` become dynamic values +- **Typed Object Codec**: Encode JavaScript object graphs to Links Notation with type markers when exact type preservation is required +- **Typed Support**: Handle all common JavaScript types: - Basic types: `null`, `undefined`, `boolean`, `number`, `string` - Collections: `Array`, `Object` - Special number values: `NaN`, `Infinity`, `-Infinity` -- **Circular References**: Automatically detect and preserve circular references -- **Object Identity**: Maintain object identity for shared references -- **UTF-8 Support**: Full Unicode string support using base64 encoding -- **Simple API**: Easy-to-use `encode({ obj: )` and `decode({ notation: } })` functions -- **JSON/Lino Conversion**: Convert between JSON and Links Notation with `jsonToLino({ json: )` and `linoToJson({ lino: } })` -- **Reference Escaping**: Properly escape strings for Links Notation format with `escapeReference({ value: )` +- **Circular References**: Automatically detect and preserve circular references in the typed codec +- **Object Identity**: Maintain object identity for shared references in the typed codec +- **UTF-8 Support**: Full Unicode string support in the typed codec using base64 encoding +- **Compact JSON/Lino Conversion**: Convert between JSON and compact Links Notation with `jsonToLino({ json })` and `linoToJson({ lino })` +- **Reference Escaping**: Properly escape strings for Links Notation format with `escapeReference({ value })` - **Fuzzy Matching**: Find similar strings with Levenshtein distance and keyword similarity ## Installation @@ -52,27 +54,79 @@ pnpm add lino-objects-codec ## Quick Start +```javascript +import { formatIndented, parseIndented } from 'lino-objects-codec'; + +const data = { + title: 'Indian Law', + defaultLanguage: 'en', + maxLines: 1500, + nested: { ok: true }, + items: ['a', 1], +}; + +const lino = formatIndented({ id: 'obj_root', obj: data }); +console.log(lino); +// Output: +// obj_root: +// title 'Indian Law' +// defaultLanguage en +// maxLines 1500 +// nested obj_root_nested +// items obj_root_items +// +// obj_root_nested: +// ok true +// +// obj_root_items: +// a +// 1 + +const parsed = parseIndented({ text: lino }); +console.log(parsed.obj.items[1] === 1); +// Output: true +``` + +Use the typed codec when you need exact JavaScript type preservation, circular references, or shared object identity: + ```javascript import { encode, decode } from 'lino-objects-codec'; -// Encode basic types -const encoded = encode({ obj: { name: 'Alice', age: 30, active: true } } } }); -console.log(encoded); -// Output: (object obj_0 ((str bmFt...) (int 30)) ((str YWN0...) (bool true))) +const obj = { name: 'root' }; +obj.self = obj; -// Decode back to JavaScript object -const decoded = decode({ notation: encoded } }); -console.log(decoded); -// Output: { name: 'Alice', age: 30, active: true } +const encoded = encode({ obj }); +const decoded = decode({ notation: encoded }); -// Roundtrip preserves data -console.log(JSON.stringify(decoded) === JSON.stringify({ name: 'Alice', age: 30, active: true })); +console.log(decoded.self === decoded); // Output: true ``` ## Usage Examples -### Basic Types +### Readable Indented Data + +```javascript +import { formatIndented, parseIndented } from 'lino-objects-codec'; + +const data = { + catalog: { + title: 'Indian Law', + languages: ['en', 'hi'], + }, + maxLines: 1500, +}; + +const text = formatIndented({ id: 'obj_root', obj: data }); +const { obj } = parseIndented({ text }); + +console.log(obj.catalog.languages[0]); +// Output: en +``` + +Readable indented data is intentionally untyped and acyclic. Use quoted references for strings that look like numbers, booleans, `null`, or generated definition ids. Use the typed codec below when you need circular references, shared object identity, `undefined`, `NaN`, or exact string/number distinctions in all cases. + +### Typed Basic Types ```javascript import { encode, decode } from 'lino-objects-codec'; @@ -101,7 +155,7 @@ console.log(decode({ notation: encode({ obj: '你好世界 🌍' } }))); // '你 console.log(decode({ notation: encode({ obj: 'multi\nline\nstring' } }))); // 'multi\nline\nstring' ``` -### Collections +### Typed Collections ```javascript import { encode, decode } from 'lino-objects-codec'; @@ -188,7 +242,7 @@ console.log(lino); // Output: ((name Alice) (age 30)) // Convert Links Notation back to JSON -const json = linoToJson({ lino: '((name Alice }) (age 30))'); +const json = linoToJson({ lino: '((name Alice) (age 30))' }); console.log(json); // Output: { name: 'Alice', age: 30 } @@ -247,7 +301,17 @@ const matches = findAllMatches({ question: { question: 'What is your name?', qaD ## How It Works -The library uses the [links-notation](https://github.com/link-foundation/links-notation) format as the serialization target. Each JavaScript object is encoded as a Link with type information: +The library uses the [links-notation](https://github.com/link-foundation/links-notation) format as the serialization target. + +Readable indented mode emits a root definition and a definition for each nested object or non-empty array: + +- Object definitions contain key/value doublets: `title 'Indian Law'` +- Array definitions contain one value per line +- Nested values reference generated definition ids such as `obj_root_items` +- Empty arrays are written as `()` +- Quoted references parse as strings; unquoted references parse dynamically as numbers, booleans, `null`, definition references, or strings + +The typed codec uses explicit type information: - Basic types are encoded with type markers: `(int 42)`, `(str "hello")`, `(bool true)` - Strings are base64-encoded to handle special characters and newlines @@ -266,10 +330,49 @@ This approach allows for: - Universal representation of object graphs - Preservation of object identity - Natural handling of circular references -- Human-readable (somewhat) output +- Exact typed round-trips when readability is less important than preserving JavaScript semantics ## API Reference +### Readable Indented Data + +#### `formatIndented({ id: id, obj: obj, indent: indent })` + +Format a plain object as readable recursive indented Links Notation. + +**Parameters:** + +- `options.id` - Root definition id +- `options.obj` - Plain object to format +- `options.indent` - Optional indentation string, defaulting to two spaces + +**Returns:** + +- Formatted indented Links Notation string + +**Throws:** + +- `Error` - If `id` is missing, `obj` is not a plain object, or a circular reference is found + +```javascript +formatIndented({ + id: 'obj_root', + obj: { title: 'Indian Law', nested: { ok: true }, items: ['a', 1] }, +}); +``` + +#### `parseIndented({ text: text })` + +Parse readable recursive indented Links Notation back to `{ id, obj }`. + +**Parameters:** + +- `options.text` - Indented Links Notation text + +**Returns:** + +- `{ id, obj }`, where `id` is the root definition id and `obj` is the parsed dynamic object + ### Typed Object Codec #### `encode({ obj: obj })` @@ -308,7 +411,7 @@ The main codec class that performs encoding and decoding. The module-level `enco import { ObjectCodec } from 'lino-objects-codec'; const codec = new ObjectCodec(); -const encoded = codec.encode({ data: [1, 2, 3] }); +const encoded = codec.encode({ obj: [1, 2, 3] }); const decoded = codec.decode({ notation: encoded }); ``` @@ -327,7 +430,7 @@ Convert JSON data to Links Notation format. - Links Notation string representation ```javascript -jsonToLino({ name: 'Alice', age: 30 }); +jsonToLino({ json: { name: 'Alice', age: 30 } }); // Returns: ((name Alice) (age 30)) jsonToLino({ json: [1, 2, 3] }); @@ -347,7 +450,7 @@ Convert Links Notation to JSON. - Parsed JSON value ```javascript -linoToJson({ lino: '((name Alice }) (age 30))'); +linoToJson({ lino: '((name Alice) (age 30))' }); // Returns: { name: 'Alice', age: 30 } ``` diff --git a/js/examples/basic_usage.js b/js/examples/basic_usage.js index a64ecc2..9c87872 100644 --- a/js/examples/basic_usage.js +++ b/js/examples/basic_usage.js @@ -2,13 +2,30 @@ * Basic usage examples for lino-objects-codec. */ -import { encode, decode } from '../src/index.js'; +import { encode, decode, formatIndented, parseIndented } from '../src/index.js'; -function main() { - console.log('=== Link Notation Objects Codec Examples ===\n'); +function runReadableIndentedExample() { + console.log('1. Readable Indented Data:'); + const repositoryData = { + title: 'Indian Law', + defaultLanguage: 'en', + maxLines: 1500, + nested: { ok: true }, + items: ['a', 1], + }; + const readableLino = formatIndented({ + id: 'obj_root', + obj: repositoryData, + }); + console.log(readableLino); + const parsedReadable = parseIndented({ text: readableLino }); + console.log( + ` Parsed match: ${JSON.stringify(parsedReadable.obj) === JSON.stringify(repositoryData)}` + ); +} - // Example 1: Basic types - console.log('1. Basic Types:'); +function runTypedBasicValuesExample() { + console.log('\n2. Typed Basic Values:'); const basicExamples = [ null, undefined, @@ -33,9 +50,10 @@ function main() { console.error(` ERROR: Mismatch for ${obj}`); } } +} - // Example 2: Collections - console.log('\n2. Collections:'); +function runTypedCollectionsExample() { + console.log('\n3. Typed Collections:'); const arrayExample = [1, 2, 3, 'hello', true]; const objectExample = { name: 'Alice', age: 30, active: true }; @@ -56,9 +74,10 @@ function main() { console.log( ` Match: ${JSON.stringify(decodedObject) === JSON.stringify(objectExample)}` ); +} - // Example 3: Nested structures - console.log('\n3. Nested Structures:'); +function runTypedNestedStructuresExample() { + console.log('\n4. Typed Nested Structures:'); const nested = { users: [ { id: 1, name: 'Alice', admin: true }, @@ -74,9 +93,10 @@ function main() { console.log( ` Match: ${JSON.stringify(decodedNested) === JSON.stringify(nested)}` ); +} - // Example 4: Circular references - console.log('\n4. Circular References:'); +function runCircularReferencesExample() { + console.log('\n5. Circular References:'); // Self-referencing array const arr = [1, 2, 3]; @@ -109,9 +129,10 @@ function main() { if (decodedObjectCircular.self !== decodedObjectCircular) { console.error(' ERROR: Circular reference not preserved!'); } +} - // Example 5: Shared references - console.log('\n5. Shared Object References:'); +function runSharedReferencesExample() { + console.log('\n6. Shared Object References:'); const shared = { shared: 'data', value: 42 }; const container = { first: shared, second: shared, third: shared }; console.log(' Created container with 3 references to same object'); @@ -136,6 +157,17 @@ function main() { ' ERROR: Modification not visible through shared reference!' ); } +} + +function main() { + console.log('=== Link Notation Objects Codec Examples ===\n'); + + runReadableIndentedExample(); + runTypedBasicValuesExample(); + runTypedCollectionsExample(); + runTypedNestedStructuresExample(); + runCircularReferencesExample(); + runSharedReferencesExample(); console.log('\n=== All examples completed successfully! ==='); } diff --git a/js/src/format.js b/js/src/format.js index a42479a..62e896b 100644 --- a/js/src/format.js +++ b/js/src/format.js @@ -334,40 +334,164 @@ export function formatAsLino(options = {}) { } /** - * Format a value for display in indented Links Notation. - * Uses quoting strategy compatible with the links-notation parser: - * - If value contains double quotes, wrap in single quotes - * - Otherwise, wrap in double quotes + * Quote a reference even when Links Notation would allow it to remain bare. + * + * @private + * @param {string} value - The reference value to quote + * @returns {string} Quoted reference + */ +function quoteReference(value) { + const str = String(value); + + if (str.includes("'") && !str.includes('"')) { + return `"${str}"`; + } + + if (str.includes('"') && !str.includes("'")) { + return `'${str}'`; + } + + if (str.includes("'") && str.includes('"')) { + return `'${str.replace(/'/g, "''")}'`; + } + + return `'${str}'`; +} + +/** + * Check whether a bare string would be parsed as a dynamic primitive. + * + * @private + * @param {string} value - The string value to check + * @returns {boolean} True when the string should be quoted to remain a string + */ +function isAmbiguousStringReference(value) { + if (value === '') { + return true; + } + + if (value === 'true' || value === 'false' || value === 'null') { + return true; + } + + const num = Number(value); + return !isNaN(num) && value.trim() !== ''; +} + +/** + * Format a scalar value for display in indented Links Notation. * * @private * @param {*} value - The value to format - * @returns {string} Formatted value with appropriate quotes + * @param {Set} [reservedReferences] - References that should remain unambiguous strings + * @returns {string} Formatted scalar value */ -function formatIndentedValue(value) { +function formatIndentedScalarValue(value, reservedReferences = new Set()) { if (value === null || value === undefined) { - return '"null"'; + return 'null'; } - const str = String(value); + if (typeof value === 'number' || typeof value === 'boolean') { + return String(value); + } - // If contains double quotes but no single quotes, use single quotes - if (str.includes('"') && !str.includes("'")) { - return `'${str}'`; + if (typeof value === 'string') { + if (reservedReferences.has(value) || isAmbiguousStringReference(value)) { + return quoteReference(value); + } + reservedReferences.add(value); + return escapeReference({ value }); } - // If contains single quotes but no double quotes, use double quotes - if (str.includes("'") && !str.includes('"')) { - return `"${str}"`; + return escapeReference({ value: String(value) }); +} + +/** + * Sanitize an object key/path segment into a stable Links Notation id segment. + * + * @private + * @param {*} value - Value to turn into an id segment + * @returns {string} Sanitized id segment + */ +function sanitizeIdentifierPart(value) { + const sanitized = String(value) + .replace(/[^A-Za-z0-9_-]+/g, '_') + .replace(/^_+|_+$/g, ''); + + return sanitized || 'item'; +} + +/** + * Create a unique child definition id for a nested object or array. + * + * @private + * @param {string} parentId - Parent definition id + * @param {string|number} key - Child key or array index + * @param {Set} usedIds - Already used definition ids + * @returns {string} Unique child definition id + */ +function createChildId(parentId, key, usedIds) { + const baseId = `${sanitizeIdentifierPart(parentId)}_${sanitizeIdentifierPart(key)}`; + let candidate = baseId; + let counter = 2; + + while (usedIds.has(candidate)) { + candidate = `${baseId}_${counter}`; + counter += 1; } - // If contains both, use single quotes and escape internal single quotes - if (str.includes("'") && str.includes('"')) { - const escaped = str.replace(/'/g, "''"); - return `'${escaped}'`; + usedIds.add(candidate); + return candidate; +} + +/** + * Check whether a value should be emitted as a separate indented definition. + * + * @private + * @param {*} value - The value to check + * @returns {boolean} True for non-null objects and non-empty arrays + */ +function shouldUseIndentedDefinition(value) { + if (value === null || typeof value !== 'object') { + return false; } - // Default: use double quotes - return `"${str}"`; + if (Array.isArray(value)) { + return value.length > 0; + } + + return true; +} + +/** + * Format a child value, creating a queued definition for nested objects/arrays. + * + * @private + * @param {*} value - Child value + * @param {string} parentId - Parent definition id + * @param {string|number} key - Child key or array index + * @param {Array<{ id: string, value: * }>} childDefinitions - Queued children + * @param {Set} usedIds - Already used definition ids + * @returns {string} Formatted scalar value or child definition reference + */ +function formatIndentedChildValue( + value, + parentId, + key, + childDefinitions, + usedIds +) { + if (Array.isArray(value) && value.length === 0) { + return '()'; + } + + if (shouldUseIndentedDefinition(value)) { + const childId = createChildId(parentId, key, usedIds); + childDefinitions.push({ id: childId, value }); + return escapeReference({ value: childId }); + } + + return formatIndentedScalarValue(value, usedIds); } /** @@ -375,9 +499,12 @@ function formatIndentedValue(value) { * * This format is designed for human readability, displaying objects as: * ``` - * - * "" - * "" + * : + * + * + * + * : + * * ... * ``` * @@ -388,11 +515,11 @@ function formatIndentedValue(value) { * }) * * Returns: - * 6dcf4c1b-ff3f-482c-95ab-711ea7d1b019 - * uuid "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019" - * status "executed" - * command "echo test" - * exitCode "0" + * 6dcf4c1b-ff3f-482c-95ab-711ea7d1b019: + * uuid '6dcf4c1b-ff3f-482c-95ab-711ea7d1b019' + * status executed + * command 'echo test' + * exitCode '0' * * @param {Object} options - Options * @param {string} options.id - The object identifier (displayed on first line) @@ -411,36 +538,314 @@ export function formatIndented(options = {}) { throw new Error('obj must be a plain object for formatIndented'); } - const lines = [id]; + const rootId = String(id); + const usedIds = new Set([rootId]); + const sections = []; + + function formatDefinition(definitionId, value, ancestors = new Set()) { + if (value !== null && typeof value === 'object') { + if (ancestors.has(value)) { + throw new Error( + 'formatIndented does not support circular references; use encode/decode for typed object graphs' + ); + } + ancestors = new Set([...ancestors, value]); + } + + const childDefinitions = []; + const lines = [`${escapeReference({ value: definitionId })}:`]; + + if (Array.isArray(value)) { + for (let index = 0; index < value.length; index += 1) { + const formattedValue = formatIndentedChildValue( + value[index], + definitionId, + index, + childDefinitions, + usedIds + ); + lines.push(`${indent}${formattedValue}`); + } + } else { + for (const [key, childValue] of Object.entries(value)) { + const escapedKey = escapeReference({ value: key }); + const formattedValue = formatIndentedChildValue( + childValue, + definitionId, + key, + childDefinitions, + usedIds + ); + lines.push(`${indent}${escapedKey} ${formattedValue}`); + } + } + + sections.push(lines.join('\n')); + + for (const childDefinition of childDefinitions) { + formatDefinition(childDefinition.id, childDefinition.value, ancestors); + } + } + + formatDefinition(rootId, obj); + return sections.join('\n\n'); +} + +/** + * Find a definition header colon outside quoted text. + * + * @private + * @param {string} line - Header line + * @returns {number} Colon index, or -1 when there is no header colon + */ +function findHeaderColon(line) { + let quote = null; + + for (let index = 0; index < line.length; index += 1) { + const char = line[index]; + + if (quote) { + if (char === quote) { + if (line[index + 1] === quote) { + index += 1; + } else { + quote = null; + } + } + continue; + } + + if (char === "'" || char === '"' || char === '`') { + quote = char; + continue; + } + + if (char === ':') { + return index; + } + } + + return -1; +} + +/** + * Check whether a character starts/ends a quoted reference. + * + * @private + * @param {string} char - Character to check + * @returns {boolean} True for supported quote characters + */ +function isIndentedQuote(char) { + return char === "'" || char === '"' || char === '`'; +} + +/** + * Skip whitespace starting at index. + * + * @private + * @param {string} line - Line to scan + * @param {number} index - Starting index + * @returns {number} First non-whitespace index + */ +function skipIndentedWhitespace(line, index) { + while (index < line.length && /\s/.test(line[index])) { + index += 1; + } + + return index; +} + +/** + * Read a quoted indented reference. + * + * @private + * @param {string} line - Line to scan + * @param {number} index - Index of the opening quote + * @returns {{ token: { value: string, quoted: boolean }, nextIndex: number }} Parsed token and next index + */ +function readQuotedIndentedReference(line, index) { + const quote = line[index]; + let value = ''; + index += 1; + + while (index < line.length) { + const char = line[index]; + + if (char !== quote) { + value += char; + index += 1; + continue; + } + + if (line[index + 1] === quote) { + value += quote; + index += 2; + continue; + } + + index += 1; + if (index < line.length && !/\s/.test(line[index])) { + throw new Error(`Invalid quoted reference: ${line}`); + } + + return { + token: { value, quoted: true }, + nextIndex: index, + }; + } + + throw new Error(`Unterminated quoted reference: ${line}`); +} - for (const [key, value] of Object.entries(obj)) { - const escapedKey = escapeReference({ value: key }); - const formattedValue = formatIndentedValue(value); - lines.push(`${indent}${escapedKey} ${formattedValue}`); +/** + * Read a bare indented reference. + * + * @private + * @param {string} line - Line to scan + * @param {number} index - Starting index + * @returns {{ token: { value: string, quoted: boolean }, nextIndex: number }} Parsed token and next index + */ +function readBareIndentedReference(line, index) { + const start = index; + + while (index < line.length && !/\s/.test(line[index])) { + index += 1; } - return lines.join('\n'); + return { + token: { value: line.slice(start, index), quoted: false }, + nextIndex: index, + }; +} + +/** + * Tokenize one indented Links Notation line into references while preserving + * whether each reference was quoted. + * + * @private + * @param {string} line - Line content without leading indentation + * @returns {Array<{ value: string, quoted: boolean }>} Parsed references + */ +function tokenizeIndentedReferences(line) { + const tokens = []; + let index = 0; + + while (index < line.length) { + index = skipIndentedWhitespace(line, index); + + if (index >= line.length) { + break; + } + + const result = isIndentedQuote(line[index]) + ? readQuotedIndentedReference(line, index) + : readBareIndentedReference(line, index); + tokens.push(result.token); + index = result.nextIndex; + } + + return tokens; +} + +/** + * Parse a definition header line. + * + * @private + * @param {string} line - Header line without leading/trailing whitespace + * @returns {string} Definition id + */ +function parseIndentedDefinitionId(line) { + const colonIndex = findHeaderColon(line); + const idText = colonIndex === -1 ? line : line.slice(0, colonIndex).trimEnd(); + + if (colonIndex !== -1 && line.slice(colonIndex + 1).trim() !== '') { + throw new Error(`Invalid definition header: ${line}`); + } + + const tokens = tokenizeIndentedReferences(idText); + + if (tokens.length !== 1) { + throw new Error(`Invalid definition header: ${line}`); + } + + return tokens[0].value; +} + +/** + * Split indented Links Notation text into top-level definitions. + * + * @private + * @param {string} text - Indented Links Notation text + * @returns {Array<{ id: string, childLines: string[] }>} Parsed definitions + */ +function splitIndentedDefinitions(text) { + const definitions = []; + let currentDefinition = null; + + for (const rawLine of text.split(/\r?\n/)) { + if (!rawLine.trim()) { + continue; + } + + if (/^\s/.test(rawLine)) { + if (!currentDefinition) { + throw new Error('Indented value found before a definition header'); + } + + currentDefinition.childLines.push(rawLine.trim()); + continue; + } + + currentDefinition = { + id: parseIndentedDefinitionId(rawLine.trim()), + childLines: [], + }; + definitions.push(currentDefinition); + } + + return definitions; +} + +/** + * Parse a token into a dynamic scalar. + * + * @private + * @param {{ value: string, quoted: boolean }} token - Token to parse + * @returns {*} Parsed scalar value + */ +function parseIndentedScalarToken(token) { + if (token.quoted) { + return token.value; + } + + if (token.value === '()') { + return []; + } + + return parseReference(token.value); } /** * Parse an indented Links Notation string back to an object. * - * This function uses the links-notation parser for proper parsing, - * supporting the standard Links Notation indented syntax. + * This parser handles the recursive definition format emitted by + * formatIndented. Quoted references stay strings, while unquoted references are + * parsed dynamically as numbers, booleans, null, definition references, or + * strings. * * Parses strings like: * ``` - * - * "" - * "" + * : + * + * + * + * : + * * ... * ``` * - * The format with colon after identifier is also supported (standard lino): - * ``` - * : - * "" - * ``` + * The legacy flat format without a colon after the root identifier is also + * accepted. * * @param {Object} options - Options * @param {string} options.text - The indented Links Notation string to parse @@ -453,61 +858,85 @@ export function parseIndented(options = {}) { throw new Error('text is required for parseIndented'); } - const lines = text.split('\n'); - if (lines.length === 0) { - throw new Error('text must have at least one line (the identifier)'); - } + const definitions = splitIndentedDefinitions(text); - // Filter out empty lines to preserve indentation structure for the parser - // Empty lines would break the indentation context in links-notation - const nonEmptyLines = lines.filter((line) => line.trim()); - - if (nonEmptyLines.length === 0) { + if (definitions.length === 0) { throw new Error( 'text must have at least one non-empty line (the identifier)' ); } - // Convert to standard lino format by adding colon after first line if not present - // This allows the links-notation parser to properly parse the indented structure - const firstLine = nonEmptyLines[0].trim(); - let linoText; - if (!firstLine.endsWith(':')) { - linoText = `${firstLine}:\n${nonEmptyLines.slice(1).join('\n')}`; - } else { - linoText = nonEmptyLines.join('\n'); + const definitionMap = new Map(); + for (const definition of definitions) { + if (definitionMap.has(definition.id)) { + throw new Error(`Duplicate indented definition id: ${definition.id}`); + } + definitionMap.set(definition.id, definition); } - // Use links-notation parser - const parsed = parser.parse(linoText); + const parsedDefinitions = new Map(); + for (const definition of definitions) { + const childTokens = definition.childLines.map((line) => + tokenizeIndentedReferences(line) + ); - if (!parsed || parsed.length === 0) { - throw new Error('Failed to parse indented Links Notation'); + for (const tokens of childTokens) { + if (tokens.length !== 1 && tokens.length !== 2) { + throw new Error(`Invalid indented value line in ${definition.id}`); + } + } + + const isArray = + childTokens.length > 0 && + childTokens.every((tokens) => tokens.length === 1); + const isObject = childTokens.every((tokens) => tokens.length === 2); + + if (!isArray && !isObject) { + throw new Error( + `Cannot mix array items and object pairs in ${definition.id}` + ); + } + + parsedDefinitions.set(definition.id, { + isArray, + childTokens, + }); } - // Extract id and key-value pairs from parsed result - const mainLink = parsed[0]; - const id = mainLink.id || ''; - const obj = {}; + const decodedDefinitions = new Map(); - // Process the values array - each entry is a doublet (key value) - for (const child of mainLink.values || []) { - if (child.values && child.values.length === 2) { - const keyRef = child.values[0]; - const valueRef = child.values[1]; + function decodeToken(token) { + if (!token.quoted && definitionMap.has(token.value)) { + return decodeDefinition(token.value); + } - // Get key string - const key = keyRef.id || ''; + return parseIndentedScalarToken(token); + } - // Get value string, handling null - const valueStr = valueRef.id; - if (valueStr === 'null') { - obj[key] = null; - } else { - obj[key] = valueStr; + function decodeDefinition(definitionId) { + if (decodedDefinitions.has(definitionId)) { + return decodedDefinitions.get(definitionId); + } + + const parsedDefinition = parsedDefinitions.get(definitionId); + const result = parsedDefinition.isArray ? [] : {}; + decodedDefinitions.set(definitionId, result); + + if (parsedDefinition.isArray) { + for (const tokens of parsedDefinition.childTokens) { + result.push(decodeToken(tokens[0])); } + return result; } + + for (const tokens of parsedDefinition.childTokens) { + const key = tokens[0].value; + result[key] = decodeToken(tokens[1]); + } + + return result; } - return { id, obj }; + const rootId = definitions[0].id; + return { id: rootId, obj: decodeDefinition(rootId) }; } diff --git a/js/src/index.js b/js/src/index.js index b6716ab..7910fa7 100644 --- a/js/src/index.js +++ b/js/src/index.js @@ -2,8 +2,9 @@ * Lino Objects Codec - Universal serializer/deserializer for JavaScript objects. * * This library provides: - * - Typed serialization/deserialization of JavaScript objects to/from Links Notation format - * - Support for circular references and complex object graphs + * - Readable recursive indented Links Notation for JSON-style repository data + * - Typed serialization/deserialization for exact JavaScript object graphs + * - Typed support for circular references and shared object identity * - JSON to Links Notation conversion utilities * - Fuzzy matching utilities for string comparison * @@ -17,7 +18,7 @@ // Typed object codec (preserves types with markers like (int 42), (str base64)) export { ObjectCodec, encode, decode } from './codec.js'; -// Formatting utilities for JSON/Lino conversion +// Formatting utilities for readable indented data and compact JSON/Lino conversion export { escapeReference, unescapeReference, diff --git a/js/tests/test_format.test.js b/js/tests/test_format.test.js index f7610d1..ae04f92 100644 --- a/js/tests/test_format.test.js +++ b/js/tests/test_format.test.js @@ -266,11 +266,11 @@ test('formatIndented - basic object', () => { }, }); const lines = result.split('\n'); - assert.equal(lines[0], '6dcf4c1b-ff3f-482c-95ab-711ea7d1b019'); - assert.equal(lines[1], ' uuid "6dcf4c1b-ff3f-482c-95ab-711ea7d1b019"'); - assert.equal(lines[2], ' status "executed"'); - assert.equal(lines[3], ' command "echo test"'); - assert.equal(lines[4], ' exitCode "0"'); + assert.equal(lines[0], '6dcf4c1b-ff3f-482c-95ab-711ea7d1b019:'); + assert.equal(lines[1], " uuid '6dcf4c1b-ff3f-482c-95ab-711ea7d1b019'"); + assert.equal(lines[2], ' status executed'); + assert.equal(lines[3], " command 'echo test'"); + assert.equal(lines[4], " exitCode '0'"); }); test('formatIndented - custom indentation', () => { @@ -280,8 +280,8 @@ test('formatIndented - custom indentation', () => { indent: ' ', // 4 spaces }); const lines = result.split('\n'); - assert.equal(lines[0], 'test-id'); - assert.equal(lines[1], ' key "value"'); + assert.equal(lines[0], 'test-id:'); + assert.equal(lines[1], ' key value'); }); test('formatIndented - value with double quotes', () => { @@ -291,7 +291,7 @@ test('formatIndented - value with double quotes', () => { obj: { message: 'He said "hello"' }, }); const lines = result.split('\n'); - assert.equal(lines[0], 'test-id'); + assert.equal(lines[0], 'test-id:'); assert.equal(lines[1], ` message 'He said "hello"'`); }); @@ -301,7 +301,7 @@ test('formatIndented - key with space', () => { obj: { 'key with space': 'value' }, }); const lines = result.split('\n'); - assert.equal(lines[0], 'test-id'); + assert.equal(lines[0], 'test-id:'); assert.ok( lines[1].includes("'key with space'") || lines[1].includes('"key with space"') @@ -314,8 +314,8 @@ test('formatIndented - null value', () => { obj: { key: null }, }); const lines = result.split('\n'); - assert.equal(lines[0], 'test-id'); - assert.equal(lines[1], ' key "null"'); + assert.equal(lines[0], 'test-id:'); + assert.equal(lines[1], ' key null'); }); test('formatIndented - requires id', () => { @@ -330,6 +330,36 @@ test('formatIndented - requires plain object', () => { }); }); +test('formatIndented - recursively formats nested objects and arrays', () => { + const result = formatIndented({ + id: 'obj_root', + obj: { + title: 'Indian Law', + defaultLanguage: 'en', + maxLines: 1500, + nested: { ok: true }, + items: ['a', 1], + }, + }); + + assert.equal( + result, + `obj_root: + title 'Indian Law' + defaultLanguage en + maxLines 1500 + nested obj_root_nested + items obj_root_items + +obj_root_nested: + ok true + +obj_root_items: + a + 1` + ); +}); + // Tests for parseIndented test('parseIndented - basic object', () => { const text = `6dcf4c1b-ff3f-482c-95ab-711ea7d1b019 @@ -369,6 +399,33 @@ test('parseIndented - empty lines are skipped', () => { assert.equal(result.obj.another, 'value2'); }); +test('parseIndented - resolves recursive object and array definitions', () => { + const text = `obj_root: + title 'Indian Law' + defaultLanguage en + maxLines 1500 + nested obj_root_nested + items obj_root_items + +obj_root_nested: + ok true + +obj_root_items: + a + 1`; + + const result = parseIndented({ text }); + + assert.equal(result.id, 'obj_root'); + assert.deepEqual(result.obj, { + title: 'Indian Law', + defaultLanguage: 'en', + maxLines: 1500, + nested: { ok: true }, + items: ['a', 1], + }); +}); + test('parseIndented - requires text', () => { assert.throws(() => parseIndented({}), { message: 'text is required for parseIndented', @@ -406,3 +463,23 @@ test('formatIndented/parseIndented roundtrip - with quotes', () => { assert.equal(parsed.id, original.id); assert.deepEqual(parsed.obj, original.obj); }); + +test('formatIndented/parseIndented roundtrip - ambiguous strings and empty containers', () => { + const original = { + id: 'obj_root', + obj: { + idString: 'obj_root', + numberString: '1500', + booleanString: 'true', + nullString: 'null', + emptyArray: [], + emptyObject: {}, + }, + }; + + const formatted = formatIndented(original); + const parsed = parseIndented({ text: formatted }); + + assert.equal(parsed.id, original.id); + assert.deepEqual(parsed.obj, original.obj); +});