diff --git a/README.md b/README.md index f419e67..e5bc3c8 100644 --- a/README.md +++ b/README.md @@ -125,10 +125,33 @@ for (const event of parser) { } ``` + +##### Cursor-style Synchronous Parsing (StaxXmlCursorReaderSync) + +```typescript +import { StaxXmlCursorReaderSync, XmlEventType } from 'stax-xml'; + +const reader = new StaxXmlCursorReaderSync('<root id="1"><item>text</item></root>'); + +while (reader.read()) { + const event = reader.requireEvent(); + + if (event.type === XmlEventType.START_ELEMENT) { + console.log(event.name, event.getAttributeCount()); + console.log(event.getAttributeValue('id')); + } + + if (event.type === XmlEventType.CHARACTERS) { + console.log(event.text); + } +} +``` + For detailed API documentation: - [**Converter API Guide**](https://clickin.github.io/stax-xml): Declarative parsing with schemas - [**StaxXmlParser (Asynchronous)**](https://clickin.github.io/stax-xml): Event-based parsing from streams - [**StaxXmlParserSync (Synchronous)**](https://clickin.github.io/stax-xml): Event-based parsing from strings +- **StaxXmlCursorReaderSync (Synchronous Cursor)**: Cursor-style pull reader from strings ### 🌐 Platform Compatibility diff --git a/packages/stax-xml/README.md b/packages/stax-xml/README.md index f419e67..e5bc3c8 100644 --- a/packages/stax-xml/README.md +++ b/packages/stax-xml/README.md @@ -125,10 +125,33 @@ for (const event of parser) { } ``` + +##### Cursor-style Synchronous Parsing (StaxXmlCursorReaderSync) + +```typescript +import { StaxXmlCursorReaderSync, XmlEventType } from 'stax-xml'; + +const reader = new StaxXmlCursorReaderSync('<root id="1"><item>text</item></root>'); + +while (reader.read()) { + const event = reader.requireEvent(); + + if (event.type === XmlEventType.START_ELEMENT) { + console.log(event.name, event.getAttributeCount()); + console.log(event.getAttributeValue('id')); + } + + if (event.type === XmlEventType.CHARACTERS) { + console.log(event.text); + } +} +``` + For detailed API documentation: - [**Converter API Guide**](https://clickin.github.io/stax-xml): Declarative 
parsing with schemas
 - [**StaxXmlParser (Asynchronous)**](https://clickin.github.io/stax-xml): Event-based parsing from streams
 - [**StaxXmlParserSync (Synchronous)**](https://clickin.github.io/stax-xml): Event-based parsing from strings
+- **StaxXmlCursorReaderSync (Synchronous Cursor)**: Cursor-style pull reader from strings
 ### 🌐 Platform Compatibility
diff --git a/packages/stax-xml/src/AttrStore.ts b/packages/stax-xml/src/AttrStore.ts
new file mode 100644
index 0000000..83b75c2
--- /dev/null
+++ b/packages/stax-xml/src/AttrStore.ts
@@ -0,0 +1,78 @@
+import type { AttributeInfo, CursorAttribute, StartElementEvent } from './types';
+
+/**
+ * Splits a qualified attribute name at its first colon.
+ * A name without a colon has no prefix and is its own local name.
+ */
+function parseAttributeName(name: string): Pick<CursorAttribute, 'localName' | 'prefix'> {
+  const colonIndex = name.indexOf(':');
+  if (colonIndex === -1) {
+    return { localName: name, prefix: undefined };
+  }
+
+  return {
+    prefix: name.slice(0, colonIndex),
+    localName: name.slice(colonIndex + 1)
+  };
+}
+
+/**
+ * Ordered attribute list built from a single start-element event.
+ */
+export class AttrStore {
+  private readonly list: CursorAttribute[];
+
+  constructor(startEvent: StartElementEvent) {
+    this.list = AttrStore.fromEvent(startEvent);
+  }
+
+  /** Number of attributes held by the store. */
+  get count(): number {
+    return this.list.length;
+  }
+
+  /** Attribute at `index`, or `undefined` when the index is out of range. */
+  getByIndex(index: number): CursorAttribute | undefined {
+    return this.list[index];
+  }
+
+  /** First attribute whose full `name` equals `name`, or `undefined`. */
+  getByName(name: string): CursorAttribute | undefined {
+    return this.list.find((attr) => attr.name === name);
+  }
+
+  /** Returns a copy so callers cannot mutate the store's internal list. */
+  toArray(): CursorAttribute[] {
+    return [...this.list];
+  }
+
+  /**
+   * Converts the event's attributes, preferring `attributesWithPrefix`
+   * when it is set and otherwise splitting the keys of `attributes`.
+   */
+  private static fromEvent(event: StartElementEvent): CursorAttribute[] {
+    if (event.attributesWithPrefix) {
+      return Object.entries(event.attributesWithPrefix).map(([name, info]) => {
+        const typedInfo = info as AttributeInfo;
+        return {
+          name,
+          localName: typedInfo.localName,
+          prefix: typedInfo.prefix,
+          uri: typedInfo.uri,
+          value: typedInfo.value
+        } satisfies CursorAttribute;
+      });
+    }
+
+    return Object.entries(event.attributes).map(([name, value]) => {
+      const parsedName = parseAttributeName(name);
+      return {
+        name,
+        localName: parsedName.localName,
+        prefix: parsedName.prefix,
+        uri: undefined,
+        value
+      } satisfies CursorAttribute;
+    });
+  }
+}
diff --git a/packages/stax-xml/src/StaxXmlCursorReaderSync.ts b/packages/stax-xml/src/StaxXmlCursorReaderSync.ts
new file mode 100644
index 0000000..aba2206
--- /dev/null
+++ b/packages/stax-xml/src/StaxXmlCursorReaderSync.ts
@@ -0,0 +1,84 @@
+import { StaxXmlParserSync, StaxXmlParserSyncOptions } from './StaxXmlParserSync';
+import { AnyXmlEvent, XmlCursorReaderSyncLike } from './types';
+import { XmlCursorEvent } from './XmlCursorEvent';
+
+/** Options for the cursor reader; currently identical to the parser options. */
+export interface StaxXmlCursorReaderSyncOptions extends StaxXmlParserSyncOptions {}
+
+/**
+ * Cursor-style pull reader over the events produced by `StaxXmlParserSync`.
+ * Call `read()` to advance, then `getEvent()` / `requireEvent()` to inspect.
+ */
+export class StaxXmlCursorReaderSync implements XmlCursorReaderSyncLike {
+  private readonly iterator: Iterator<AnyXmlEvent>;
+  // One-item lookahead filled by hasNext() and consumed by the next read().
+  private buffer: IteratorResult<AnyXmlEvent> | null = null;
+  private finished = false;
+  private current: XmlCursorEvent | null = null;
+
+  constructor(xml: string, options: StaxXmlCursorReaderSyncOptions = {}) {
+    this.iterator = new StaxXmlParserSync(xml, options);
+  }
+
+  /** Reports whether another event is available without moving the cursor. */
+  hasNext(): boolean {
+    if (this.finished) {
+      return false;
+    }
+
+    if (this.buffer !== null) {
+      return !this.buffer.done;
+    }
+
+    this.buffer = this.iterator.next();
+    return !this.buffer.done;
+  }
+
+  /**
+   * Advances to the next event.
+   * @returns `true` when an event became current, `false` once input is exhausted.
+   */
+  read(): boolean {
+    if (this.finished) {
+      this.current = null;
+      return false;
+    }
+
+    const result = this.buffer ?? this.iterator.next();
+    this.buffer = null;
+
+    if (result.done) {
+      this.finished = true;
+      this.current = null;
+      return false;
+    }
+
+    this.current = new XmlCursorEvent(result.value);
+    return true;
+  }
+
+  /** Current event, or `null` before the first `read()` and after exhaustion. */
+  getEvent(): XmlCursorEvent | null {
+    return this.current;
+  }
+
+  /**
+   * Current event; use when a preceding `read()` returned `true`.
+   * @throws Error when there is no current event.
+   */
+  requireEvent(): XmlCursorEvent {
+    if (!this.current) {
+      throw new Error('No active cursor event. Call read() first.');
+    }
+
+    return this.current;
+  }
+}
+
+/** Factory equivalent to `new StaxXmlCursorReaderSync(xml, options)`. */
+export function createStaxXmlCursorReaderSync(
+  xml: string,
+  options: StaxXmlCursorReaderSyncOptions = {}
+): StaxXmlCursorReaderSync {
+  return new StaxXmlCursorReaderSync(xml, options);
+}
diff --git a/packages/stax-xml/src/XmlCursorEvent.ts b/packages/stax-xml/src/XmlCursorEvent.ts
new file mode 100644
index 0000000..b96ac84
--- /dev/null
+++ b/packages/stax-xml/src/XmlCursorEvent.ts
@@ -0,0 +1,94 @@
+import { AttrStore } from './AttrStore';
+import {
+  AnyXmlEvent,
+  CursorAttribute,
+  CursorXmlEventType,
+  StartElementEvent,
+  XmlCursorEventLike,
+  XmlEventType,
+  isStartElement
+} from './types';
+
+/**
+ * Read-only accessor wrapper around a single parser event.
+ * Accessors whose field is absent from the wrapped event return `undefined`.
+ */
+export class XmlCursorEvent implements XmlCursorEventLike {
+  private readonly attrStore: AttrStore | null;
+
+  constructor(private readonly raw: AnyXmlEvent) {
+    // Attributes are only collected for start-element events.
+    this.attrStore = isStartElement(raw) ? new AttrStore(raw as StartElementEvent) : null;
+  }
+
+  get type(): CursorXmlEventType {
+    return this.raw.type;
+  }
+
+  get name(): string | undefined {
+    return 'name' in this.raw ? this.raw.name : undefined;
+  }
+
+  get localName(): string | undefined {
+    return 'localName' in this.raw ? this.raw.localName : undefined;
+  }
+
+  get prefix(): string | undefined {
+    return 'prefix' in this.raw ? this.raw.prefix : undefined;
+  }
+
+  get uri(): string | undefined {
+    return 'uri' in this.raw ? this.raw.uri : undefined;
+  }
+
+  /** Text content, read from the wrapped event's `value` field when present. */
+  get text(): string | undefined {
+    return 'value' in this.raw ? this.raw.value : undefined;
+  }
+
+  get error(): Error | undefined {
+    return 'error' in this.raw ? this.raw.error : undefined;
+  }
+
+  isStartElement(): boolean {
+    return this.type === XmlEventType.START_ELEMENT;
+  }
+
+  isEndElement(): boolean {
+    return this.type === XmlEventType.END_ELEMENT;
+  }
+
+  isCharacters(): boolean {
+    return this.type === XmlEventType.CHARACTERS;
+  }
+
+  isCdata(): boolean {
+    return this.type === XmlEventType.CDATA;
+  }
+
+  isStartDocument(): boolean {
+    return this.type === XmlEventType.START_DOCUMENT;
+  }
+
+  isEndDocument(): boolean {
+    return this.type === XmlEventType.END_DOCUMENT;
+  }
+
+  /** Number of attributes; `0` when no attribute store was built. */
+  getAttributeCount(): number {
+    return this.attrStore?.count ?? 0;
+  }
+
+  getAttribute(index: number): CursorAttribute | undefined {
+    return this.attrStore?.getByIndex(index);
+  }
+
+  getAttributeValue(name: string): string | undefined {
+    return this.attrStore?.getByName(name)?.value;
+  }
+
+  /** Returns the wrapped parser event itself. */
+  toEvent(): AnyXmlEvent {
+    return this.raw;
+  }
+}
diff --git a/packages/stax-xml/src/index.ts b/packages/stax-xml/src/index.ts
index ad5e34f..a08ca7e 100644
--- a/packages/stax-xml/src/index.ts
+++ b/packages/stax-xml/src/index.ts
@@ -1,10 +1,12 @@
 export * from "./StaxXmlParser.js";
 export * from "./StaxXmlParserSync.js";
+export * from "./StaxXmlCursorReaderSync.js";
 export * from "./StaxXmlWriter.js";
 export * from "./StaxXmlWriterSync.js";
+export * from "./XmlCursorEvent.js";
 export { isCdata, isCharacters, isEndDocument, isEndElement, isError, isStartDocument, isStartElement, XmlEventType } from "./types.js";
 export type {
-  AnyXmlEvent, CdataEvent, CharactersEvent, ErrorEvent, StartElementEvent, WriteElementOptions, XmlAttribute
+  AnyXmlEvent, CdataEvent, CharactersEvent, CursorAttribute, CursorXmlEventType, ErrorEvent, StartElementEvent, WriteElementOptions, XmlAttribute,
+  XmlCursorEventLike, XmlCursorReaderSyncLike
 } from "./types.js";
-
diff --git a/packages/stax-xml/src/types.ts b/packages/stax-xml/src/types.ts
index 8957b8f..4966761 100644
--- a/packages/stax-xml/src/types.ts
+++ b/packages/stax-xml/src/types.ts
@@ -108,6 +108,56 @@
export type AnyXmlEvent =
   | CdataEvent
   | ErrorEvent;
+/**
+ * Cursor-compatible XML event type.
+ *
+ * @public
+ */
+export type CursorXmlEventType = XmlEventType;
+
+/**
+ * Attribute shape used by the cursor API.
+ *
+ * @public
+ */
+export interface CursorAttribute {
+  name: string;
+  localName: string;
+  prefix?: string;
+  uri?: string;
+  value: string;
+}
+
+/**
+ * Read-only view for cursor events.
+ *
+ * @public
+ */
+export interface XmlCursorEventLike {
+  readonly type: CursorXmlEventType;
+  readonly name: string | undefined;
+  readonly localName: string | undefined;
+  readonly prefix: string | undefined;
+  readonly uri: string | undefined;
+  readonly text: string | undefined;
+  readonly error: Error | undefined;
+  getAttributeCount(): number;
+  getAttribute(index: number): CursorAttribute | undefined;
+  getAttributeValue(name: string): string | undefined;
+}
+
+/**
+ * Synchronous cursor-style reader API.
+ *
+ * @public
+ */
+export interface XmlCursorReaderSyncLike {
+  hasNext(): boolean;
+  read(): boolean;
+  getEvent(): XmlCursorEventLike | null;
+  requireEvent(): XmlCursorEventLike;
+}
+
 /**
  * Attribute interface (for Writer)
  */
@@ -357,4 +407,4 @@ export interface WriteElementOptions {
   attributes?: Record;
   selfClosing?: boolean;
   comment?: string;
-}
\ No newline at end of file
+}
diff --git a/packages/stax-xml/test/converter/large-file.test.ts b/packages/stax-xml/test/converter/large-file.test.ts
index 27e1b91..eba10ed 100644
--- a/packages/stax-xml/test/converter/large-file.test.ts
+++ b/packages/stax-xml/test/converter/large-file.test.ts
@@ -323,7 +323,7 @@ describe('Large File Tests', () => {
   describe('Performance Benchmarks', () => {
     it('should maintain performance with various data sizes', () => {
-      const sizes = [100, 500, 1000];
+      const sizes = [200, 1000, 3000];
       const benchmarks: { size: number; duration: number }[] = [];
       sizes.forEach(size => {
@@ -335,21 +335,39 @@ describe('Large File Tests', () => {
         const schema = x.array(x.number(), '//item');
-        const start = performance.now();
-        const result = schema.parseSync(xml);
-        const duration = performance.now() - start;
+        // Warm-up once to reduce JIT/cache noise across CI runners.
+        schema.parseSync(xml);
+
+        const samples: number[] = [];
+        let result: number[] = [];
+
+        for (let sample = 0; sample < 5; sample++) {
+          const start = performance.now();
+          result = schema.parseSync(xml);
+          samples.push(performance.now() - start);
+        }
+
+        const duration = [...samples].sort((a, b) => a - b)[Math.floor(samples.length / 2)];
         benchmarks.push({ size, duration });
         expect(result).toHaveLength(size);
       });
-      // Performance should scale reasonably (not exponentially)
+      // Performance should scale reasonably (not exponentially).
+      // Use generous bounds because CI machines (especially macOS shared runners)
+      // can show bursty timing variance.
       benchmarks.forEach((benchmark, i) => {
         if (i > 0) {
           const ratio = benchmark.duration / benchmarks[i - 1].duration;
           const sizeRatio = benchmark.size / benchmarks[i - 1].size;
-          // Duration ratio should be less than size ratio squared (sub-quadratic)
-          expect(ratio).toBeLessThan(sizeRatio * sizeRatio);
+
+          // Sub-quadratic upper bound with CI tolerance.
+          expect(ratio).toBeLessThan(sizeRatio * sizeRatio * 1.5);
+
+          // Per-item processing time should not regress excessively between sizes.
+          const perItem = benchmark.duration / benchmark.size;
+          const previousPerItem = benchmarks[i - 1].duration / benchmarks[i - 1].size;
+          expect(perItem).toBeLessThan(previousPerItem * 3);
         }
       });
     });
@@ -437,4 +455,4 @@ describe('Large File Tests', () => {
       expect(result[0].content).toContain('< > & " \'');
     });
   });
-});
\ No newline at end of file
+});
diff --git a/packages/stax-xml/test/cursor-reader-sync.test.ts b/packages/stax-xml/test/cursor-reader-sync.test.ts
new file mode 100644
index 0000000..a9afa04
--- /dev/null
+++ b/packages/stax-xml/test/cursor-reader-sync.test.ts
@@ -0,0 +1,46 @@
+import { describe, expect, it } from 'vitest';
+
+import { createStaxXmlCursorReaderSync, StaxXmlCursorReaderSync } from '../src/StaxXmlCursorReaderSync';
+import { XmlEventType } from '../src/types';
+
+describe('StaxXmlCursorReaderSync', () => {
+  it('reads events sequentially and exposes cursor accessors', () => {
+    const reader = new StaxXmlCursorReaderSync('<root id="1"><item></item></root>');
+
+    const types: string[] = [];
+
+    while (reader.read()) {
+      const event = reader.requireEvent();
+      types.push(event.type);
+
+      if (event.type === XmlEventType.START_ELEMENT && event.name === 'root') {
+        expect(event.getAttributeCount()).toBe(1);
+        expect(event.getAttribute(0)?.name).toBe('id');
+        expect(event.getAttribute(0)?.value).toBe('1');
+        expect(event.getAttributeValue('id')).toBe('1');
+      }
+    }
+
+    expect(types).toEqual([
+      XmlEventType.START_DOCUMENT,
+      XmlEventType.START_ELEMENT,
+      XmlEventType.START_ELEMENT,
+      XmlEventType.END_ELEMENT,
+      XmlEventType.END_ELEMENT,
+      XmlEventType.END_DOCUMENT
+    ]);
+  });
+
+  it('supports hasNext lookahead and factory function', () => {
+    const reader = createStaxXmlCursorReaderSync('<root>value</root>');
+
+    expect(reader.hasNext()).toBe(true);
+    expect(reader.read()).toBe(true);
+    expect(reader.requireEvent().type).toBe(XmlEventType.START_DOCUMENT);
+
+    expect(reader.hasNext()).toBe(true);
+    expect(reader.hasNext()).toBe(true);
+    expect(reader.read()).toBe(true);
+    expect(reader.requireEvent().type).toBe(XmlEventType.START_ELEMENT);
+  });
+});