diff --git a/packages/streamdown-rn/src/__tests__/sanitize.test.ts b/packages/streamdown-rn/src/__tests__/sanitize.test.ts index e4b0172..8e38713 100644 --- a/packages/streamdown-rn/src/__tests__/sanitize.test.ts +++ b/packages/streamdown-rn/src/__tests__/sanitize.test.ts @@ -74,6 +74,11 @@ describe('Security: URL Sanitization', () => { expect(sanitizeURL('wss://evil.com/socket')).toBeNull(); expect(sanitizeURL('blob:https://evil.com/uuid')).toBeNull(); }); + + it('should block protocol-relative URLs', () => { + expect(sanitizeURL('//evil.com/path')).toBeNull(); + expect(sanitizeURL('\\\\evil.local\\share')).toBeNull(); + }); }); describe('allowed protocols', () => { @@ -171,6 +176,25 @@ describe('Security: Prop Sanitization', () => { expect(safe.description).toBe('Some text without URLs'); }); + it('should sanitize protocol-relative URLs in URL-like keys', () => { + const props = { url: '//evil.com/payload', href: '/safe/path' }; + const safe = sanitizeProps(props); + expect(safe.url).toBe(''); + expect(safe.href).toBe('/safe/path'); + }); + + it('should sanitize URL-like keys even without explicit protocol prefix', () => { + const props = { + image_url: 'javascript:alert(1)', + endpoint: 'https://api.example.com/v1', + title: 'Status card', + }; + const safe = sanitizeProps(props); + expect(safe.image_url).toBe(''); + expect(safe.endpoint).toBe('https://api.example.com/v1'); + expect(safe.title).toBe('Status card'); + }); + it('should preserve primitives', () => { const props = { count: 42, active: true, data: null }; const safe = sanitizeProps(props); @@ -245,6 +269,12 @@ describe('Security: Full Pipeline Integration', () => { expect(data.props.src).toBe(''); }); + it('should block protocol-relative URLs in component props', () => { + const input = '[{c:"Image",p:{"src":"//evil.com/tracker.png"}}]'; + const data = extractComponentData(input); + expect(data.props.src).toBe(''); + }); + it('should preserve safe URLs in component props', () => { const input = '[{c:"Link",p:{"href":"https://example.com"}}]'; const data = extractComponentData(input); diff --git a/packages/streamdown-rn/src/__tests__/splitter.test.ts b/packages/streamdown-rn/src/__tests__/splitter.test.ts index 93cae3e..f81f185 100644 --- a/packages/streamdown-rn/src/__tests__/splitter.test.ts +++ b/packages/streamdown-rn/src/__tests__/splitter.test.ts @@ -111,6 +111,17 @@ describe('Block Splitter', () => { expect(registry.blocks.length).toBe(1); expect(registry.activeBlock).toBeNull(); }); + + it('should split code block and following paragraph in one chunk', () => { + const input = '```ts\nconst x = 1;\n```\n\nFollowing paragraph'; + const registry = processNewContent(INITIAL_REGISTRY, input); + + expect(registry.blocks.length).toBe(1); + expect(registry.blocks[0].type).toBe('codeBlock'); + expect(registry.blocks[0].content).toBe('```ts\nconst x = 1;\n```'); + expect(registry.activeBlock?.type).toBe('paragraph'); + expect(registry.activeBlock?.content.trim()).toBe('Following paragraph'); + }); }); describe('Component detection', () => { @@ -382,6 +393,36 @@ describe('Block Splitter', () => { registry = processNewContent(registry, '1. '); expect(registry.activeBlock?.type).toBe('list'); }); + + it('should produce identical results for large mixed input regardless of chunk size', () => { + const fullInput = [ + '# Title', + '', + 'Paragraph start ' + 'x'.repeat(3000), + '', + '[{c:"StatusCard",p:{"title":"Ops","status":"ok"}}]', + '', + '```js', + 'const n = 42;', + '```', + '', + 'Tail paragraph', + ].join('\n'); + + const allAtOnce = processNewContent(INITIAL_REGISTRY, fullInput); + + let chunked = INITIAL_REGISTRY; + for (let i = 64; i <= fullInput.length; i += 64) { + chunked = processNewContent(chunked, fullInput.slice(0, i)); + } + chunked = processNewContent(chunked, fullInput); + + expect(chunked.blocks.length).toBe(allAtOnce.blocks.length); + expect(chunked.blocks.map(b => b.type)).toEqual(allAtOnce.blocks.map(b => b.type)); + expect(chunked.blocks.map(b => b.content)).toEqual(allAtOnce.blocks.map(b => b.content)); + expect(chunked.activeBlock?.type).toBe(allAtOnce.activeBlock?.type); + expect(chunked.activeBlock?.content).toBe(allAtOnce.activeBlock?.content); + }); }); }); diff --git a/packages/streamdown-rn/src/core/sanitize.ts b/packages/streamdown-rn/src/core/sanitize.ts index be7f321..cc6d9b1 100644 --- a/packages/streamdown-rn/src/core/sanitize.ts +++ b/packages/streamdown-rn/src/core/sanitize.ts @@ -52,6 +52,14 @@ export function sanitizeURL(url: string): string | null { if (trimmed.length === 0) { return null; } + + // Block protocol-relative and UNC paths (can bypass protocol allowlists) + if (trimmed.startsWith('//') || trimmed.startsWith('\\\\')) { + if (process.env.NODE_ENV !== 'production') { + console.warn('[streamdown-rn] Blocked protocol-relative URL'); + } + return null; + } // Allow relative URLs - they're safe if (trimmed.startsWith('/') || trimmed.startsWith('#') || trimmed.startsWith('./') || trimmed.startsWith('../')) { @@ -91,6 +99,20 @@ function looksLikeURL(value: string): boolean { return /^[a-z][a-z0-9+.-]*:/i.test(value); } +/** + * Check if a prop key is likely intended to carry a URL. + */ +function isLikelyURLKey(key: string): boolean { + return /(?:^|_)(?:url|uri|href|src|link|website|endpoint|avatar|image)(?:$|_)/i.test(key); +} + +/** + * Decide whether a prop string should be URL-sanitized. + */ +function shouldSanitizeStringProp(key: string, value: string): boolean { + return isLikelyURLKey(key) || looksLikeURL(value); +} + /** * Recursively sanitize component props. * @@ -109,8 +131,8 @@ export function sanitizeProps(props: Record): Record): Record); } - if (typeof item === 'string' && looksLikeURL(item)) { + if (typeof item === 'string' && (isLikelyURLKey(key) || looksLikeURL(item))) { return sanitizeURL(item) ?? ''; } return item; diff --git a/packages/streamdown-rn/src/core/splitter/blockClosers.ts b/packages/streamdown-rn/src/core/splitter/blockClosers.ts index 5df5093..7314fa9 100644 --- a/packages/streamdown-rn/src/core/splitter/blockClosers.ts +++ b/packages/streamdown-rn/src/core/splitter/blockClosers.ts @@ -2,25 +2,42 @@ * Helpers to detect when special blocks are closed. */ -export function isCodeBlockClosed(content: string): boolean { - const lines = content.split('\n'); - if (lines.length < 2) return false; - - const firstLine = lines[0]; - const lastLine = lines[lines.length - 1]; +export function findCodeBlockCloseIndex(content: string): number { + const firstNewlineIndex = content.indexOf('\n'); + if (firstNewlineIndex === -1) return -1; + const firstLine = content.slice(0, firstNewlineIndex).replace(/\r$/, ''); const openMatch = firstLine.match(/^(`{3,}|~{3,})/); - if (!openMatch) return false; + if (!openMatch) return -1; const fence = openMatch[1]; const fenceChar = fence[0]; const fenceLen = fence.length; const closePattern = new RegExp(`^${fenceChar}{${fenceLen},}\\s*$`); - return closePattern.test(lastLine); + + let cursor = firstNewlineIndex + 1; + while (cursor <= content.length) { + const nextNewlineIndex = content.indexOf('\n', cursor); + const lineEnd = nextNewlineIndex === -1 ? content.length : nextNewlineIndex; + const line = content.slice(cursor, lineEnd).replace(/\r$/, ''); + + if (closePattern.test(line)) { + return lineEnd; + } + + if (nextNewlineIndex === -1) break; + cursor = nextNewlineIndex + 1; + } + + return -1; } -export function isComponentClosed(content: string): boolean { - if (!content.startsWith('[{')) return false; +export function isCodeBlockClosed(content: string): boolean { + return findCodeBlockCloseIndex(content) === content.length; +} + +export function findComponentCloseIndex(content: string): number { + if (!content.startsWith('[{')) return -1; let braceDepth = 1; let bracketDepth = 1; @@ -59,10 +76,14 @@ export function isComponentClosed(content: string): boolean { content[i - 1] === '}' && char === ']' ) { - return true; + return i + 1; } } - return false; + return -1; +} + +export function isComponentClosed(content: string): boolean { + return findComponentCloseIndex(content) === content.length; } diff --git a/packages/streamdown-rn/src/core/splitter/index.ts b/packages/streamdown-rn/src/core/splitter/index.ts index 4e8838f..1e47e0b 100644 --- a/packages/streamdown-rn/src/core/splitter/index.ts +++ b/packages/streamdown-rn/src/core/splitter/index.ts @@ -5,14 +5,14 @@ import { logDebug, logStateSnapshot } from './logger'; import { processLines } from './processLines'; import { finalizeBlock } from './finalizeBlock'; -const SPLITTER_VERSION = 'char-level-v1'; +const SPLITTER_VERSION = 'incremental-v2'; /** - * Process new content character-by-character. - * - * This ensures consistent block boundary detection regardless of chunk size. - * Whether content arrives 1 character at a time or 1000 characters at once, - * block boundaries are detected at the exact character position. + * Process new appended content incrementally. + * + * The splitter operates on the newly appended range and relies on explicit + * block-closure index detection (component/code blocks) to preserve + * chunk-size-independent boundaries without per-character full rescans. */ export function processNewContent( registry: BlockRegistry, @@ -30,52 +30,25 @@ export function processNewContent( return registry; } - // Process each new character individually to ensure consistent - // block boundary detection regardless of chunk size - let currentRegistry = registry; - - for (let i = registry.cursor; i < fullText.length; i++) { - // Process content up to position i+1 (one character at a time) - currentRegistry = processSingleCharacter(currentRegistry, fullText, i + 1); - } - - logStateSnapshot('state.after', currentRegistry); - return currentRegistry; -} - -/** - * Process content up to a specific position (single character increment). - * This is the core of character-level processing. - */ -function processSingleCharacter( - registry: BlockRegistry, - fullText: string, - endPos: number -): BlockRegistry { - // Only process if we have new content - if (endPos <= registry.cursor) { - return registry; - } - - const newContent = fullText.slice(registry.cursor, endPos); + const newContent = fullText.slice(registry.cursor); const activeContent = registry.activeBlock ? registry.activeBlock.content + newContent : newContent; + const activeStartPos = registry.activeBlock?.startPos ?? registry.cursor; const newTagState = updateTagState(registry.activeTagState, activeContent); const lines = activeContent.split('\n'); - // Create a virtual "fullText" that only goes up to endPos - // This ensures cursor is set correctly for this character - const virtualFullText = fullText.slice(0, endPos); - - return processLines({ + const currentRegistry = processLines({ registry, - fullText: virtualFullText, + fullText, lines, activeContent, tagState: newTagState, - activeStartPos: registry.activeBlock?.startPos ?? registry.cursor, + activeStartPos, }); + + logStateSnapshot('state.after', currentRegistry); + return currentRegistry; } export function resetRegistry(): BlockRegistry { diff --git a/packages/streamdown-rn/src/core/splitter/processLines.ts b/packages/streamdown-rn/src/core/splitter/processLines.ts index e445157..e38600e 100644 --- a/packages/streamdown-rn/src/core/splitter/processLines.ts +++ b/packages/streamdown-rn/src/core/splitter/processLines.ts @@ -2,7 +2,7 @@ import type { BlockRegistry, IncompleteTagState } from '../types'; import { INITIAL_INCOMPLETE_STATE, updateTagState } from '../incomplete'; import { detectBlockType, detectPartialBlockType } from './blockPatterns'; import { finalizeBlock } from './finalizeBlock'; -import { isCodeBlockClosed, isComponentClosed } from './blockClosers'; +import { findCodeBlockCloseIndex, findComponentCloseIndex } from './blockClosers'; import { logDebug } from './logger'; interface ProcessArgs { @@ -33,40 +33,84 @@ function handleExplicitClosingBlocks({ }: ProcessArgs): BlockRegistry | null { const currentType = registry.activeBlock?.type; if (currentType === 'codeBlock') { - if (isCodeBlockClosed(activeContent)) { + const closeIndex = findCodeBlockCloseIndex(activeContent); + if (closeIndex !== -1) { + const blockContent = activeContent.slice(0, closeIndex); + const remainder = activeContent.slice(closeIndex); const block = finalizeBlock( - activeContent, + blockContent, 'codeBlock', registry.blockCounter, registry.activeBlock!.startPos ); - return { + + const finalized: BlockRegistry = { blocks: [...registry.blocks, block], activeBlock: null, activeTagState: INITIAL_INCOMPLETE_STATE, cursor: fullText.length, blockCounter: registry.blockCounter + 1, }; + + const normalizedRemainder = normalizeBlockContent( + remainder, + registry.activeBlock!.startPos + closeIndex + ); + + if (!normalizedRemainder.content.trim()) { + return finalized; + } + + return processLines({ + registry: finalized, + fullText, + lines: normalizedRemainder.content.split('\n'), + activeContent: normalizedRemainder.content, + tagState: updateTagState(INITIAL_INCOMPLETE_STATE, normalizedRemainder.content), + activeStartPos: normalizedRemainder.startPos, + }); } return updateActiveBlock(registry, activeContent, tagState, fullText); } if (currentType === 'component') { - if (isComponentClosed(activeContent)) { + const closeIndex = findComponentCloseIndex(activeContent); + if (closeIndex !== -1) { + const blockContent = activeContent.slice(0, closeIndex); + const remainder = activeContent.slice(closeIndex); const block = finalizeBlock( - activeContent, + blockContent, 'component', registry.blockCounter, registry.activeBlock!.startPos ); - return { + + const finalized: BlockRegistry = { blocks: [...registry.blocks, block], activeBlock: null, activeTagState: INITIAL_INCOMPLETE_STATE, cursor: fullText.length, blockCounter: registry.blockCounter + 1, }; + + const normalizedRemainder = normalizeBlockContent( + remainder, + registry.activeBlock!.startPos + closeIndex + ); + + if (!normalizedRemainder.content.trim()) { + return finalized; + } + + return processLines({ + registry: finalized, + fullText, + lines: normalizedRemainder.content.split('\n'), + activeContent: normalizedRemainder.content, + tagState: updateTagState(INITIAL_INCOMPLETE_STATE, normalizedRemainder.content), + activeStartPos: normalizedRemainder.startPos, + }); } return updateActiveBlock(registry, activeContent, tagState, fullText); @@ -258,6 +302,16 @@ function handleActiveBlock({ if (!registry.activeBlock) { const { normalizedContent, trimmedChars } = trimLeadingWhitespace(activeContent); + + if (!normalizedContent) { + return { + ...registry, + activeBlock: null, + activeTagState: INITIAL_INCOMPLETE_STATE, + cursor: fullText.length, + }; + } + const normalizedLines = normalizedContent.split('\n'); // Use partial detection for immediate type recognition