From ac6f641f1ef054d66947a841459eaf3dc80a96de Mon Sep 17 00:00:00 2001 From: Kayvan Sylvan Date: Wed, 18 Jun 2025 07:36:25 -0700 Subject: [PATCH] feat: enhance text sanitization to support CJK characters **CHANGES:** - Update sanitizeText to handle Chinese, Japanese, Korean characters. - Extend regex to include Unicode ranges for CJK. - Add test for CLI explode command with CJK support. - Bump version to 1.6.0 for new feature release. --- bin/md-tree.js | 5 ++++- package.json | 2 +- test/test-cjk.md | 17 +++++++++++++++++ test/test-cli.js | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 58 insertions(+), 2 deletions(-) create mode 100644 test/test-cjk.md diff --git a/bin/md-tree.js b/bin/md-tree.js index 90c4df7..d7de8f9 100755 --- a/bin/md-tree.js +++ b/bin/md-tree.js @@ -107,7 +107,10 @@ class MarkdownCLI { sanitizeText(text) { return text .toLowerCase() - .replace(/[^a-z0-9\s-]/g, '') + .replace( + /[^a-z0-9\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af\s-]/g, + '' + ) .replace(/\s+/g, '-') .replace(/-+/g, '-') .replace(/^-|-$/g, ''); diff --git a/package.json b/package.json index efc373e..76ce735 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@kayvan/markdown-tree-parser", - "version": "1.5.1", + "version": "1.6.0", "description": "A powerful JavaScript library and CLI tool for parsing and manipulating markdown files as tree structures using the remark/unified ecosystem", "type": "module", "main": "index.js", diff --git a/test/test-cjk.md b/test/test-cjk.md new file mode 100644 index 0000000..76734ba --- /dev/null +++ b/test/test-cjk.md @@ -0,0 +1,17 @@ +# Document Title + +## 章节一 + +This is the first section. + +## 章二 + +This is the second section. + +### セクション 2.1 + +This is a subsection. + +## Another Section + +This is another section. diff --git a/test/test-cli.js b/test/test-cli.js index 994329e..be59fd5 100644 --- a/test/test-cli.js +++ b/test/test-cli.js @@ -445,6 +445,42 @@ This document has links too: ); }); + await test('CLI explode command with CJK characters', async () => { + const cjkTestFile = path.resolve(__dirname, 'test-cjk.md'); + const outputDir = path.join(testDir, 'cjk-exploded'); + const result = await runCLI(['explode', cjkTestFile, outputDir]); + + assert( + result.code === 0, + `Command should succeed, got exit code ${result.code}\nstderr: ${result.stderr}` + ); + + const files = await fs.readdir(outputDir); + assert(files.includes('index.md'), 'Should create index.md'); + assert(files.includes('章节一.md'), 'Should create file with Chinese slug'); + assert(files.includes('章二.md'), 'Should create file with Chinese slug'); + assert( + files.includes('another-section.md'), + 'Should create file with English slug' + ); + + // Check the index file for correct links + const indexPath = path.join(outputDir, 'index.md'); + const indexContent = await fs.readFile(indexPath, 'utf-8'); + assert( + indexContent.includes('[章节一](./章节一.md)'), + 'Should link to Chinese filename' + ); + assert( + indexContent.includes('[章二](./章二.md)'), + 'Should link to Chinese filename' + ); + assert( + indexContent.includes('[セクション 2.1](./章二.md#セクション-21)'), + 'Should link to Japanese subsection' + ); + }); + await cleanupTests(); // Summary