From f066d91703d1fd680e2327189da0e5013a6469cd Mon Sep 17 00:00:00 2001 From: Br1an67 <932039080@qq.com> Date: Mon, 2 Mar 2026 01:08:54 +0800 Subject: [PATCH] fix(parser): skip hash symbols inside code blocks when parsing headers When parsing markdown sections, lines starting with # inside fenced code blocks (e.g. bash comments) were incorrectly treated as headers. This caused the parser to split content at those lines, resulting in incorrect section boundaries and lost requirement content. Added inCodeBlock state tracking to parseSections() and getContentUntilNextHeader() that toggles on triple-backtick fence lines and skips header regex matching while inside code blocks. Fixes #312 --- src/core/parsers/markdown-parser.ts | 24 +++++++++++++ test/core/parsers/markdown-parser.test.ts | 43 +++++++++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/src/core/parsers/markdown-parser.ts b/src/core/parsers/markdown-parser.ts index 8bd59d1ae..8ef6e136b 100644 --- a/src/core/parsers/markdown-parser.ts +++ b/src/core/parsers/markdown-parser.ts @@ -78,9 +78,20 @@ export class MarkdownParser { protected parseSections(): Section[] { const sections: Section[] = []; const stack: Section[] = []; + let inCodeBlock = false; for (let i = 0; i < this.lines.length; i++) { const line = this.lines[i]; + + if (line.trim().startsWith('```')) { + inCodeBlock = !inCodeBlock; + continue; + } + + if (inCodeBlock) { + continue; + } + const headerMatch = line.match(/^(#{1,6})\s+(.+)$/); if (headerMatch) { @@ -114,9 +125,22 @@ export class MarkdownParser { protected getContentUntilNextHeader(startLine: number, currentLevel: number): string { const contentLines: string[] = []; + let inCodeBlock = false; for (let i = startLine; i < this.lines.length; i++) { const line = this.lines[i]; + + if (line.trim().startsWith('```')) { + inCodeBlock = !inCodeBlock; + contentLines.push(line); + continue; + } + + if (inCodeBlock) { + contentLines.push(line); + continue; + } + const headerMatch = line.match(/^(#{1,6})\s+/); if (headerMatch && headerMatch[1].length <= currentLevel) { diff --git a/test/core/parsers/markdown-parser.test.ts b/test/core/parsers/markdown-parser.test.ts index 502f575b4..b3b5eba04 100644 --- a/test/core/parsers/markdown-parser.test.ts +++ b/test/core/parsers/markdown-parser.test.ts @@ -287,5 +287,48 @@ Then result`; expect(spec.requirements[0].text).toBe('This is the actual requirement text.'); }); + + it('should not treat hash symbols inside code blocks as headers', () => { + const content = `# test-bug Specification + +## Purpose +Minimal test case to reproduce the code block parsing bug. + +## Requirements + +### Requirement: First requirement before code block +This requirement comes before the problematic code block. + +#### Scenario: Example with code block containing hash symbols +- **GIVEN** a code block with bash comments +\`\`\`bash +# This is a comment +echo "hello" +# Another comment +\`\`\` +- **THEN** the parser should ignore hash symbols inside code blocks + +### Requirement: Second requirement after code block +This requirement comes after the code block. + +#### Scenario: Another scenario +- **GIVEN** something +- **THEN** something happens + +### Requirement: Third requirement +This is the third requirement. + +#### Scenario: Third scenario +- **GIVEN** more conditions +- **THEN** more results`; + + const parser = new MarkdownParser(content); + const spec = parser.parseSpec('test-bug'); + + expect(spec.requirements).toHaveLength(3); + expect(spec.requirements[0].text).toContain('before the problematic code block'); + expect(spec.requirements[1].text).toContain('after the code block'); + expect(spec.requirements[2].text).toContain('third requirement'); + }); }); }); \ No newline at end of file