From 1f8ef6131863340e9da1abb860820e02d324cacb Mon Sep 17 00:00:00 2001 From: Debian Date: Mon, 15 Jun 2026 12:51:00 +0100 Subject: [PATCH] fix: bash heredoc ending prematurely on single-word content lines The heredoc end regex in sh.yaml matched any single-word line, causing lines like 'DoesNotWork' inside a heredoc body to incorrectly terminate the region. Fix by: 1. Adding a delimiter capture mechanism to the highlighter engine: - region struct gains a delimiter field - regionWithDelimiter() captures the heredoc word from the start match - highlightRegion() verifies the end match equals the stored delimiter 2. Updating the sh.yaml heredoc regexes: - start: captures the delimiter word with a subexpression - end: matches only standard identifier words; the engine verifies it equals the captured delimiter Fixes #4114 --- pkg/highlight/heredoc_bug_test.go | 58 +++++++++++++++++++++++++++++++ pkg/highlight/highlighter.go | 47 +++++++++++++++++++++++++ pkg/highlight/parser.go | 1 + runtime/syntax/sh.yaml | 4 +-- 4 files changed, 108 insertions(+), 2 deletions(-) create mode 100644 pkg/highlight/heredoc_bug_test.go diff --git a/pkg/highlight/heredoc_bug_test.go b/pkg/highlight/heredoc_bug_test.go new file mode 100644 index 0000000000..63581687bd --- /dev/null +++ b/pkg/highlight/heredoc_bug_test.go @@ -0,0 +1,58 @@ +package highlight + +import ( + "os" + "testing" +) + +// TestHeredocPrematureEnd verifies that a single word like "DoesNotWork" +// inside a bash heredoc body does not prematurely end the heredoc region. 
+// See https://github.com/micro-editor/micro/issues/4114 +func TestHeredocPrematureEnd(t *testing.T) { + data, err := os.ReadFile("../../runtime/syntax/sh.yaml") + if err != nil { + t.Fatalf("Failed to read syntax file: %v", err) + } + file, err := ParseFile(data) + if err != nil { + t.Fatalf("Failed to parse syntax file: %v", err) + } + def, err := ParseDef(file, nil) + if err != nil { + t.Fatalf("Failed to parse syntax def: %v", err) + } + h := NewHighlighter(def) + + // heredoc with a single word line inside that should NOT end it + input := "cat < 0 { + t.Fatalf("heredoc ended prematurely on line 'DoesNotWork' at position %d", pos) + } + if group != constStringGroup && group != 0 { + t.Fatalf("unexpected group %d on line 'DoesNotWork' at position %d", group, pos) + } + } + + // Line 4 (index 4, "HELLO") should end the heredoc. + // The HELLO line starts in the heredoc and transitions to group 0 after the word. + foundEnd := false + for _, group := range matches[4] { + if group == 0 { + foundEnd = true + } + } + if !foundEnd { + t.Fatal("heredoc did not end on the proper delimiter line 'HELLO'") + } +} diff --git a/pkg/highlight/highlighter.go b/pkg/highlight/highlighter.go index a13a72610d..7451bc638d 100644 --- a/pkg/highlight/highlighter.go +++ b/pkg/highlight/highlighter.go @@ -101,6 +101,40 @@ func findIndex(regex *regexp.Regexp, skip *regexp.Regexp, str []byte) []int { return []int{runePos(match[0], str), runePos(match[1], str)} } +// regionWithDelimiter returns a copy of the region with the delimiter set from +// the start regex's named "delimiter" group as matched on the line. If the start regex +// has no such group, or does not match the line, it returns the original region unchanged. +func regionWithDelimiter(r *region, line []byte) *region { + idx := r.start.SubexpIndex("delimiter") + if idx < 0 { + return r + } + if sub := r.start.FindSubmatch(line); sub != nil { + rc := *r + rc.delimiter = string(sub[idx]) + return &rc + } + return r +} + +// bytePos converts a rune position to a byte position in the given slice. 
+func bytePos(runeIdx int, str []byte) int { + if runeIdx <= 0 { + return 0 + } + count := 0 + totalSize := 0 + for totalSize < len(str) { + if count >= runeIdx { + return totalSize + } + _, _, size := DecodeCharacter(str[totalSize:]) + totalSize += size + count++ + } + return len(str) +} + func findAllIndex(regex *regexp.Regexp, str []byte) [][]int { matches := regex.FindAllIndex(str, -1) for i, m := range matches { @@ -124,6 +158,17 @@ func (h *Highlighter) highlightRegion(highlights LineMatch, start int, canMatchE firstLoc := []int{lineLen, 0} searchNesting := true endLoc := findIndex(curRegion.end, curRegion.skip, line) + if endLoc != nil && curRegion.delimiter != "" { + // When the region has a captured delimiter (e.g. heredoc), + // verify the matched text on the original line equals the delimiter. + // endLoc contains rune positions; convert to byte positions. + bStart := bytePos(endLoc[0], line) + bEnd := bytePos(endLoc[1], line) + matched := string(line[bStart:bEnd]) + if matched != curRegion.delimiter { + endLoc = nil + } + } if endLoc != nil { if start == endLoc[0] { searchNesting = false @@ -146,6 +191,7 @@ func (h *Highlighter) highlightRegion(highlights LineMatch, start int, canMatchE if !statesOnly { highlights[start+firstLoc[0]] = firstRegion.limitGroup } + firstRegion = regionWithDelimiter(firstRegion, line) h.highlightEmptyRegion(highlights, start+firstLoc[1], canMatchEnd, lineNum, sliceStart(line, firstLoc[1]), statesOnly) h.highlightRegion(highlights, start+firstLoc[1], canMatchEnd, lineNum, sliceStart(line, firstLoc[1]), firstRegion, statesOnly) return highlights @@ -228,6 +274,7 @@ func (h *Highlighter) highlightEmptyRegion(highlights LineMatch, start int, canM if !statesOnly { highlights[start+firstLoc[0]] = firstRegion.limitGroup } + firstRegion = regionWithDelimiter(firstRegion, line) h.highlightEmptyRegion(highlights, start, false, lineNum, sliceEnd(line, firstLoc[0]), statesOnly) h.highlightRegion(highlights, start+firstLoc[1], 
canMatchEnd, lineNum, sliceStart(line, firstLoc[1]), firstRegion, statesOnly) return highlights diff --git a/pkg/highlight/parser.go b/pkg/highlight/parser.go index 804e6bc0bd..73df97f56d 100644 --- a/pkg/highlight/parser.go +++ b/pkg/highlight/parser.go @@ -86,6 +86,7 @@ type region struct { end *regexp.Regexp skip *regexp.Regexp rules *rules + delimiter string // for heredoc-like regions: text captured by the start regex's named "delimiter" group; empty disables the end-match check } func init() { diff --git a/runtime/syntax/sh.yaml b/runtime/syntax/sh.yaml index e6f75a80d1..e34f8a870d 100644 --- a/runtime/syntax/sh.yaml +++ b/runtime/syntax/sh.yaml @@ -57,8 +57,8 @@ rules: rules: [] - constant.string: - start: "<<[^\\s]+[-~.]*[A-Za-z0-9]+$" - end: "^[^\\s]+[A-Za-z0-9]+$" + start: "<<-?(?P<delimiter>[A-Za-z_][A-Za-z0-9_]*)$" + end: "^([A-Za-z_][A-Za-z0-9_]*)$" skip: "\\\\." rules: []