From ceea2378f6384454611bde05501c0782df0df4b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6ran=20Karl?= <3951388+JoeKar@users.noreply.github.com> Date: Mon, 5 Feb 2024 20:47:45 +0100 Subject: [PATCH 1/5] buffer: Build the lines with runes --- internal/buffer/line_array.go | 9 ++++++- internal/util/unicode.go | 48 +++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/internal/buffer/line_array.go b/internal/buffer/line_array.go index eecb6b5904..7a4fc05bac 100644 --- a/internal/buffer/line_array.go +++ b/internal/buffer/line_array.go @@ -44,7 +44,8 @@ type searchState struct { // A Line contains the data in bytes as well as a highlight state, match // and a flag for whether the highlighting needs to be updated type Line struct { - data []byte + data []byte + runes []rune state highlight.State match highlight.LineMatch @@ -146,8 +147,10 @@ func NewLineArray(size uint64, endings FileFormat, reader io.Reader) *LineArray if err != nil { if err == io.EOF { + runes, _ := util.DecodeCharacters(data) la.lines = Append(la.lines, Line{ data: data, + runes: runes, state: nil, match: nil, }) @@ -155,8 +158,10 @@ func NewLineArray(size uint64, endings FileFormat, reader io.Reader) *LineArray // Last line was read break } else { + runes, _ := util.DecodeCharacters(data[:dlen-1]) la.lines = Append(la.lines, Line{ data: data[:dlen-1], + runes: runes, state: nil, match: nil, }) @@ -189,12 +194,14 @@ func (la *LineArray) Bytes() []byte { func (la *LineArray) newlineBelow(y int) { la.lines = append(la.lines, Line{ data: []byte{' '}, + runes: []rune{}, state: nil, match: nil, }) copy(la.lines[y+2:], la.lines[y+1:]) la.lines[y+1] = Line{ data: []byte{}, + runes: []rune{}, state: la.lines[y].state, match: nil, } diff --git a/internal/util/unicode.go b/internal/util/unicode.go index 14243e68be..9c05cdfd7c 100644 --- a/internal/util/unicode.go +++ b/internal/util/unicode.go @@ -64,6 +64,54 @@ func DecodeCharacterInString(str string) (rune, []rune, int) { return r, combc, size } +// DecodeCharacters returns the characters from an array of bytes +func DecodeCharacters(b []byte) ([]rune, int) { + var runes []rune + size := 0 + + for len(b) > 0 { + r, s := utf8.DecodeRune(b) + runes = append(runes, r) + size += s + b = b[s:] + r, s = utf8.DecodeRune(b) + + for isMark(r) { + runes = append(runes, r) + size += s + + b = b[s:] + r, s = utf8.DecodeRune(b) + } + } + + return runes, size +} + +// DecodeCharactersInString returns characters from a string +func DecodeCharactersInString(str string) ([]rune, int) { + var runes []rune + size := 0 + + for len(str) > 0 { + r, s := utf8.DecodeRuneInString(str) + runes = append(runes, r) + size += s + str = str[s:] + r, s = utf8.DecodeRuneInString(str) + + for isMark(r) { + runes = append(runes, r) + size += s + + str = str[s:] + r, s = utf8.DecodeRuneInString(str) + } + } + + return runes, size +} + // CharacterCount returns the number of characters in a byte array // Similar to utf8.RuneCount but for unicode characters func CharacterCount(b []byte) int { From 2e1249cc672cff287b749cdd2b274e5972812014 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6ran=20Karl?= <3951388+JoeKar@users.noreply.github.com> Date: Mon, 5 Feb 2024 22:39:47 +0100 Subject: [PATCH 2/5] buffer: Remove data as structure element of Line --- internal/buffer/buffer.go | 21 +++--- internal/buffer/line_array.go | 134 ++++++++++++++++++---------------- internal/buffer/save.go | 15 ++-- 3 files changed, 88 insertions(+), 82 deletions(-) diff --git a/internal/buffer/buffer.go b/internal/buffer/buffer.go index 2735ca467c..60d5cd7d22 100644 --- a/internal/buffer/buffer.go +++ b/internal/buffer/buffer.go @@ -165,7 +165,7 @@ func (b *SharedBuffer) calcHash(out *[md5.Size]byte) { h := md5.New() if len(b.lines) > 0 { - h.Write(b.lines[0].data) + h.Write(b.lines[0].data()) for _, l := range b.lines[1:] { if b.Endings == FFDos { @@ -173,7 +173,7 @@ func (b *SharedBuffer) calcHash(out *[md5.Size]byte) { } else { h.Write([]byte{'\n'}) } - h.Write(l.data) + h.Write(l.data()) } } @@ -866,7 +866,7 @@ func (b *Buffer) UpdateRules() { if header.MatchFileName(b.Path) { matchedFileName = true } - if len(fnameMatches) == 0 && header.MatchFileHeader(b.lines[0].data) { + if len(fnameMatches) == 0 && header.MatchFileHeader(b.lines[0].data()) { matchedFileHeader = true } } else if header.FileType == ft { @@ -920,7 +920,7 @@ func (b *Buffer) UpdateRules() { if header.MatchFileName(b.Path) { fnameMatches = append(fnameMatches, syntaxFileInfo{header, f.Name(), nil}) } - if len(fnameMatches) == 0 && header.MatchFileHeader(b.lines[0].data) { + if len(fnameMatches) == 0 && header.MatchFileHeader(b.lines[0].data()) { headerMatches = append(headerMatches, syntaxFileInfo{header, f.Name(), nil}) } } else if header.FileType == ft { @@ -953,7 +953,7 @@ func (b *Buffer) UpdateRules() { for _, m := range matches { if m.header.HasFileSignature() { for i := 0; i < limit; i++ { - if m.header.MatchFileSignature(b.lines[i].data) { + if m.header.MatchFileSignature(b.lines[i].data()) { syntaxFile = m.fileName if m.syntaxDef != nil { b.SyntaxDef = m.syntaxDef @@ -1130,11 +1130,11 @@ func (b *Buffer) MoveLinesUp(start int, end int) { if start < 1 || start >= end || end > len(b.lines) { return } - l := string(b.LineBytes(start - 1)) + l := string(b.lines[start-1].runes) if end == len(b.lines) { b.insert( Loc{ - util.CharacterCount(b.lines[end-1].data), + len(b.lines[end-1].runes), end - 1, }, []byte{'\n'}, @@ -1155,7 +1155,7 @@ func (b *Buffer) MoveLinesDown(start int, end int) { if start < 0 || start >= end || end >= len(b.lines) { return } - l := string(b.LineBytes(end)) + l := string(b.lines[end].runes) b.Insert( Loc{0, start}, l+"\n", @@ -1196,7 +1196,7 @@ func (b *Buffer) findMatchingBrace(braceType [2]rune, start Loc, char rune) (Loc } } else if char == braceType[1] { for y := start.Y; y >= 0; y-- { - l := []rune(string(b.lines[y].data)) + l := []rune(string(b.LineBytes(y))) xInit := len(l) - 1 if y == start.Y { xInit = start.X @@ -1281,7 +1281,8 @@ func (b *Buffer) Retab() { l = bytes.TrimLeft(l, " \t") b.Lock() - b.lines[i].data = append(ws, l...) + runes, _ := util.DecodeCharacters(append(ws, l...)) + b.lines[i].runes = runes b.Unlock() b.MarkModified(i, i) diff --git a/internal/buffer/line_array.go b/internal/buffer/line_array.go index 7a4fc05bac..a74ee5815a 100644 --- a/internal/buffer/line_array.go +++ b/internal/buffer/line_array.go @@ -10,28 +10,6 @@ import ( "github.com/micro-editor/micro/v2/pkg/highlight" ) -// Finds the byte index of the nth rune in a byte slice -func runeToByteIndex(n int, txt []byte) int { - if n == 0 { - return 0 - } - - count := 0 - i := 0 - for len(txt) > 0 { - _, _, size := util.DecodeCharacter(txt) - - txt = txt[size:] - count += size - i++ - - if i == n { - break - } - } - return count -} - // A searchState contains the search match info for a single line type searchState struct { search string @@ -41,10 +19,9 @@ type searchState struct { done bool } -// A Line contains the data in bytes as well as a highlight state, match +// A Line contains the slice of runes as well as a highlight state, match // and a flag for whether the highlighting needs to be updated type Line struct { - data []byte runes []rune state highlight.State @@ -60,6 +37,10 @@ type Line struct { search map[*Buffer]*searchState } +func (l Line) data() []byte { + return []byte(string(l.runes)) +} + const ( // Line ending file formats FFAuto = 0 // Autodetect format @@ -95,7 +76,7 @@ func Append(slice []Line, data ...Line) []Line { return slice } -// NewLineArray returns a new line array from an array of bytes +// NewLineArray returns a new line array from an array of runes func NewLineArray(size uint64, endings FileFormat, reader io.Reader) *LineArray { la := new(LineArray) @@ -149,7 +130,6 @@ func NewLineArray(size uint64, endings FileFormat, reader io.Reader) *LineArray if err == io.EOF { runes, _ := util.DecodeCharacters(data) la.lines = Append(la.lines, Line{ - data: data, runes: runes, state: nil, match: nil, @@ -160,7 +140,6 @@ func NewLineArray(size uint64, endings FileFormat, reader io.Reader) *LineArray } else { runes, _ := util.DecodeCharacters(data[:dlen-1]) la.lines = Append(la.lines, Line{ - data: data[:dlen-1], runes: runes, state: nil, match: nil, @@ -179,7 +158,7 @@ func (la *LineArray) Bytes() []byte { // initsize should provide a good estimate b.Grow(int(la.initsize + 4096)) for i, l := range la.lines { - b.Write(l.data) + b.Write(l.data()) if i != len(la.lines)-1 { if la.Endings == FFDos { b.WriteByte('\r') @@ -193,14 +172,12 @@ func (la *LineArray) Bytes() []byte { // newlineBelow adds a newline below the given line number func (la *LineArray) newlineBelow(y int) { la.lines = append(la.lines, Line{ - data: []byte{' '}, runes: []rune{}, state: nil, match: nil, }) copy(la.lines[y+2:], la.lines[y+1:]) la.lines[y+1] = Line{ - data: []byte{}, runes: []rune{}, state: la.lines[y].state, match: nil, @@ -212,41 +189,57 @@ func (la *LineArray) insert(pos Loc, value []byte) { la.lock.Lock() defer la.lock.Unlock() - x, y := runeToByteIndex(pos.X, la.lines[pos.Y].data), pos.Y - for i := 0; i < len(value); i++ { - if value[i] == '\n' || (value[i] == '\r' && i < len(value)-1 && value[i+1] == '\n') { + runes, _ := util.DecodeCharacters(value) + x, y := util.Min(pos.X, len(la.lines[pos.Y].runes)), pos.Y + start := -1 + for i := 0; i < len(runes); i++ { + if runes[i] == '\n' || (runes[i] == '\r' && i < len(runes)-1 && runes[i+1] == '\n') { la.split(Loc{x, y}) + if i > 0 && start < len(runes) && start < i { + if start < 0 { + start = 0 + } + la.insertRunes(Loc{x, y}, runes[start:i]) + } + x = 0 y++ - if value[i] == '\r' { + if runes[i] == '\r' { i++ } + if i+1 <= len(runes) { + + start = i + 1 + } continue } - la.insertByte(Loc{x, y}, value[i]) - x++ + } + if start < 0 { + la.insertRunes(Loc{x, y}, runes) + } else if start < len(runes) { + la.insertRunes(Loc{x, y}, runes[start:]) } } -// InsertByte inserts a byte at a given location -func (la *LineArray) insertByte(pos Loc, value byte) { - la.lines[pos.Y].data = append(la.lines[pos.Y].data, 0) - copy(la.lines[pos.Y].data[pos.X+1:], la.lines[pos.Y].data[pos.X:]) - la.lines[pos.Y].data[pos.X] = value +// Inserts a rune array at a given location +func (la *LineArray) insertRunes(pos Loc, runes []rune) { + la.lines[pos.Y].runes = append(la.lines[pos.Y].runes, runes...) + copy(la.lines[pos.Y].runes[pos.X+len(runes):], la.lines[pos.Y].runes[pos.X:]) + copy(la.lines[pos.Y].runes[pos.X:], runes) } // joinLines joins the two lines a and b func (la *LineArray) joinLines(a, b int) { - la.lines[a].data = append(la.lines[a].data, la.lines[b].data...) + la.insertRunes(Loc{len(la.lines[a].runes), a}, la.lines[b].runes) la.deleteLine(b) } // split splits a line at a given position func (la *LineArray) split(pos Loc) { la.newlineBelow(pos.Y) - la.lines[pos.Y+1].data = append(la.lines[pos.Y+1].data, la.lines[pos.Y].data[pos.X:]...) + la.insertRunes(Loc{0, pos.Y + 1}, la.lines[pos.Y].runes[pos.X:]) la.lines[pos.Y+1].state = la.lines[pos.Y].state la.lines[pos.Y].state = nil la.lines[pos.Y].match = nil @@ -260,10 +253,10 @@ func (la *LineArray) remove(start, end Loc) []byte { defer la.lock.Unlock() sub := la.Substr(start, end) - startX := runeToByteIndex(start.X, la.lines[start.Y].data) - endX := runeToByteIndex(end.X, la.lines[end.Y].data) + startX := util.Min(start.X, len(la.lines[start.Y].runes)) + endX := util.Min(end.X, len(la.lines[end.Y].runes)) if start.Y == end.Y { - la.lines[start.Y].data = append(la.lines[start.Y].data[:startX], la.lines[start.Y].data[endX:]...) + la.lines[start.Y].runes = append(la.lines[start.Y].runes[:startX], la.lines[start.Y].runes[endX:]...) } else { la.deleteLines(start.Y+1, end.Y-1) la.deleteToEnd(Loc{startX, start.Y}) @@ -275,12 +268,12 @@ func (la *LineArray) remove(start, end Loc) []byte { // deleteToEnd deletes from the end of a line to the position func (la *LineArray) deleteToEnd(pos Loc) { - la.lines[pos.Y].data = la.lines[pos.Y].data[:pos.X] + la.lines[pos.Y].runes = la.lines[pos.Y].runes[:pos.X] } // deleteFromStart deletes from the start of a line to the position func (la *LineArray) deleteFromStart(pos Loc) { - la.lines[pos.Y].data = la.lines[pos.Y].data[pos.X+1:] + la.lines[pos.Y].runes = la.lines[pos.Y].runes[pos.X+1:] } // deleteLine deletes the line number @@ -294,22 +287,19 @@ func (la *LineArray) deleteLines(y1, y2 int) { // Substr returns the string representation between two locations func (la *LineArray) Substr(start, end Loc) []byte { - startX := runeToByteIndex(start.X, la.lines[start.Y].data) - endX := runeToByteIndex(end.X, la.lines[end.Y].data) - if start.Y == end.Y { - src := la.lines[start.Y].data[startX:endX] - dest := make([]byte, len(src)) - copy(dest, src) - return dest + startX := util.Min(start.X, len(la.lines[start.Y].runes)) + endX := util.Min(end.X, len(la.lines[end.Y].runes)) + if start.Y == end.Y && startX <= endX { + return []byte(string(la.lines[start.Y].runes[startX:endX])) } - str := make([]byte, 0, len(la.lines[start.Y+1].data)*(end.Y-start.Y)) - str = append(str, la.lines[start.Y].data[startX:]...) + var str []byte + str = append(str, []byte(string(la.lines[start.Y].runes[startX:]))...) str = append(str, '\n') for i := start.Y + 1; i <= end.Y-1; i++ { - str = append(str, la.lines[i].data...) + str = append(str, []byte(string(la.lines[i].runes))...) str = append(str, '\n') } - str = append(str, la.lines[end.Y].data[:endX]...) + str = append(str, []byte(string(la.lines[end.Y].runes[:endX]))...) return str } @@ -326,15 +316,31 @@ func (la *LineArray) Start() Loc { // End returns the location of the last character in the buffer func (la *LineArray) End() Loc { numlines := len(la.lines) - return Loc{util.CharacterCount(la.lines[numlines-1].data), numlines - 1} + return Loc{len(la.lines[numlines-1].runes), numlines - 1} +} + +// Line returns line n as an array of runes +func (la *LineArray) Line(n int) []rune { + if n >= len(la.lines) || n < 0 { + return []rune{} + } + return la.lines[n].runes } // LineBytes returns line n as an array of bytes -func (la *LineArray) LineBytes(lineN int) []byte { - if lineN >= len(la.lines) || lineN < 0 { +func (la *LineArray) LineBytes(n int) []byte { + if n >= len(la.lines) || n < 0 { return []byte{} } - return la.lines[lineN].data + return []byte(string(la.lines[n].runes)) +} + +// LineString returns line n as an string +func (la *LineArray) LineString(n int) string { + if n >= len(la.lines) || n < 0 { + return string("") + } + return string(la.lines[n].runes) } // State gets the highlight state for the given line number @@ -416,7 +422,7 @@ func (la *LineArray) SearchMatch(b *Buffer, pos Loc) bool { if !s.done { s.match = nil start := Loc{0, lineN} - end := Loc{util.CharacterCount(la.lines[lineN].data), lineN} + end := Loc{len(la.lines[lineN].runes), lineN} for start.X < end.X { m, found, _ := b.FindNext(b.LastSearch, start, end, start, true, b.LastSearchRegex) if !found { diff --git a/internal/buffer/save.go b/internal/buffer/save.go index 44e8f4a3ed..fb98e41d83 100644 --- a/internal/buffer/save.go +++ b/internal/buffer/save.go @@ -2,7 +2,6 @@ package buffer import ( "bufio" - "bytes" "errors" "io" "io/fs" @@ -11,6 +10,7 @@ import ( "os/signal" "path/filepath" "runtime" + "strings" "time" "unicode" @@ -156,7 +156,7 @@ func (wf wrappedFile) Write(b *SharedBuffer) (int, error) { } // write lines - size, err := file.Write(b.lines[0].data) + size, err := file.Write(b.lines[0].data()) if err != nil { return 0, err } @@ -165,10 +165,10 @@ func (wf wrappedFile) Write(b *SharedBuffer) (int, error) { if _, err = file.Write(eol); err != nil { return 0, err } - if _, err = file.Write(l.data); err != nil { + if _, err = file.Write(l.data()); err != nil { return 0, err } - size += len(eol) + len(l.data) + size += len(eol) + len(l.data()) } err = file.Flush() @@ -249,10 +249,9 @@ func (b *Buffer) saveToFile(filename string, withSudo bool, autoSave bool) error if !autoSave && b.Settings["rmtrailingws"].(bool) { for i, l := range b.lines { - leftover := util.CharacterCount(bytes.TrimRightFunc(l.data, unicode.IsSpace)) - - linelen := util.CharacterCount(l.data) - b.Remove(Loc{leftover, i}, Loc{linelen, i}) + leftover := strings.TrimRightFunc(string(l.runes), unicode.IsSpace) + linelen := len(l.runes) + b.Remove(Loc{len(leftover), i}, Loc{linelen, i}) } b.RelocateCursors() From 187ba51fd6251f1f8b31b7ed270bf20508751ffa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6ran=20Karl?= <3951388+JoeKar@users.noreply.github.com> Date: Sun, 25 Jan 2026 15:38:08 +0100 Subject: [PATCH 3/5] buffer: Rework to retain support of combined characters --- internal/buffer/buffer.go | 14 ++-- internal/buffer/line_array.go | 128 +++++++++++++++++++++++++--------- internal/buffer/save.go | 2 +- internal/util/unicode.go | 61 +++++++--------- 4 files changed, 134 insertions(+), 71 deletions(-) diff --git a/internal/buffer/buffer.go b/internal/buffer/buffer.go index 60d5cd7d22..787c9ec466 100644 --- a/internal/buffer/buffer.go +++ b/internal/buffer/buffer.go @@ -1130,7 +1130,7 @@ func (b *Buffer) MoveLinesUp(start int, end int) { if start < 1 || start >= end || end > len(b.lines) { return } - l := string(b.lines[start-1].runes) + l := b.LineString(start - 1) if end == len(b.lines) { b.insert( Loc{ @@ -1155,7 +1155,7 @@ func (b *Buffer) MoveLinesDown(start int, end int) { if start < 0 || start >= end || end >= len(b.lines) { return } - l := string(b.lines[end].runes) + l := b.LineString(end) b.Insert( Loc{0, start}, l+"\n", @@ -1281,7 +1281,13 @@ func (b *Buffer) Retab() { l = bytes.TrimLeft(l, " \t") b.Lock() - runes, _ := util.DecodeCharacters(append(ws, l...)) + ws = append(ws, l...) + var runes []Character + for len(ws) > 0 { + combc, s := util.DecodeCombinedCharacter(ws) + runes = append(runes, Character{combc}) + ws = ws[s:] + } b.lines[i].runes = runes b.Unlock() @@ -1318,7 +1324,7 @@ func ParseCursorLocation(cursorPositions []string) (Loc, error) { // Line returns the string representation of the given line number func (b *Buffer) Line(i int) string { - return string(b.LineBytes(i)) + return b.LineString(i) } func (b *Buffer) Write(bytes []byte) (n int, err error) { diff --git a/internal/buffer/line_array.go b/internal/buffer/line_array.go index a74ee5815a..9630619c21 100644 --- a/internal/buffer/line_array.go +++ b/internal/buffer/line_array.go @@ -19,10 +19,14 @@ type searchState struct { done bool } +type Character struct { + combc []rune +} + // A Line contains the slice of runes as well as a highlight state, match // and a flag for whether the highlighting needs to be updated type Line struct { - runes []rune + runes []Character state highlight.State match highlight.LineMatch @@ -37,8 +41,22 @@ type Line struct { search map[*Buffer]*searchState } +// data returns the line as byte slice func (l Line) data() []byte { - return []byte(string(l.runes)) + var runes []rune + for _, r := range l.runes { + runes = append(runes, r.combc[0:]...) + } + return []byte(string(runes)) +} + +// String returns the line as string +func (l Line) String() string { + var runes []rune + for _, r := range l.runes { + runes = append(runes, r.combc[0:]...) + } + return string(runes) } const ( @@ -126,9 +144,14 @@ func NewLineArray(size uint64, endings FileFormat, reader io.Reader) *LineArray loaded += dlen } + var runes []Character if err != nil { if err == io.EOF { - runes, _ := util.DecodeCharacters(data) + for len(data) > 0 { + combc, s := util.DecodeCombinedCharacter(data) + runes = append(runes, Character{combc}) + data = data[s:] + } la.lines = Append(la.lines, Line{ runes: runes, state: nil, @@ -138,7 +161,12 @@ func NewLineArray(size uint64, endings FileFormat, reader io.Reader) *LineArray // Last line was read break } else { - runes, _ := util.DecodeCharacters(data[:dlen-1]) + data = data[:dlen-1] + for len(data) > 0 { + combc, s := util.DecodeCombinedCharacter(data) + runes = append(runes, Character{combc}) + data = data[s:] + } la.lines = Append(la.lines, Line{ runes: runes, state: nil, @@ -172,13 +200,13 @@ func (la *LineArray) Bytes() []byte { // newlineBelow adds a newline below the given line number func (la *LineArray) newlineBelow(y int) { la.lines = append(la.lines, Line{ - runes: []rune{}, + runes: []Character{}, state: nil, match: nil, }) copy(la.lines[y+2:], la.lines[y+1:]) la.lines[y+1] = Line{ - runes: []rune{}, + runes: []Character{}, state: la.lines[y].state, match: nil, } @@ -189,31 +217,39 @@ func (la *LineArray) insert(pos Loc, value []byte) { la.lock.Lock() defer la.lock.Unlock() - runes, _ := util.DecodeCharacters(value) + var runes []Character + for len(value) > 0 { + combc, s := util.DecodeCombinedCharacter(value) + runes = append(runes, Character{combc}) + value = value[s:] + } x, y := util.Min(pos.X, len(la.lines[pos.Y].runes)), pos.Y start := -1 - for i := 0; i < len(runes); i++ { - if runes[i] == '\n' || (runes[i] == '\r' && i < len(runes)-1 && runes[i+1] == '\n') { - la.split(Loc{x, y}) - if i > 0 && start < len(runes) && start < i { - if start < 0 { - start = 0 + +outer: + for i, r := range runes { + for j := 0; j < len(r.combc); j++ { + if r.combc[j] == '\n' || (r.combc[j] == '\r' && i < len(runes)-1 && r.combc[j+1] == '\n') { + la.split(Loc{x, y}) + if i > 0 && start < len(runes) && start < i { + if start < 0 { + start = 0 + } + la.insertRunes(Loc{x, y}, runes[start:i]) } - la.insertRunes(Loc{x, y}, runes[start:i]) - } - x = 0 - y++ + x = 0 + y++ - if runes[i] == '\r' { - i++ - } - if i+1 <= len(runes) { + if r.combc[j] == '\r' { + i++ + } + if i+1 <= len(runes) { + start = i + 1 + } - start = i + 1 + continue outer } - - continue } } if start < 0 { @@ -224,7 +260,7 @@ func (la *LineArray) insert(pos Loc, value []byte) { } // Inserts a rune array at a given location -func (la *LineArray) insertRunes(pos Loc, runes []rune) { +func (la *LineArray) insertRunes(pos Loc, runes []Character) { la.lines[pos.Y].runes = append(la.lines[pos.Y].runes, runes...) copy(la.lines[pos.Y].runes[pos.X+len(runes):], la.lines[pos.Y].runes[pos.X:]) copy(la.lines[pos.Y].runes[pos.X:], runes) @@ -289,17 +325,33 @@ func (la *LineArray) deleteLines(y1, y2 int) { func (la *LineArray) Substr(start, end Loc) []byte { startX := util.Min(start.X, len(la.lines[start.Y].runes)) endX := util.Min(end.X, len(la.lines[end.Y].runes)) + var runes []rune if start.Y == end.Y && startX <= endX { - return []byte(string(la.lines[start.Y].runes[startX:endX])) + for _, r := range la.lines[start.Y].runes[startX:endX] { + runes = append(runes, r.combc[0:]...) + } + return []byte(string(runes)) } + var str []byte - str = append(str, []byte(string(la.lines[start.Y].runes[startX:]))...) + for _, r := range la.lines[start.Y].runes[startX:] { + runes = append(runes, r.combc[0:]...) + } + str = append(str, []byte(string(runes))...) str = append(str, '\n') for i := start.Y + 1; i <= end.Y-1; i++ { - str = append(str, []byte(string(la.lines[i].runes))...) + runes = runes[:0] + for _, r := range la.lines[i].runes { + runes = append(runes, r.combc[0:]...) + } + str = append(str, []byte(string(runes))...) str = append(str, '\n') } - str = append(str, []byte(string(la.lines[end.Y].runes[:endX]))...) + runes = runes[:0] + for _, r := range la.lines[end.Y].runes[:endX] { + runes = append(runes, r.combc[0:]...) + } + str = append(str, []byte(string(runes))...) return str } @@ -324,7 +376,13 @@ func (la *LineArray) Line(n int) []rune { if n >= len(la.lines) || n < 0 { return []rune{} } - return la.lines[n].runes + + var runes []rune + for _, r := range la.lines[n].runes { + runes = append(runes, r.combc[0:]...) + } + + return runes } // LineBytes returns line n as an array of bytes @@ -332,7 +390,7 @@ func (la *LineArray) LineBytes(n int) []byte { if n >= len(la.lines) || n < 0 { return []byte{} } - return []byte(string(la.lines[n].runes)) + return la.lines[n].data() } // LineString returns line n as an string @@ -340,7 +398,13 @@ func (la *LineArray) LineString(n int) string { if n >= len(la.lines) || n < 0 { return string("") } - return string(la.lines[n].runes) + + var runes []rune + for _, r := range la.lines[n].runes { + runes = append(runes, r.combc[0:]...) + } + + return string(runes) } // State gets the highlight state for the given line number diff --git a/internal/buffer/save.go b/internal/buffer/save.go index fb98e41d83..e9a1bd1d6a 100644 --- a/internal/buffer/save.go +++ b/internal/buffer/save.go @@ -249,7 +249,7 @@ func (b *Buffer) saveToFile(filename string, withSudo bool, autoSave bool) error if !autoSave && b.Settings["rmtrailingws"].(bool) { for i, l := range b.lines { - leftover := strings.TrimRightFunc(string(l.runes), unicode.IsSpace) + leftover := strings.TrimRightFunc(l.String(), unicode.IsSpace) linelen := len(l.runes) b.Remove(Loc{len(leftover), i}, Loc{linelen, i}) } diff --git a/internal/util/unicode.go b/internal/util/unicode.go index 9c05cdfd7c..b09114316e 100644 --- a/internal/util/unicode.go +++ b/internal/util/unicode.go @@ -64,52 +64,45 @@ func DecodeCharacterInString(str string) (rune, []rune, int) { return r, combc, size } -// DecodeCharacters returns the characters from an array of bytes -func DecodeCharacters(b []byte) ([]rune, int) { - var runes []rune - size := 0 +// DecodeCombinedCharacter returns the next combined character +// from an array of bytes +// A character is a rune along with any accompanying combining runes +func DecodeCombinedCharacter(b []byte) ([]rune, int) { + var combc []rune + r, size := utf8.DecodeRune(b) + combc = append(combc, r) + b = b[size:] + c, s := utf8.DecodeRune(b) - for len(b) > 0 { - r, s := utf8.DecodeRune(b) - runes = append(runes, r) + for isMark(c) { + combc = append(combc, c) size += s - b = b[s:] - r, s = utf8.DecodeRune(b) - for isMark(r) { - runes = append(runes, r) - size += s - - b = b[s:] - r, s = utf8.DecodeRune(b) - } + b = b[s:] + c, s = utf8.DecodeRune(b) } - return runes, size + return combc, size } -// DecodeCharactersInString returns characters from a string -func DecodeCharactersInString(str string) ([]rune, int) { - var runes []rune - size := 0 +// DecodeCombinedCharacterInString is the same as DecodeCombinedCharacter +// but for strings +func DecodeCombinedCharacterInString(str string) ([]rune, int) { + var combc []rune + r, size := utf8.DecodeRuneInString(str) + combc = append(combc, r) + str = str[size:] + c, s := utf8.DecodeRuneInString(str) - for len(str) > 0 { - r, s := utf8.DecodeRuneInString(str) - runes = append(runes, r) + for isMark(c) { + combc = append(combc, c) size += s - str = str[s:] - r, s = utf8.DecodeRuneInString(str) - - for isMark(r) { - runes = append(runes, r) - size += s - str = str[s:] - r, s = utf8.DecodeRuneInString(str) - } + str = str[s:] + c, s = utf8.DecodeRuneInString(str) } - return runes, size + return combc, size } // CharacterCount returns the number of characters in a byte array From 832cce753173873ee4cc4949114dcdc3cf7c5f10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6ran=20Karl?= <3951388+JoeKar@users.noreply.github.com> Date: Thu, 8 Feb 2024 23:45:23 +0100 Subject: [PATCH 4/5] buffer: Improve cursor movement --- internal/buffer/cursor.go | 17 +++-------------- internal/buffer/line_array.go | 13 ++++--------- 2 files changed, 7 insertions(+), 23 deletions(-) diff --git a/internal/buffer/cursor.go b/internal/buffer/cursor.go index 7c229fd3e4..8511e7cbb1 100644 --- a/internal/buffer/cursor.go +++ b/internal/buffer/cursor.go @@ -601,24 +601,13 @@ func (c *Cursor) SubWordLeft() { // RuneUnder returns the rune under the given x position func (c *Cursor) RuneUnder(x int) rune { - line := c.buf.LineBytes(c.Y) - if len(line) == 0 || x >= util.CharacterCount(line) { + line := c.buf.LineCharacters(c.Y) + if len(line) == 0 || x >= len(line) { return '\n' } else if x < 0 { x = 0 } - i := 0 - for len(line) > 0 { - r, _, size := util.DecodeCharacter(line) - line = line[size:] - - if i == x { - return r - } - - i++ - } - return '\n' + return line[x].combc[0] } func (c *Cursor) StoreVisualX() { diff --git a/internal/buffer/line_array.go b/internal/buffer/line_array.go index 9630619c21..3c296fff04 100644 --- a/internal/buffer/line_array.go +++ b/internal/buffer/line_array.go @@ -371,18 +371,13 @@ func (la *LineArray) End() Loc { return Loc{len(la.lines[numlines-1].runes), numlines - 1} } -// Line returns line n as an array of runes -func (la *LineArray) Line(n int) []rune { +// LineCharacters returns line n as an array of characters +func (la *LineArray) LineCharacters(n int) []Character { if n >= len(la.lines) || n < 0 { - return []rune{} + return []Character{} } - var runes []rune - for _, r := range la.lines[n].runes { - runes = append(runes, r.combc[0:]...) - } - - return runes + return la.lines[n].runes } // LineBytes returns line n as an array of bytes From 31b26da647d94acd4b51419273890291ad959961 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6ran=20Karl?= <3951388+JoeKar@users.noreply.github.com> Date: Wed, 21 Feb 2024 21:16:13 +0100 Subject: [PATCH 5/5] util: Let DecodeCharacter use DecodeCombinedCharacter --- internal/util/unicode.go | 32 ++++---------------------------- 1 file changed, 4 insertions(+), 28 deletions(-) diff --git a/internal/util/unicode.go b/internal/util/unicode.go index b09114316e..3268e35c91 100644 --- a/internal/util/unicode.go +++ b/internal/util/unicode.go @@ -29,39 +29,15 @@ func isMark(r rune) bool { // DecodeCharacter returns the next character from an array of bytes // A character is a rune along with any accompanying combining runes func DecodeCharacter(b []byte) (rune, []rune, int) { - r, size := utf8.DecodeRune(b) - b = b[size:] - c, s := utf8.DecodeRune(b) - - var combc []rune - for isMark(c) { - combc = append(combc, c) - size += s - - b = b[s:] - c, s = utf8.DecodeRune(b) - } - - return r, combc, size + combc, size := DecodeCombinedCharacter(b) + return combc[0], combc[1:], size } // DecodeCharacterInString returns the next character from a string // A character is a rune along with any accompanying combining runes func DecodeCharacterInString(str string) (rune, []rune, int) { - r, size := utf8.DecodeRuneInString(str) - str = str[size:] - c, s := utf8.DecodeRuneInString(str) - - var combc []rune - for isMark(c) { - combc = append(combc, c) - size += s - - str = str[s:] - c, s = utf8.DecodeRuneInString(str) - } - - return r, combc, size + combc, size := DecodeCombinedCharacterInString(str) + return combc[0], combc[1:], size } // DecodeCombinedCharacter returns the next combined character