diff --git a/internal/buffer/buffer.go b/internal/buffer/buffer.go index 2735ca467..787c9ec46 100644 --- a/internal/buffer/buffer.go +++ b/internal/buffer/buffer.go @@ -165,7 +165,7 @@ func (b *SharedBuffer) calcHash(out *[md5.Size]byte) { h := md5.New() if len(b.lines) > 0 { - h.Write(b.lines[0].data) + h.Write(b.lines[0].data()) for _, l := range b.lines[1:] { if b.Endings == FFDos { @@ -173,7 +173,7 @@ func (b *SharedBuffer) calcHash(out *[md5.Size]byte) { } else { h.Write([]byte{'\n'}) } - h.Write(l.data) + h.Write(l.data()) } } @@ -866,7 +866,7 @@ func (b *Buffer) UpdateRules() { if header.MatchFileName(b.Path) { matchedFileName = true } - if len(fnameMatches) == 0 && header.MatchFileHeader(b.lines[0].data) { + if len(fnameMatches) == 0 && header.MatchFileHeader(b.lines[0].data()) { matchedFileHeader = true } } else if header.FileType == ft { @@ -920,7 +920,7 @@ func (b *Buffer) UpdateRules() { if header.MatchFileName(b.Path) { fnameMatches = append(fnameMatches, syntaxFileInfo{header, f.Name(), nil}) } - if len(fnameMatches) == 0 && header.MatchFileHeader(b.lines[0].data) { + if len(fnameMatches) == 0 && header.MatchFileHeader(b.lines[0].data()) { headerMatches = append(headerMatches, syntaxFileInfo{header, f.Name(), nil}) } } else if header.FileType == ft { @@ -953,7 +953,7 @@ func (b *Buffer) UpdateRules() { for _, m := range matches { if m.header.HasFileSignature() { for i := 0; i < limit; i++ { - if m.header.MatchFileSignature(b.lines[i].data) { + if m.header.MatchFileSignature(b.lines[i].data()) { syntaxFile = m.fileName if m.syntaxDef != nil { b.SyntaxDef = m.syntaxDef @@ -1130,11 +1130,11 @@ func (b *Buffer) MoveLinesUp(start int, end int) { if start < 1 || start >= end || end > len(b.lines) { return } - l := string(b.LineBytes(start - 1)) + l := b.LineString(start - 1) if end == len(b.lines) { b.insert( Loc{ - util.CharacterCount(b.lines[end-1].data), + len(b.lines[end-1].runes), end - 1, }, []byte{'\n'}, @@ -1155,7 +1155,7 @@ func (b *Buffer) MoveLinesDown(start int, end int) { if start < 0 || start >= end || end >= len(b.lines) { return } - l := string(b.LineBytes(end)) + l := b.LineString(end) b.Insert( Loc{0, start}, l+"\n", @@ -1196,7 +1196,7 @@ func (b *Buffer) findMatchingBrace(braceType [2]rune, start Loc, char rune) (Loc } } else if char == braceType[1] { for y := start.Y; y >= 0; y-- { - l := []rune(string(b.lines[y].data)) + l := []rune(string(b.LineBytes(y))) xInit := len(l) - 1 if y == start.Y { xInit = start.X @@ -1281,7 +1281,14 @@ func (b *Buffer) Retab() { l = bytes.TrimLeft(l, " \t") b.Lock() - b.lines[i].data = append(ws, l...) + ws = append(ws, l...) + var runes []Character + for len(ws) > 0 { + combc, s := util.DecodeCombinedCharacter(ws) + runes = append(runes, Character{combc}) + ws = ws[s:] + } + b.lines[i].runes = runes b.Unlock() b.MarkModified(i, i) @@ -1317,7 +1324,7 @@ func ParseCursorLocation(cursorPositions []string) (Loc, error) { // Line returns the string representation of the given line number func (b *Buffer) Line(i int) string { - return string(b.LineBytes(i)) + return b.LineString(i) } func (b *Buffer) Write(bytes []byte) (n int, err error) { diff --git a/internal/buffer/cursor.go b/internal/buffer/cursor.go index 7c229fd3e..8511e7cbb 100644 --- a/internal/buffer/cursor.go +++ b/internal/buffer/cursor.go @@ -601,24 +601,13 @@ func (c *Cursor) SubWordLeft() { // RuneUnder returns the rune under the given x position func (c *Cursor) RuneUnder(x int) rune { - line := c.buf.LineBytes(c.Y) - if len(line) == 0 || x >= util.CharacterCount(line) { + line := c.buf.LineCharacters(c.Y) + if len(line) == 0 || x >= len(line) { return '\n' } else if x < 0 { x = 0 } - i := 0 - for len(line) > 0 { - r, _, size := util.DecodeCharacter(line) - line = line[size:] - - if i == x { - return r - } - - i++ - } - return '\n' + return line[x].combc[0] } func (c *Cursor) StoreVisualX() { diff --git a/internal/buffer/line_array.go b/internal/buffer/line_array.go index eecb6b590..3c296fff0 100644 --- a/internal/buffer/line_array.go +++ b/internal/buffer/line_array.go @@ -10,28 +10,6 @@ import ( "github.com/micro-editor/micro/v2/pkg/highlight" ) -// Finds the byte index of the nth rune in a byte slice -func runeToByteIndex(n int, txt []byte) int { - if n == 0 { - return 0 - } - - count := 0 - i := 0 - for len(txt) > 0 { - _, _, size := util.DecodeCharacter(txt) - - txt = txt[size:] - count += size - i++ - - if i == n { - break - } - } - return count -} - // A searchState contains the search match info for a single line type searchState struct { search string @@ -41,10 +19,14 @@ type searchState struct { done bool } -// A Line contains the data in bytes as well as a highlight state, match +type Character struct { + combc []rune +} + +// A Line contains the slice of runes as well as a highlight state, match // and a flag for whether the highlighting needs to be updated type Line struct { - data []byte + runes []Character state highlight.State match highlight.LineMatch @@ -59,6 +41,24 @@ type Line struct { search map[*Buffer]*searchState } +// data returns the line as byte slice +func (l Line) data() []byte { + var runes []rune + for _, r := range l.runes { + runes = append(runes, r.combc[0:]...) + } + return []byte(string(runes)) +} + +// String returns the line as string +func (l Line) String() string { + var runes []rune + for _, r := range l.runes { + runes = append(runes, r.combc[0:]...) + } + return string(runes) +} + const ( // Line ending file formats FFAuto = 0 // Autodetect format @@ -94,7 +94,7 @@ func Append(slice []Line, data ...Line) []Line { return slice } -// NewLineArray returns a new line array from an array of bytes +// NewLineArray returns a new line array from an array of runes func NewLineArray(size uint64, endings FileFormat, reader io.Reader) *LineArray { la := new(LineArray) @@ -144,10 +144,16 @@ func NewLineArray(size uint64, endings FileFormat, reader io.Reader) *LineArray loaded += dlen } + var runes []Character if err != nil { if err == io.EOF { + for len(data) > 0 { + combc, s := util.DecodeCombinedCharacter(data) + runes = append(runes, Character{combc}) + data = data[s:] + } la.lines = Append(la.lines, Line{ - data: data, + runes: runes, state: nil, match: nil, }) @@ -155,8 +161,14 @@ func NewLineArray(size uint64, endings FileFormat, reader io.Reader) *LineArray // Last line was read break } else { + data = data[:dlen-1] + for len(data) > 0 { + combc, s := util.DecodeCombinedCharacter(data) + runes = append(runes, Character{combc}) + data = data[s:] + } la.lines = Append(la.lines, Line{ - data: data[:dlen-1], + runes: runes, state: nil, match: nil, }) @@ -174,7 +186,7 @@ func (la *LineArray) Bytes() []byte { // initsize should provide a good estimate b.Grow(int(la.initsize + 4096)) for i, l := range la.lines { - b.Write(l.data) + b.Write(l.data()) if i != len(la.lines)-1 { if la.Endings == FFDos { b.WriteByte('\r') @@ -188,13 +200,13 @@ func (la *LineArray) Bytes() []byte { // newlineBelow adds a newline below the given line number func (la *LineArray) newlineBelow(y int) { la.lines = append(la.lines, Line{ - data: []byte{' '}, + runes: []Character{}, state: nil, match: nil, }) copy(la.lines[y+2:], la.lines[y+1:]) la.lines[y+1] = Line{ - data: []byte{}, + runes: []Character{}, state: la.lines[y].state, match: nil, } @@ -205,41 +217,65 @@ func (la *LineArray) insert(pos Loc, value []byte) { la.lock.Lock() defer la.lock.Unlock() - x, y := runeToByteIndex(pos.X, la.lines[pos.Y].data), pos.Y - for i := 0; i < len(value); i++ { - if value[i] == '\n' || (value[i] == '\r' && i < len(value)-1 && value[i+1] == '\n') { - la.split(Loc{x, y}) - x = 0 - y++ - - if value[i] == '\r' { - i++ + var runes []Character + for len(value) > 0 { + combc, s := util.DecodeCombinedCharacter(value) + runes = append(runes, Character{combc}) + value = value[s:] + } + x, y := util.Min(pos.X, len(la.lines[pos.Y].runes)), pos.Y + start := -1 + +outer: + for i, r := range runes { + for j := 0; j < len(r.combc); j++ { + if r.combc[j] == '\n' || (r.combc[j] == '\r' && i < len(runes)-1 && r.combc[j+1] == '\n') { + la.split(Loc{x, y}) + if i > 0 && start < len(runes) && start < i { + if start < 0 { + start = 0 + } + la.insertRunes(Loc{x, y}, runes[start:i]) + } + + x = 0 + y++ + + if r.combc[j] == '\r' { + i++ + } + if i+1 <= len(runes) { + start = i + 1 + } + + continue outer } - - continue } - la.insertByte(Loc{x, y}, value[i]) - x++ + } + if start < 0 { + la.insertRunes(Loc{x, y}, runes) + } else if start < len(runes) { + la.insertRunes(Loc{x, y}, runes[start:]) } } -// InsertByte inserts a byte at a given location -func (la *LineArray) insertByte(pos Loc, value byte) { - la.lines[pos.Y].data = append(la.lines[pos.Y].data, 0) - copy(la.lines[pos.Y].data[pos.X+1:], la.lines[pos.Y].data[pos.X:]) - la.lines[pos.Y].data[pos.X] = value +// Inserts a rune array at a given location +func (la *LineArray) insertRunes(pos Loc, runes []Character) { + la.lines[pos.Y].runes = append(la.lines[pos.Y].runes, runes...) + copy(la.lines[pos.Y].runes[pos.X+len(runes):], la.lines[pos.Y].runes[pos.X:]) + copy(la.lines[pos.Y].runes[pos.X:], runes) } // joinLines joins the two lines a and b func (la *LineArray) joinLines(a, b int) { - la.lines[a].data = append(la.lines[a].data, la.lines[b].data...) + la.insertRunes(Loc{len(la.lines[a].runes), a}, la.lines[b].runes) la.deleteLine(b) } // split splits a line at a given position func (la *LineArray) split(pos Loc) { la.newlineBelow(pos.Y) - la.lines[pos.Y+1].data = append(la.lines[pos.Y+1].data, la.lines[pos.Y].data[pos.X:]...) + la.insertRunes(Loc{0, pos.Y + 1}, la.lines[pos.Y].runes[pos.X:]) la.lines[pos.Y+1].state = la.lines[pos.Y].state la.lines[pos.Y].state = nil la.lines[pos.Y].match = nil @@ -253,10 +289,10 @@ func (la *LineArray) remove(start, end Loc) []byte { defer la.lock.Unlock() sub := la.Substr(start, end) - startX := runeToByteIndex(start.X, la.lines[start.Y].data) - endX := runeToByteIndex(end.X, la.lines[end.Y].data) + startX := util.Min(start.X, len(la.lines[start.Y].runes)) + endX := util.Min(end.X, len(la.lines[end.Y].runes)) if start.Y == end.Y { - la.lines[start.Y].data = append(la.lines[start.Y].data[:startX], la.lines[start.Y].data[endX:]...) + la.lines[start.Y].runes = append(la.lines[start.Y].runes[:startX], la.lines[start.Y].runes[endX:]...) } else { la.deleteLines(start.Y+1, end.Y-1) la.deleteToEnd(Loc{startX, start.Y}) @@ -268,12 +304,12 @@ func (la *LineArray) remove(start, end Loc) []byte { // deleteToEnd deletes from the end of a line to the position func (la *LineArray) deleteToEnd(pos Loc) { - la.lines[pos.Y].data = la.lines[pos.Y].data[:pos.X] + la.lines[pos.Y].runes = la.lines[pos.Y].runes[:pos.X] } // deleteFromStart deletes from the start of a line to the position func (la *LineArray) deleteFromStart(pos Loc) { - la.lines[pos.Y].data = la.lines[pos.Y].data[pos.X+1:] + la.lines[pos.Y].runes = la.lines[pos.Y].runes[pos.X+1:] } // deleteLine deletes the line number @@ -287,22 +323,35 @@ func (la *LineArray) deleteLines(y1, y2 int) { // Substr returns the string representation between two locations func (la *LineArray) Substr(start, end Loc) []byte { - startX := runeToByteIndex(start.X, la.lines[start.Y].data) - endX := runeToByteIndex(end.X, la.lines[end.Y].data) - if start.Y == end.Y { - src := la.lines[start.Y].data[startX:endX] - dest := make([]byte, len(src)) - copy(dest, src) - return dest + startX := util.Min(start.X, len(la.lines[start.Y].runes)) + endX := util.Min(end.X, len(la.lines[end.Y].runes)) + var runes []rune + if start.Y == end.Y && startX <= endX { + for _, r := range la.lines[start.Y].runes[startX:endX] { + runes = append(runes, r.combc[0:]...) + } + return []byte(string(runes)) } - str := make([]byte, 0, len(la.lines[start.Y+1].data)*(end.Y-start.Y)) - str = append(str, la.lines[start.Y].data[startX:]...) + + var str []byte + for _, r := range la.lines[start.Y].runes[startX:] { + runes = append(runes, r.combc[0:]...) + } + str = append(str, []byte(string(runes))...) str = append(str, '\n') for i := start.Y + 1; i <= end.Y-1; i++ { - str = append(str, la.lines[i].data...) + runes = runes[:0] + for _, r := range la.lines[i].runes { + runes = append(runes, r.combc[0:]...) + } + str = append(str, []byte(string(runes))...) str = append(str, '\n') } - str = append(str, la.lines[end.Y].data[:endX]...) + runes = runes[:0] + for _, r := range la.lines[end.Y].runes[:endX] { + runes = append(runes, r.combc[0:]...) + } + str = append(str, []byte(string(runes))...) return str } @@ -319,15 +368,38 @@ func (la *LineArray) Start() Loc { // End returns the location of the last character in the buffer func (la *LineArray) End() Loc { numlines := len(la.lines) - return Loc{util.CharacterCount(la.lines[numlines-1].data), numlines - 1} + return Loc{len(la.lines[numlines-1].runes), numlines - 1} +} + +// LineCharacters returns line n as an array of characters +func (la *LineArray) LineCharacters(n int) []Character { + if n >= len(la.lines) || n < 0 { + return []Character{} + } + + return la.lines[n].runes } // LineBytes returns line n as an array of bytes -func (la *LineArray) LineBytes(lineN int) []byte { - if lineN >= len(la.lines) || lineN < 0 { +func (la *LineArray) LineBytes(n int) []byte { + if n >= len(la.lines) || n < 0 { return []byte{} } - return la.lines[lineN].data + return la.lines[n].data() +} + +// LineString returns line n as an string +func (la *LineArray) LineString(n int) string { + if n >= len(la.lines) || n < 0 { + return string("") + } + + var runes []rune + for _, r := range la.lines[n].runes { + runes = append(runes, r.combc[0:]...) + } + + return string(runes) } // State gets the highlight state for the given line number @@ -409,7 +481,7 @@ func (la *LineArray) SearchMatch(b *Buffer, pos Loc) bool { if !s.done { s.match = nil start := Loc{0, lineN} - end := Loc{util.CharacterCount(la.lines[lineN].data), lineN} + end := Loc{len(la.lines[lineN].runes), lineN} for start.X < end.X { m, found, _ := b.FindNext(b.LastSearch, start, end, start, true, b.LastSearchRegex) if !found { diff --git a/internal/buffer/save.go b/internal/buffer/save.go index 44e8f4a3e..e9a1bd1d6 100644 --- a/internal/buffer/save.go +++ b/internal/buffer/save.go @@ -2,7 +2,6 @@ package buffer import ( "bufio" - "bytes" "errors" "io" "io/fs" @@ -11,6 +10,7 @@ import ( "os/signal" "path/filepath" "runtime" + "strings" "time" "unicode" @@ -156,7 +156,7 @@ func (wf wrappedFile) Write(b *SharedBuffer) (int, error) { } // write lines - size, err := file.Write(b.lines[0].data) + size, err := file.Write(b.lines[0].data()) if err != nil { return 0, err } @@ -165,10 +165,10 @@ func (wf wrappedFile) Write(b *SharedBuffer) (int, error) { if _, err = file.Write(eol); err != nil { return 0, err } - if _, err = file.Write(l.data); err != nil { + if _, err = file.Write(l.data()); err != nil { return 0, err } - size += len(eol) + len(l.data) + size += len(eol) + len(l.data()) } err = file.Flush() @@ -249,10 +249,9 @@ func (b *Buffer) saveToFile(filename string, withSudo bool, autoSave bool) error if !autoSave && b.Settings["rmtrailingws"].(bool) { for i, l := range b.lines { - leftover := util.CharacterCount(bytes.TrimRightFunc(l.data, unicode.IsSpace)) - - linelen := util.CharacterCount(l.data) - b.Remove(Loc{leftover, i}, Loc{linelen, i}) + leftover := strings.TrimRightFunc(l.String(), unicode.IsSpace) + linelen := len(l.runes) + b.Remove(Loc{len(leftover), i}, Loc{linelen, i}) } b.RelocateCursors() diff --git a/internal/util/unicode.go b/internal/util/unicode.go index 14243e68b..3268e35c9 100644 --- a/internal/util/unicode.go +++ b/internal/util/unicode.go @@ -29,11 +29,27 @@ func isMark(r rune) bool { // DecodeCharacter returns the next character from an array of bytes // A character is a rune along with any accompanying combining runes func DecodeCharacter(b []byte) (rune, []rune, int) { + combc, size := DecodeCombinedCharacter(b) + return combc[0], combc[1:], size +} + +// DecodeCharacterInString returns the next character from a string +// A character is a rune along with any accompanying combining runes +func DecodeCharacterInString(str string) (rune, []rune, int) { + combc, size := DecodeCombinedCharacterInString(str) + return combc[0], combc[1:], size +} + +// DecodeCombinedCharacter returns the next combined character +// from an array of bytes +// A character is a rune along with any accompanying combining runes +func DecodeCombinedCharacter(b []byte) ([]rune, int) { + var combc []rune r, size := utf8.DecodeRune(b) + combc = append(combc, r) b = b[size:] c, s := utf8.DecodeRune(b) - var combc []rune for isMark(c) { combc = append(combc, c) size += s @@ -42,17 +58,18 @@ func DecodeCharacter(b []byte) (rune, []rune, int) { c, s = utf8.DecodeRune(b) } - return r, combc, size + return combc, size } -// DecodeCharacterInString returns the next character from a string -// A character is a rune along with any accompanying combining runes -func DecodeCharacterInString(str string) (rune, []rune, int) { +// DecodeCombinedCharacterInString is the same as DecodeCombinedCharacter +// but for strings +func DecodeCombinedCharacterInString(str string) ([]rune, int) { + var combc []rune r, size := utf8.DecodeRuneInString(str) + combc = append(combc, r) str = str[size:] c, s := utf8.DecodeRuneInString(str) - var combc []rune for isMark(c) { combc = append(combc, c) size += s @@ -61,7 +78,7 @@ func DecodeCharacterInString(str string) (rune, []rune, int) { c, s = utf8.DecodeRuneInString(str) } - return r, combc, size + return combc, size } // CharacterCount returns the number of characters in a byte array