diff --git a/cmd/micro/initlua.go b/cmd/micro/initlua.go index ffe175a02d..db436deebe 100644 --- a/cmd/micro/initlua.go +++ b/cmd/micro/initlua.go @@ -141,6 +141,7 @@ func luaImportMicroBuffer() *lua.LTable { ulua.L.SetField(pkg, "ByteOffset", luar.New(ulua.L, buffer.ByteOffset)) ulua.L.SetField(pkg, "Log", luar.New(ulua.L, buffer.WriteLog)) ulua.L.SetField(pkg, "LogBuf", luar.New(ulua.L, buffer.GetLogBuf)) + ulua.L.SetField(pkg, "NewRegexpData", luar.New(ulua.L, buffer.NewRegexpData)) return pkg } diff --git a/internal/action/command.go b/internal/action/command.go index bc26f0e916..e28557b27f 100644 --- a/internal/action/command.go +++ b/internal/action/command.go @@ -1018,17 +1018,8 @@ func (h *BufPane) ReplaceCmd(args []string) { replace := []byte(replaceStr) - var regex *regexp.Regexp - var err error if h.Buf.Settings["ignorecase"].(bool) { - regex, err = regexp.Compile("(?im)" + search) - } else { - regex, err = regexp.Compile("(?m)" + search) - } - if err != nil { - // There was an error with the user's regex - InfoBar.Error(err) - return + search = "(?i)" + search } nreplaced := 0 @@ -1042,8 +1033,23 @@ func (h *BufPane) ReplaceCmd(args []string) { searchLoc = start // otherwise me might start at the end } if all { - nreplaced, _ = h.Buf.ReplaceRegex(start, end, regex, replace, !noRegex) + var err error + if noRegex { + nreplaced, _, err = h.Buf.ReplaceAllLiteral(search, start, end, replace) + } else { + nreplaced, _, err = h.Buf.ReplaceAll(search, start, end, replace) + } + if err != nil { + InfoBar.Error(err) + return + } } else { + redata, err := buffer.NewRegexpData(search) + if err != nil { + InfoBar.Error(err) + return + } + inRange := func(l buffer.Loc) bool { return l.GreaterEqual(start) && l.LessEqual(end) } @@ -1051,12 +1057,8 @@ func (h *BufPane) ReplaceCmd(args []string) { lastMatchEnd := buffer.Loc{-1, -1} var doReplacement func() doReplacement = func() { - locs, found, err := h.Buf.FindNext(search, start, end, searchLoc, true, true) - if err 
!= nil { - InfoBar.Error(err) - return - } - if !found || !inRange(locs[0]) || !inRange(locs[1]) { + locs := h.Buf.FindRegexpDown(redata, searchLoc, end) + if locs == nil || !inRange(locs[0]) || !inRange(locs[1]) { h.Cursor.ResetSelection() h.Buf.RelocateCursors() @@ -1084,12 +1086,14 @@ func (h *BufPane) ReplaceCmd(args []string) { InfoBar.YNPrompt("Perform replacement (y,n,esc)", func(yes, canceled bool) { if !canceled && yes { - _, nrunes := h.Buf.ReplaceRegex(locs[0], locs[1], regex, replace, !noRegex) + if noRegex { + _, searchLoc, _ = h.Buf.ReplaceAllLiteral(search, locs[0], locs[1], replace) + } else { + _, searchLoc, _ = h.Buf.ReplaceAll(search, locs[0], locs[1], replace) + } - searchLoc = locs[0] - searchLoc.X += nrunes + locs[0].Diff(locs[1], h.Buf) if end.Y == locs[1].Y { - end = end.Move(nrunes, h.Buf) + end = buffer.Loc{end.X + searchLoc.X - locs[1].X, end.Y} } h.Cursor.Loc = searchLoc nreplaced++ diff --git a/internal/buffer/buffer.go b/internal/buffer/buffer.go index ce36988bb1..c182c48c20 100644 --- a/internal/buffer/buffer.go +++ b/internal/buffer/buffer.go @@ -269,13 +269,13 @@ type Buffer struct { } // NewBufferFromFileWithCommand opens a new buffer with a given command -// If cmd.StartCursor is {-1, -1} the location does not overwrite what the cursor location +// If cmd.StartCursor is invalid, the location does not overwrite what the cursor location // would otherwise be (start of file, or saved cursor position if `savecursor` is // enabled) func NewBufferFromFileWithCommand(path string, btype BufType, cmd Command) (*Buffer, error) { var err error filename := path - if config.GetGlobalOption("parsecursor").(bool) && cmd.StartCursor.X == -1 && cmd.StartCursor.Y == -1 { + if config.GetGlobalOption("parsecursor").(bool) && !cmd.StartCursor.IsValid() { var cursorPos []string filename, cursorPos = util.GetPathAndCursorPosition(filename) cmd.StartCursor, err = ParseCursorLocation(cursorPos) diff --git a/internal/buffer/eventhandler.go 
b/internal/buffer/eventhandler.go index e739f25011..47cb979f0c 100644 --- a/internal/buffer/eventhandler.go +++ b/internal/buffer/eventhandler.go @@ -30,6 +30,8 @@ type TextEvent struct { C Cursor EventType int + // If there are several deltas for the same line, they must not overlap + // and be ordered by increasing start position Deltas []Delta Time time.Time } @@ -114,24 +116,24 @@ func (eh *EventHandler) DoTextEvent(t *TextEvent, useUndo bool) { // ExecuteTextEvent runs a text event func ExecuteTextEvent(t *TextEvent, buf *SharedBuffer) { - if t.EventType == TextEventInsert { - for _, d := range t.Deltas { + for i := len(t.Deltas) - 1; i >= 0; i-- { + // Processing the deltas in increasing order would require + // to recompute the positions of the later deltas + d := t.Deltas[i] + if t.EventType == TextEventInsert { buf.insert(d.Start, d.Text) - } - } else if t.EventType == TextEventRemove { - for i, d := range t.Deltas { + } else if t.EventType == TextEventRemove { t.Deltas[i].Text = buf.remove(d.Start, d.End) - } - } else if t.EventType == TextEventReplace { - for i, d := range t.Deltas { + } else { // TextEventReplace t.Deltas[i].Text = buf.remove(d.Start, d.End) buf.insert(d.Start, d.Text) t.Deltas[i].Start = d.Start t.Deltas[i].End = Loc{d.Start.X + util.CharacterCount(d.Text), d.Start.Y} } - for i, j := 0, len(t.Deltas)-1; i < j; i, j = i+1, j-1 { - t.Deltas[i], t.Deltas[j] = t.Deltas[j], t.Deltas[i] - } + } + + for i, j := 0, len(t.Deltas)-1; i < j; i, j = i+1, j-1 { + t.Deltas[i], t.Deltas[j] = t.Deltas[j], t.Deltas[i] } } @@ -195,7 +197,7 @@ func (eh *EventHandler) InsertBytes(start Loc, text []byte) { e := &TextEvent{ C: *eh.cursors[eh.active], EventType: TextEventInsert, - Deltas: []Delta{{text, start, Loc{0, 0}}}, + Deltas: []Delta{{text, start, Loc{-1, -1}}}, Time: time.Now(), } eh.DoTextEvent(e, true) diff --git a/internal/buffer/loc.go b/internal/buffer/loc.go index d59578071d..15eee7beba 100644 --- a/internal/buffer/loc.go +++ 
b/internal/buffer/loc.go @@ -9,6 +9,11 @@ type Loc struct { X, Y int } +// IsValid returns true if the argument is an actual buffer location +func (l Loc) IsValid() bool { + return l.X >= 0 && l.Y >= 0 +} + // LessThan returns true if b is smaller func (l Loc) LessThan(b Loc) bool { if l.Y < b.Y { diff --git a/internal/buffer/search.go b/internal/buffer/search.go index 76931ee283..34ae17739a 100644 --- a/internal/buffer/search.go +++ b/internal/buffer/search.go @@ -1,141 +1,190 @@ package buffer import ( + "fmt" "regexp" "unicode/utf8" "github.com/zyedidia/micro/v2/internal/util" ) -// We want "^" and "$" to match only the beginning/end of a line, not the -// beginning/end of the search region if it is in the middle of a line. -// In that case we use padded regexps to require a rune before or after -// the match. (This also affects other empty-string patters like "\\b".) -// The following two flags indicate the padding used. +// RegexpData combines a Regexp with padded versions. +type RegexpData struct { + // We want "^" and "$" to match only the beginning/end of a line, not that + // of the search region somewhere in the middle of a line. In that case we + // use padded regexps to require a rune before or after the match. (This + // also affects other empty-string patterns like "\\b".) 
+ regex [4]*regexp.Regexp +} + +// Regexp returns the Regexp determining the RegexpData +func (redata *RegexpData) Regexp() *regexp.Regexp { + return redata.regex[0] +} + const ( padStart = 1 << iota padEnd ) -func findLineParams(b *Buffer, start, end Loc, i int, r *regexp.Regexp) ([]byte, int, int, *regexp.Regexp) { - l := b.LineBytes(i) - charpos := 0 - padMode := 0 - - if i == end.Y { - nchars := util.CharacterCount(l) - end.X = util.Clamp(end.X, 0, nchars) - if end.X < nchars { - l = util.SliceStart(l, end.X+1) - padMode |= padEnd - } - } - - if i == start.Y { - nchars := util.CharacterCount(l) - start.X = util.Clamp(start.X, 0, nchars) - if start.X > 0 { - charpos = start.X - 1 - l = util.SliceEnd(l, charpos) - padMode |= padStart - } - } - - if padMode != 0 { - re, err := regexp.Compile(r.String() + `\E`) - if err == nil { - // r contains \Q without closing \E - r = re - } - - if padMode == padStart { - r = regexp.MustCompile(".(?:" + r.String() + ")") - } else if padMode == padEnd { - r = regexp.MustCompile("(?:" + r.String() + ").") - } else { - // padMode == padStart|padEnd - r = regexp.MustCompile(".(?:" + r.String() + ").") +// NewRegexpData creates RegexpData from a string +func NewRegexpData(s string) (*RegexpData, error) { + var regex [4]*regexp.Regexp + var err error + regex[0], err = regexp.Compile(s) + if err == nil { + s_e := s + `\E` + _, err_e := regexp.Compile(s_e) + if err_e == nil { + s = s_e } + regex[padStart] = regexp.MustCompile(".(?:" + s + ")") + regex[padEnd] = regexp.MustCompile("(?:" + s + ").") + regex[padStart|padEnd] = regexp.MustCompile(".(?:" + s + ").") } - - return l, charpos, padMode, r + return &RegexpData{regex}, err } -func (b *Buffer) findDown(r *regexp.Regexp, start, end Loc) ([2]Loc, bool) { - lastcn := util.CharacterCount(b.LineBytes(b.LinesNum() - 1)) - if start.Y > b.LinesNum()-1 { - start.X = lastcn - 1 - } - if end.Y > b.LinesNum()-1 { - end.X = lastcn +func regexpData(re any) (*RegexpData, error) { + switch re := 
re.(type) { + case *RegexpData: + return re, nil + case string: + return NewRegexpData(re) + default: + return &RegexpData{}, fmt.Errorf(`cannot convert "%v" (of type %[1]T) to type RegexpData`, re) } - start.Y = util.Clamp(start.Y, 0, b.LinesNum()-1) - end.Y = util.Clamp(end.Y, 0, b.LinesNum()-1) +} - if start.GreaterThan(end) { - start, end = end, start - } +type bytesFind func(*regexp.Regexp, []byte) []int +func (b *Buffer) findDownFunc(redata *RegexpData, start, end Loc, find bytesFind) []Loc { for i := start.Y; i <= end.Y; i++ { - l, charpos, padMode, rPadded := findLineParams(b, start, end, i, r) + l := b.LineBytes(i) + from, to := 0, len(l) + padMode := 0 + + if i == end.Y { + nchars := util.CharacterCount(l) + end.X = util.Clamp(end.X, 0, nchars) + if end.X < nchars { + padMode |= padEnd + to = util.NextRunePos(l, util.BytePosFromCharPos(l, end.X)) + } + } - match := rPadded.FindIndex(l) + if i == start.Y { + nchars := util.CharacterCount(l) + start.X = util.Clamp(start.X, 0, nchars) + if start.X > 0 { + padMode |= padStart + from = util.PreviousRunePos(l, util.BytePosFromCharPos(l, start.X)) + } + } + + s := l[from:to] + match := find(redata.regex[padMode], s) if match != nil { if padMode&padStart != 0 { - _, size := utf8.DecodeRune(l[match[0]:]) - match[0] += size + match[0] = util.NextRunePos(s, match[0]) } if padMode&padEnd != 0 { - _, size := utf8.DecodeLastRune(l[:match[1]]) - match[1] -= size + match[1] = util.PreviousRunePos(s, match[1]) } - start := Loc{charpos + util.RunePos(l, match[0]), i} - end := Loc{charpos + util.RunePos(l, match[1]), i} - return [2]Loc{start, end}, true + return util.RangeMap(match, func(j, pos int) Loc { + if pos >= 0 { + x := util.CharacterCount(l[:from+pos]) + if j%2 == 0 { + r, _ := utf8.DecodeRune(s[pos:]) + if util.IsMark(r) { + x-- + } + } + return Loc{x, i} + } else { // start or end of unused submatch + return Loc{-1, -1} + } + }) } } - return [2]Loc{}, false + return nil } -func (b *Buffer) findUp(r 
*regexp.Regexp, start, end Loc) ([2]Loc, bool) { - lastcn := util.CharacterCount(b.LineBytes(b.LinesNum() - 1)) - if start.Y > b.LinesNum()-1 { - start.X = lastcn - 1 +type bufferFind func(*Buffer, *RegexpData, Loc, Loc) []Loc + +// FindDown returns a slice containing the start and end positions +// of the first match of `re` between `start` and `end` plus those +// of all submatches (capturing groups), or nil if no match exists. +// The start and end positions of an unused submatch are invalid. +func (b *Buffer) FindDown(re string, start, end Loc) ([]Loc, error) { + redata, err := NewRegexpData(re) + if err != nil { + return nil, err } - if end.Y > b.LinesNum()-1 { - end.X = lastcn + return b.FindRegexpDown(redata, start, end), nil +} + +func (b *Buffer) FindRegexpDown(redata *RegexpData, start, end Loc) []Loc { + if start.GreaterThan(end) { + return nil + } + return b.findDownFunc(redata, start, end, (*regexp.Regexp).FindSubmatchIndex) +} + +// FindUp returns a slice containing the start and end positions +// of the last match of `re` between `start` and `end` plus those +// of all submatches (capturing groups), or nil if no match exists. +// The start and end positions of an unused submatch are invalid. 
+func (b *Buffer) FindUp(re string, start, end Loc) ([]Loc, error) { + redata, err := NewRegexpData(re) + if err != nil { + return nil, err } - start.Y = util.Clamp(start.Y, 0, b.LinesNum()-1) - end.Y = util.Clamp(end.Y, 0, b.LinesNum()-1) + return b.FindRegexpUp(redata, start, end), nil +} +func (b *Buffer) FindRegexpUp(redata *RegexpData, start, end Loc) []Loc { if start.GreaterThan(end) { - start, end = end, start + return nil } + var locs []Loc for i := end.Y; i >= start.Y; i-- { charCount := util.CharacterCount(b.LineBytes(i)) from := Loc{0, i}.Clamp(start, end) to := Loc{charCount, i}.Clamp(start, end) - allMatches := b.findAll(r, from, to) - if allMatches != nil { - match := allMatches[len(allMatches)-1] - return [2]Loc{match[0], match[1]}, true + b.findAllFuncFunc(redata, from, to, func(b *Buffer, redata *RegexpData, start, end Loc) []Loc { + return b.findDownFunc(redata, start, end, func(r *regexp.Regexp, l []byte) []int { + allMatches := r.FindAllSubmatchIndex(l, -1) + if allMatches != nil { + return allMatches[len(allMatches)-1] + } else { + return nil + } + }) + }, func(match []Loc) { + locs = match + }) + + if locs != nil { + return locs } } - return [2]Loc{}, false + return nil } -func (b *Buffer) findAll(r *regexp.Regexp, start, end Loc) [][2]Loc { - var matches [][2]Loc +func (b *Buffer) findAllFuncFunc(redata *RegexpData, start, end Loc, find bufferFind, f func([]Loc)) int { + n := 0 loc := start for { - match, found := b.findDown(r, loc, end) - if !found { + match := find(b, redata, loc, end) + if match == nil { break } - matches = append(matches, match) + n++ + f(match) if match[0] != match[1] { loc = match[1] } else if match[1] != end { @@ -144,7 +193,45 @@ func (b *Buffer) findAll(r *regexp.Regexp, start, end Loc) [][2]Loc { break } } - return matches + return n +} + +// FindAllFunc calls the function `f` once for each match between +// `start` and `end` of the regexp given by `re`. 
The argument of `f` is the +// slice containing the start and end positions of the match and all submatches +// (capturing groups). FindAllFunc returns the number of matches plus +// any error that occurred when compiling the regexp. +func (b *Buffer) FindAllFunc(re string, start, end Loc, f func([]Loc)) (int, error) { + redata, err := NewRegexpData(re) + if err != nil { + return -1, err + } + return b.findAllFuncFunc(redata, start, end, (*Buffer).FindRegexpDown, f), nil +} + +// FindAll returns a slice containing the start and end positions of +// all matches and all submatches (capturing groups) between `start` and `end` +// of the regexp given by `re`, plus any error that occurred when compiling +// the regexp. If no match is found, the slice returned is nil. +func (b *Buffer) FindAll(re string, start, end Loc) ([][]Loc, error) { + var matches [][]Loc + _, err := b.FindAllFunc(re, start, end, func(match []Loc) { + matches = append(matches, match) + }) + return matches, err +} + +// MatchedStrings converts a slice containing start and end positions of +// matches or submatches to a slice containing the corresponding strings. +// Unused submatches are converted to empty strings. 
+func (b *Buffer) MatchedStrings(locs []Loc) []string { + strs := make([]string, len(locs)/2) + for i := range strs { + if locs[2*i].IsValid() { + strs[i] = string(b.Substr(locs[2*i], locs[2*i+1])) + } + } + return strs } // FindNext finds the next occurrence of a given string in the buffer @@ -156,92 +243,113 @@ func (b *Buffer) FindNext(s string, start, end, from Loc, down bool, useRegex bo return [2]Loc{}, false, nil } - var r *regexp.Regexp - var err error - if !useRegex { s = regexp.QuoteMeta(s) } if b.Settings["ignorecase"].(bool) { - r, err = regexp.Compile("(?i)" + s) - } else { - r, err = regexp.Compile(s) + s = "(?i)" + s } + redata, err := NewRegexpData(s) if err != nil { return [2]Loc{}, false, err } - var found bool - var l [2]Loc + if start.GreaterThan(end) { + start, end = end, start + } + + var match []Loc if down { - l, found = b.findDown(r, from, end) - if !found { - l, found = b.findDown(r, start, end) + match = b.FindRegexpDown(redata, from, end) + if match == nil { + match = b.FindRegexpDown(redata, start, end) } } else { - l, found = b.findUp(r, from, start) - if !found { - l, found = b.findUp(r, end, start) + match = b.FindRegexpUp(redata, start, from) + if match == nil { + match = b.FindRegexpUp(redata, start, end) } } - return l, found, nil + if match != nil { + return [2]Loc{match[0], match[1]}, true, nil + } else { + return [2]Loc{}, false, nil + } } -// ReplaceRegex replaces all occurrences of 'search' with 'replace' in the given area -// and returns the number of replacements made and the number of characters -// added or removed on the last line of the range -func (b *Buffer) ReplaceRegex(start, end Loc, search *regexp.Regexp, replace []byte, captureGroups bool) (int, int) { - if start.GreaterThan(end) { - start, end = end, start +// Expand returns the template, with variables replaced by submatches. 
+// It is analogous to `(*regexp.Regexp).Expand` +func (b *Buffer) Expand(re any, template []byte, match []Loc) (string, error) { + redata, err := regexpData(re) + if err != nil { + return "", err + } + l := b.LineBytes(match[0].Y) + m := util.RangeMap(match, func(_ int, pos Loc) int { + return util.BytePosFromCharPos(l, pos.X) + }) + return string(redata.Regexp().Expand(nil, template, l, m)), nil +} + +func (b *Buffer) replaceAllFuncFunc(re string, start, end Loc, find bufferFind, repl func(match []Loc) []byte) (int, Loc, error) { + redata, err := NewRegexpData(re) + if err != nil { + return -1, Loc{-1, -1}, err } charsEnd := util.CharacterCount(b.LineBytes(end.Y)) - found := 0 var deltas []Delta - for i := start.Y; i <= end.Y; i++ { - l := b.LineBytes(i) - charCount := util.CharacterCount(l) - if (i == start.Y && start.X > 0) || (i == end.Y && end.X < charCount) { - // This replacement code works in general, but it creates a separate - // modification for each match. We only use it for the first and last - // lines, which may use padded regexps - - from := Loc{0, i}.Clamp(start, end) - to := Loc{charCount, i}.Clamp(start, end) - matches := b.findAll(search, from, to) - found += len(matches) - - for j := len(matches) - 1; j >= 0; j-- { - // if we counted upwards, the different deltas would interfere - match := matches[j] - var newText []byte - if captureGroups { - newText = search.ReplaceAll(b.Substr(match[0], match[1]), replace) - } else { - newText = replace - } - deltas = append(deltas, Delta{newText, match[0], match[1]}) + n := b.findAllFuncFunc(redata, start, end, find, func(match []Loc) { + deltas = append(deltas, Delta{repl(match), match[0], match[1]}) + }) + + b.MultipleReplace(deltas) + + deltaX := util.CharacterCount(b.LineBytes(end.Y)) - charsEnd + return n, Loc{end.X + deltaX, end.Y}, nil +} + +// ReplaceAll replaces all matches of the regexp `re` in the given area. 
The +// new text is obtained from `template` by replacing each variable with the +// corresponding submatch as in `(*regexp.Regexp).Expand`. The function +// returns the number of replacements made, the new end position and any +// error that occurred during regexp compilation +func (b *Buffer) ReplaceAll(re string, start, end Loc, template []byte) (int, Loc, error) { + var replace []byte + + find := func(b *Buffer, redata *RegexpData, start, end Loc) []Loc { + return b.findDownFunc(redata, start, end, func(re *regexp.Regexp, l []byte) []int { + match := re.FindSubmatchIndex(l) + if match == nil { + return nil } - } else { - newLine := search.ReplaceAllFunc(l, func(in []byte) []byte { - found++ - var result []byte - if captureGroups { - match := search.FindSubmatchIndex(in) - result = search.Expand(result, replace, in, match) - } else { - result = replace - } - return result - }) - deltas = append(deltas, Delta{newLine, Loc{0, i}, Loc{charCount, i}}) - } + replace = re.Expand(nil, template, l, match) + return match[:2] // this way match[2:] is not transformed to Loc's + }) } - b.MultipleReplace(deltas) + return b.replaceAllFuncFunc(re, start, end, find, func(match []Loc) []byte { + return replace + }) +} + +// ReplaceAllLiteral replaces all matches of the regexp `re` with `repl` in +// the given area. The function returns the number of replacements made, the +// new end position and any error that occurred during regexp compilation +func (b *Buffer) ReplaceAllLiteral(re string, start, end Loc, repl []byte) (int, Loc, error) { + return b.ReplaceAllFunc(re, start, end, func([]Loc) []byte { + return repl + }) +} - return found, util.CharacterCount(b.LineBytes(end.Y)) - charsEnd +// ReplaceAllFunc replaces all matches of the regexp `re` with +// `repl(match)` in the given area, where `match` is the slice containing +// start and end positions of the match and all submatches. 
The function +// returns the number of replacements made, the new end position and any +// error that occurred during regexp compilation +func (b *Buffer) ReplaceAllFunc(re string, start, end Loc, repl func(match []Loc) []byte) (int, Loc, error) { + return b.replaceAllFuncFunc(re, start, end, (*Buffer).FindRegexpDown, repl) } diff --git a/internal/util/unicode.go b/internal/util/unicode.go index 14243e68be..9422e7f685 100644 --- a/internal/util/unicode.go +++ b/internal/util/unicode.go @@ -18,7 +18,8 @@ import ( var minMark = rune(unicode.Mark.R16[0].Lo) -func isMark(r rune) bool { +// IsMark returns true if `r` is a combining rune +func IsMark(r rune) bool { // Fast path if r < minMark { return false @@ -26,6 +27,28 @@ func isMark(r rune) bool { return unicode.In(r, unicode.Mark) } +// PreviousRunePos returns the position of the rune preceding the one starting +// at `i` in the given byte slice, or -1 if there is no valid rune +func PreviousRunePos(b []byte, i int) int { + r, size := utf8.DecodeLastRune(b[:i]) + if r == utf8.RuneError && size <= 1 { + return -1 + } else { + return i - size + } +} + +// NextRunePos returns the position of the rune following the one starting +// at `i` in the given byte slice, or -1 if there is no valid rune +func NextRunePos(b []byte, i int) int { + r, size := utf8.DecodeRune(b[i:]) + if r == utf8.RuneError && size <= 1 { + return -1 + } else { + return i + size + } +} + // DecodeCharacter returns the next character from an array of bytes // A character is a rune along with any accompanying combining runes func DecodeCharacter(b []byte) (rune, []rune, int) { @@ -34,7 +57,7 @@ func DecodeCharacter(b []byte) (rune, []rune, int) { c, s := utf8.DecodeRune(b) var combc []rune - for isMark(c) { + for IsMark(c) { combc = append(combc, c) size += s @@ -53,7 +76,7 @@ func DecodeCharacterInString(str string) (rune, []rune, int) { c, s := utf8.DecodeRuneInString(str) var combc []rune - for isMark(c) { + for IsMark(c) { combc = append(combc, c) size += s @@ -71,7 
+94,7 @@ func CharacterCount(b []byte) int { for len(b) > 0 { r, size := utf8.DecodeRune(b) - if !isMark(r) { + if !IsMark(r) { s++ } @@ -87,10 +110,28 @@ func CharacterCountInString(str string) int { s := 0 for _, r := range str { - if !isMark(r) { + if !IsMark(r) { s++ } } return s } + +// BytePosFromCharPos returns the position of the byte in `b` that +// starts first rune of the character indexed by `ci`. If `ci` is +// not a valid position, then -1 is returned +func BytePosFromCharPos(b []byte, ci int) int { + if ci < 0 { + return -1 + } + i := 0 + for j := 0; j < ci; j++ { + if i >= len(b) { + return -1 + } + _, _, size := DecodeCharacter(b[i:]) + i += size + } + return i +} diff --git a/internal/util/util.go b/internal/util/util.go index e0ae62f287..f2a2d21903 100644 --- a/internal/util/util.go +++ b/internal/util/util.go @@ -100,6 +100,20 @@ func init() { Stdout = new(bytes.Buffer) } +// RangeMap returns the slice obtained from applying the given function +// to all elements of the argument slice, with the slice index as additional +// argument. Nil values are preserved +func RangeMap[T, V any](ts []T, f func(int, T) V) []V { + if ts == nil { + return nil + } + vs := make([]V, len(ts)) + for i, t := range ts { + vs[i] = f(i, t) + } + return vs +} + // SliceEnd returns a byte slice where the index is a rune index // Slices off the start of the slice func SliceEnd(slc []byte, index int) []byte { @@ -355,12 +369,6 @@ func IsBytesWhitespace(b []byte) bool { return true } -// RunePos returns the rune index of a given byte index -// Make sure the byte index is not between code points -func RunePos(b []byte, i int) int { - return CharacterCount(b[:i]) -} - // IndexAnyUnquoted returns the first position in s of a character from chars. // Escaped (with backslash) and quoted (with single or double quotes) characters // are ignored. 
Returns -1 if not successful diff --git a/runtime/help/plugins.md b/runtime/help/plugins.md index 11170507e9..5107c8a026 100644 --- a/runtime/help/plugins.md +++ b/runtime/help/plugins.md @@ -344,6 +344,9 @@ The packages and their contents are listed below (in Go type signatures): - `Log(s string)`: writes a string to the log buffer. - `LogBuf() *Buffer`: returns the log buffer. + - `NewRegexpData(s string) (*RegexpData, error)`: creates a `RegexpData`, which is used for + searching a buffer. + Relevant links: [Message](https://pkg.go.dev/github.com/zyedidia/micro/v2/internal/buffer#Message) [Loc](https://pkg.go.dev/github.com/zyedidia/micro/v2/internal/buffer#Loc)