// Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package text // TODO: do we care about "\n" vs "\r" vs "\r\n"? We only recognize "\n" for // now. import ( "bytes" "errors" "io" "strings" "unicode/utf8" "golang.org/x/image/math/fixed" ) // Caret is a location in a Frame's text, and is the mechanism for adding and // removing bytes of text. Conceptually, a Caret and a Frame's text is like an // int c and a []byte t such that the text before and after that Caret is t[:c] // and t[c:]. That byte-count location remains unchanged even when a Frame is // re-sized and laid out into a new tree of Paragraphs, Lines and Boxes. // // A Frame can have multiple open Carets. For example, the beginning and end of // a text selection can be represented by two Carets. Multiple Carets for the // one Frame are not safe to use concurrently, but it is valid to interleave // such operations sequentially. For example, if two Carets c0 and c1 for the // one Frame are positioned at the 10th and 20th byte, and 4 bytes are written // to c0, inserting what becomes the equivalent of text[10:14], then c0's // position is updated to be 14 but c1's position is also updated to be 24. type Caret struct { f *Frame // caretsIndex is the index of this Caret in the f.carets slice. caretsIndex int // seqNum is the Frame f's sequence number for which this Caret's cached p, // l, b and k fields are valid. If f has been modified since then, those // fields will have to be re-calculated based on the pos field (which is // always valid). // // TODO: when re-calculating p, l, b and k, be more efficient than a linear // scan from the start or end? seqNum uint64 // p, l and b cache the index of the Caret's Paragraph, Line and Box. None // of these values can be zero. p, l, b int32 // k caches the Caret's position in the text, in Frame.text order. It is // valid to index the Frame.text slice with k, analogous to the Box.i and // Box.j fields. For a Caret c, letting bb := c.f.boxes[c.b], an invariant // is that bb.i <= c.k && c.k <= bb.j if the cache is valid (i.e. the // Caret's seqNum equals the Frame's seqNum). k int32 // pos is the Caret's position in the text, in layout order. It is the "c" // as in "t[:c]" in the doc comment for type Caret above. It is not valid // to index the Frame.text slice with pos, since the Frame.text slice does // not necessarily hold the textual content in layout order. pos int32 tmp [utf8.UTFMax]byte } // Close closes the Caret. func (c *Caret) Close() error { i, j := c.caretsIndex, len(c.f.carets)-1 // Swap c with the last element of c.f.carets. if i != j { other := c.f.carets[j] other.caretsIndex = i c.f.carets[i] = other } c.f.carets[j] = nil c.f.carets = c.f.carets[:j] *c = Caret{} return nil } type leanResult int const ( // leanOK means that the lean changed the Caret's Box. leanOK leanResult = iota // leanFailedEOF means that the lean did not change the Caret's Box, // because the Caret was already at the end / beginning of the Frame (when // leaning forwards / backwards). leanFailedEOF // leanFailedNotEOB means that the lean did not change the Caret's Box, // because the Caret was not placed at the end / beginning of the Box (when // leaning forwards / backwards). leanFailedNotEOB ) // leanForwards moves the Caret from the right end of one Box to the left end // of the next Box, crossing Lines and Paragraphs to find that next Box. It // returns whether the Caret moved to a different Box. // // Diagramatically, suppose we have two adjacent boxes (shown by square // brackets below), with the Caret (an integer location called Caret.pos in the // Frame's text) in the middle of the "foo2bar3" word: // [foo0 foo1 foo2]^[bar3 bar4 bar5] // leanForwards moves Caret.k from fooBox.j to barBox.i, also updating the // Caret's p, l and b. Caret.pos remains unchanged. func (c *Caret) leanForwards() leanResult { if c.k != c.f.boxes[c.b].j { return leanFailedNotEOB } if nextB := c.f.boxes[c.b].next; nextB != 0 { c.b = nextB c.k = c.f.boxes[c.b].i return leanOK } if nextL := c.f.lines[c.l].next; nextL != 0 { c.l = nextL c.b = c.f.lines[c.l].firstB c.k = c.f.boxes[c.b].i return leanOK } if nextP := c.f.paragraphs[c.p].next; nextP != 0 { c.p = nextP c.l = c.f.paragraphs[c.p].firstL c.b = c.f.lines[c.l].firstB c.k = c.f.boxes[c.b].i return leanOK } return leanFailedEOF } // leanBackwards is like leanForwards but in the other direction. func (c *Caret) leanBackwards() leanResult { if c.k != c.f.boxes[c.b].i { return leanFailedNotEOB } if prevB := c.f.boxes[c.b].prev; prevB != 0 { c.b = prevB c.k = c.f.boxes[c.b].j return leanOK } if prevL := c.f.lines[c.l].prev; prevL != 0 { c.l = prevL c.b = c.f.lines[c.l].lastBox(c.f) c.k = c.f.boxes[c.b].j return leanOK } if prevP := c.f.paragraphs[c.p].prev; prevP != 0 { c.p = prevP c.l = c.f.paragraphs[c.p].lastLine(c.f) c.b = c.f.lines[c.l].lastBox(c.f) c.k = c.f.boxes[c.b].j return leanOK } return leanFailedEOF } func (c *Caret) seekStart() { c.p = c.f.firstP c.l = c.f.paragraphs[c.p].firstL c.b = c.f.lines[c.l].firstB c.k = c.f.boxes[c.b].i c.pos = 0 } func (c *Caret) seekEnd() { c.p = c.f.lastParagraph() c.l = c.f.paragraphs[c.p].lastLine(c.f) c.b = c.f.lines[c.l].lastBox(c.f) c.k = c.f.boxes[c.b].j c.pos = int32(c.f.len) } // calculatePLBK ensures that the Caret's cached p, l, b and k fields are // valid. func (c *Caret) calculatePLBK() { if c.seqNum != c.f.seqNum { c.seek(c.pos) } } // Seek satisfies the io.Seeker interface. func (c *Caret) Seek(offset int64, whence int) (int64, error) { switch whence { case SeekSet: // No-op. case SeekCur: offset += int64(c.pos) case SeekEnd: offset += int64(c.f.len) default: return 0, errors.New("text: invalid seek whence") } if offset < 0 { return 0, errors.New("text: negative seek position") } if offset > int64(c.f.len) { offset = int64(c.f.len) } c.seek(int32(offset)) return offset, nil } func (c *Caret) seek(off int32) { delta := off - c.pos // If the new offset is closer to the start or the end than to the current // c.pos, or if c's cached {p,l,b,k} values are invalid, move to the start // or end first. In case of a tie, we prefer to seek forwards (i.e. set // delta > 0). if (delta < 0 && -delta >= off) || (c.seqNum != c.f.seqNum) { c.seekStart() delta = off - c.pos } if delta > 0 && delta > int32(c.f.len)-off { c.seekEnd() delta = off - c.pos } if delta != 0 { // Seek forwards. for delta > 0 { if n := c.f.boxes[c.b].j - c.k; n > 0 { if n > delta { n = delta } c.pos += n c.k += n delta -= n } else if c.leanForwards() != leanOK { panic("text: invalid state") } } // Seek backwards. for delta < 0 { if n := c.f.boxes[c.b].i - c.k; n < 0 { if n < delta { n = delta } c.pos += n c.k += n delta -= n } else if c.leanBackwards() != leanOK { panic("text: invalid state") } } // A Caret can't be placed at the end of a Paragraph, unless it is the // final Paragraph. A simple way to enforce this is to lean forwards. c.leanForwards() } c.seqNum = c.f.seqNum } // Read satisfies the io.Reader interface by copying those bytes after the // Caret and incrementing the Caret. func (c *Caret) Read(buf []byte) (n int, err error) { c.calculatePLBK() for len(buf) > 0 { if j := c.f.boxes[c.b].j; c.k < j { nn := copy(buf, c.f.text[c.k:j]) buf = buf[nn:] n += nn c.pos += int32(nn) c.k += int32(nn) } // A Caret can't be placed at the end of a Paragraph, unless it is the // final Paragraph. A simple way to enforce this is to lean forwards. if c.leanForwards() == leanFailedEOF { break } } if int(c.pos) == c.f.len { err = io.EOF } return n, err } // ReadByte returns the next byte after the Caret and increments the Caret. func (c *Caret) ReadByte() (x byte, err error) { c.calculatePLBK() for { if j := c.f.boxes[c.b].j; c.k < j { x = c.f.text[c.k] c.pos++ c.k++ // A Caret can't be placed at the end of a Paragraph, unless it is // the final Paragraph. A simple way to enforce this is to lean // forwards. c.leanForwards() return x, nil } if c.leanForwards() == leanFailedEOF { return 0, io.EOF } } } // ReadRune returns the next rune after the Caret and increments the Caret. func (c *Caret) ReadRune() (r rune, size int, err error) { c.calculatePLBK() for { if c.k < c.f.boxes[c.b].j { r, size, c.b, c.k = c.f.readRune(c.b, c.k) c.pos += int32(size) // A Caret can't be placed at the end of a Paragraph, unless it is // the final Paragraph. A simple way to enforce this is to lean // forwards. c.leanForwards() return r, size, nil } if c.leanForwards() == leanFailedEOF { return 0, 0, io.EOF } } } // WriteByte inserts x into the Frame's text at the Caret and increments the // Caret. func (c *Caret) WriteByte(x byte) error { c.tmp[0] = x return c.write(c.tmp[:1], "") } // WriteRune inserts r into the Frame's text at the Caret and increments the // Caret. func (c *Caret) WriteRune(r rune) (size int, err error) { size = utf8.EncodeRune(c.tmp[:], r) if err = c.write(c.tmp[:size], ""); err != nil { return 0, err } return size, nil } // WriteString inserts s into the Frame's text at the Caret and increments the // Caret. func (c *Caret) WriteString(s string) (n int, err error) { for len(s) > 0 { i := 1 + strings.IndexByte(s, '\n') if i == 0 { i = len(s) } if err = c.write(nil, s[:i]); err != nil { break } n += i s = s[i:] } return n, err } // Write inserts s into the Frame's text at the Caret and increments the Caret. func (c *Caret) Write(s []byte) (n int, err error) { for len(s) > 0 { i := 1 + bytes.IndexByte(s, '\n') if i == 0 { i = len(s) } if err = c.write(s[:i], ""); err != nil { break } n += i s = s[i:] } return n, err } // write inserts a []byte or string into the Frame's text at the Caret. // // Exactly one of s0 and s1 must be non-empty. That non-empty argument must // contain at most one '\n' and if it does contain one, it must be the final // byte. func (c *Caret) write(s0 []byte, s1 string) error { if m := maxLen - len(c.f.text); len(s0) > m || len(s1) > m { return errors.New("text: insufficient space for writing") } // Ensure that the Caret is at the end of its Box, and that Box's text is // at the end of the Frame's buffer. c.calculatePLBK() for { bb, n := &c.f.boxes[c.b], int32(len(c.f.text)) if c.k == bb.j && c.k == n { break } // If the Box's text is empty, move its empty i:j range to the // equivalent empty range at the end of c.f.text. if bb.i == bb.j { bb.i = n bb.j = n for _, cc := range c.f.carets { if cc.b == c.b { cc.k = n } } continue } // Make the Caret be at the end of its Box. if c.k != bb.j { c.splitBox(true) continue } // Make a new empty Box and move the Caret to it. c.splitBox(true) c.leanForwards() } c.f.invalidateCaches() c.f.paragraphs[c.p].invalidateCaches() c.f.lines[c.l].invalidateCaches() length, nl := len(s0), false if length > 0 { nl = s0[length-1] == '\n' c.f.text = append(c.f.text, s0...) } else { length = len(s1) nl = s1[length-1] == '\n' c.f.text = append(c.f.text, s1...) } c.f.len += length c.f.boxes[c.b].j += int32(length) c.k += int32(length) for _, cc := range c.f.carets { if cc.pos > c.pos { cc.pos += int32(length) } } c.pos += int32(length) oldL := c.l if nl { breakParagraph(c.f, c.p, c.l, c.b) c.p = c.f.paragraphs[c.p].next c.l = c.f.paragraphs[c.p].firstL c.b = c.f.lines[c.l].firstB c.k = c.f.boxes[c.b].i } // TODO: re-layout the new c.p paragraph, if we saw '\n'. layout(c.f, oldL) c.f.seqNum++ return nil } // breakParagraph breaks the Paragraph p into two Paragraphs, just after Box b // in Line l in Paragraph p. b's text must end with a '\n'. The new Paragraph // is inserted after p. func breakParagraph(f *Frame, p, l, b int32) { // Assert that the Box b's text ends with a '\n'. if j := f.boxes[b].j; j == 0 || f.text[j-1] != '\n' { panic("text: invalid state") } // Make a new, empty Paragraph after this Paragraph p. newP, _ := f.newParagraph() nextP := f.paragraphs[p].next if nextP != 0 { f.paragraphs[nextP].prev = newP } f.paragraphs[newP].next = nextP f.paragraphs[newP].prev = p f.paragraphs[p].next = newP // Any Lines in this Paragraph after the break point's Line l move to the // newP Paragraph. if nextL := f.lines[l].next; nextL != 0 { f.lines[l].next = 0 f.lines[nextL].prev = 0 f.paragraphs[newP].firstL = nextL } // Any Boxes in this Line after the break point's Box b move to a new Line // at the start of the newP Paragraph. if nextB := f.boxes[b].next; nextB != 0 { f.boxes[b].next = 0 f.boxes[nextB].prev = 0 newL, _ := f.newLine() f.lines[newL].firstB = nextB if newPFirstL := f.paragraphs[newP].firstL; newPFirstL != 0 { f.lines[newL].next = newPFirstL f.lines[newPFirstL].prev = newL } f.paragraphs[newP].firstL = newL } // Make the newP Paragraph's first Line and first Box explicit, since // Carets require an explicit p, l and b. { pp := &f.paragraphs[newP] if pp.firstL == 0 { pp.firstL, _ = f.newLine() } ll := &f.lines[pp.firstL] if ll.firstB == 0 { ll.firstB, _ = f.newBox() } } // TODO: re-layout the newP paragraph. } // breakLine breaks the Line l at text index k in Box b. The b-and-k index must // not be at the start or end of the Line. Text to the right of b-and-k in the // Line l will be moved to the start of the next Line in the Paragraph, with // that next Line being created if it didn't already exist. func breakLine(f *Frame, l, b, k int32) { // Split this Box into two if necessary, so that k equals a Box's j end. bb := &f.boxes[b] if k != bb.j { if k == bb.i { panic("TODO: degenerate split left, possibly adjusting the Line's firstB??") } newB, realloc := f.newBox() if realloc { bb = &f.boxes[b] } nextB := bb.next if nextB != 0 { f.boxes[nextB].prev = newB } f.boxes[newB].next = nextB f.boxes[newB].prev = b f.boxes[newB].i = k f.boxes[newB].j = bb.j bb.next = newB bb.j = k } // Assert that the break point isn't already at the start or end of the Line. if bb.next == 0 || (bb.prev == 0 && k == bb.i) { panic("text: invalid state") } // Insert a line after this one, if one doesn't already exist. ll := &f.lines[l] if ll.next == 0 { newL, realloc := f.newLine() if realloc { ll = &f.lines[l] } f.lines[newL].prev = l ll.next = newL } // Move the remaining boxes to the next line. nextB, nextL := bb.next, ll.next bb.next = 0 f.boxes[nextB].prev = 0 fb := f.lines[nextL].firstB f.lines[nextL].firstB = nextB // If the next Line already contained Boxes, append them to the end of the // nextB chain, and join the two newly linked Boxes if possible. if fb != 0 { lb := f.lines[nextL].lastBox(f) lbb := &f.boxes[lb] fbb := &f.boxes[fb] lbb.next = fb fbb.prev = lb f.joinBoxes(lb, fb, lbb, fbb) } } // layout inserts a soft return in the Line l if its text measures longer than // f.maxWidth and a suitable line break point is found. This may spill text // onto the next line, which will also be laid out, and so on recursively. func layout(f *Frame, l int32) { if f.maxWidth <= 0 || f.face == nil { return } f.seqNum++ for ; l != 0; l = f.lines[l].next { var ( firstB = f.lines[l].firstB reader = f.lineReader(firstB, f.boxes[firstB].i) breakPoint bAndK prevR rune prevRValid bool advance fixed.Int26_6 ) for { r, _, err := reader.ReadRune() if err != nil || r == '\n' { return } if prevRValid { advance += f.face.Kern(prevR, r) } // TODO: match all whitespace, not just ' '? if r == ' ' { breakPoint = reader.bAndK() } a, ok := f.face.GlyphAdvance(r) if !ok { panic("TODO: is falling back on the U+FFFD glyph the responsibility of the caller or the Face?") } advance += a if r != ' ' && advance > f.maxWidth && breakPoint.b != 0 { breakLine(f, l, breakPoint.b, breakPoint.k) break } prevR, prevRValid = r, true } } } // Delete deletes nBytes bytes in the specified direction from the Caret's // location. It returns the number of bytes deleted, which can be fewer than // that requested if it hits the beginning or end of the Frame. func (c *Caret) Delete(dir Direction, nBytes int) (dBytes int) { if nBytes <= 0 { return 0 } // Convert a backwards delete of n bytes from position p to a forwards // delete of n bytes from position p-n. // // In general, it's easier to delete forwards than backwards. For example, // when crossing paragraph boundaries, it's easier to find the first line // of the next paragraph than the last line of the previous paragraph. if dir == Backwards { newPos := int(c.pos) - nBytes if newPos < 0 { newPos = 0 nBytes = int(c.pos) if nBytes == 0 { return 0 } } c.seek(int32(newPos)) } if int(c.pos) == c.f.len { return 0 } c.calculatePLBK() c.leanForwards() if c.f.boxes[c.b].i != c.k && c.splitBox(false) { c.leanForwards() } for nBytes > 0 && int(c.pos) != c.f.len { bb := &c.f.boxes[c.b] n := bb.j - bb.i newLine := n != 0 && c.f.text[bb.j-1] == '\n' if int(n) > nBytes { n = int32(nBytes) } bb.i += n c.k += n dBytes += int(n) nBytes -= int(n) c.f.len -= int(n) if bb.i != bb.j { break } if newLine { c.joinNextParagraph() } c.leanForwards() } // The mergeIntoOneLine will shake out any empty Boxes. l := c.f.mergeIntoOneLine(c.p) layout(c.f, l) c.f.invalidateCaches() // Compact c.f.text if it's large enough and the fraction of deleted text // is above some threshold. The actual threshold value (25%) is arbitrary. // A lower value means more frequent compactions, so less memory on average // but more CPU. A higher value means the opposite. if len(c.f.text) > 4096 && len(c.f.text)/4 < c.f.deletedLen() { c.f.compactText() } c.f.seqNum++ for _, cc := range c.f.carets { if cc == c { continue } switch relPos := cc.pos - c.pos; { case relPos <= 0: // No-op. case relPos <= int32(dBytes): cc.pos = c.pos default: cc.pos -= int32(dBytes) } } return dBytes } // DeleteRunes deletes nRunes runes in the specified direction from the Caret's // location. It returns the number of runes and bytes deleted, which can be // fewer than that requested if it hits the beginning or end of the Frame. func (c *Caret) DeleteRunes(dir Direction, nRunes int) (dRunes, dBytes int) { // Save the current Caret position, move the Caret by nRunes runes to // calculate how many bytes to delete, restore that saved Caret position, // then delete that many bytes. c.calculatePLBK() savedC := *c if dir == Forwards { for dRunes < nRunes { var size int _, size, c.b, c.k = c.f.readRune(c.b, c.k) if size != 0 { dRunes++ dBytes += size } else if c.leanForwards() != leanOK { break } } } else { for dRunes < nRunes { var size int _, size, c.b, c.k = c.f.readLastRune(c.b, c.k) if size != 0 { dRunes++ dBytes += size } else if c.leanBackwards() != leanOK { break } } } *c = savedC if dBytes != c.Delete(dir, dBytes) { panic("text: invalid state") } return dRunes, dBytes } // joinNextParagraph joins c's current and next Paragraph. That next Paragraph // must exist, and c must be at the last Line of its current Paragraph. func (c *Caret) joinNextParagraph() { pp0 := &c.f.paragraphs[c.p] ll0 := &c.f.lines[c.l] if pp0.next == 0 || ll0.next != 0 { panic("text: invalid state") } pp1 := &c.f.paragraphs[pp0.next] l1 := pp1.firstL ll0.next = l1 c.f.lines[l1].prev = c.l toFree := pp0.next pp0.next = pp1.next if pp0.next != 0 { c.f.paragraphs[pp0.next].prev = c.p } c.f.freeParagraph(toFree) } // splitBox splits the Caret's Box into two, at the Caret's location. Unless // force is set, it does nothing if the Caret is at either edge of its Box. It // returns whether the Box was split. If so, the new Box is created after, not // before, the Caret's current Box. func (c *Caret) splitBox(force bool) bool { bb := &c.f.boxes[c.b] if !force && (c.k == bb.i || c.k == bb.j) { return false } newB, realloc := c.f.newBox() if realloc { bb = &c.f.boxes[c.b] } nextB := bb.next if nextB != 0 { c.f.boxes[nextB].prev = newB } c.f.boxes[newB] = Box{ next: nextB, prev: c.b, i: c.k, j: bb.j, } bb.next = newB bb.j = c.k return true }