package spell import "sort" // bkTree is a Burkhard-Keller tree over the embedded wordlist: a metric tree // keyed by Levenshtein distance that answers "all words within edit distance k" // without scanning the whole dictionary. It is queried only on user trigger // (the suggestion popup), never per render. type bkTree struct { word string children map[int]*bkTree } // buildBKTree inserts words in the given order. Insertion order is descending // frequency, which keeps common words near the root and lets Suggest break // distance ties toward more frequent words cheaply. func buildBKTree(words []string) *bkTree { if len(words) == 0 { return nil } root := &bkTree{word: words[0], children: map[int]*bkTree{}} for _, w := range words[1:] { root.insert(w) } return root } func (t *bkTree) insert(w string) { d := levenshtein(t.word, w) if d == 0 { return // duplicate } if child, ok := t.children[d]; ok { child.insert(w) } else { t.children[d] = &bkTree{word: w, children: map[int]*bkTree{}} } } // candidate is a matched word with its edit distance from the query. type candidate struct { word string dist int } // within collects every word within maxDist of query into out. func (t *bkTree) within(query string, maxDist int, out *[]candidate) { if t == nil { return } d := levenshtein(t.word, query) if d <= maxDist { *out = append(*out, candidate{t.word, d}) } // Triangle inequality: only children whose edge distance falls in // [d-maxDist, d+maxDist] can hold matches. for edge, child := range t.children { if edge >= d-maxDist && edge <= d+maxDist { child.within(query, maxDist, out) } } } // Suggest returns up to max corrections for word within edit distance 2, ranked // by ascending edit distance and, within a distance, by ascending frequency // rank (more common first). func (d *Dict) Suggest(word string, max int) []string { if d.bk == nil || max <= 0 { return nil } q := lowerASCII(word) var cands []candidate // The BK-tree prunes with plain Levenshtein (a true metric, so the triangle // inequality holds); a radius of 2 also captures single transpositions, // which Levenshtein scores as 2. d.bk.within(q, 2, &cands) // Re-score with the transposition-aware OSA distance so "teh"->"the" ranks as // a distance-1 fix, then break ties toward more frequent words. for i := range cands { cands[i].dist = osaDistance(q, cands[i].word) } sort.SliceStable(cands, func(i, j int) bool { if cands[i].dist != cands[j].dist { return cands[i].dist < cands[j].dist } return d.rank[cands[i].word] < d.rank[cands[j].word] }) out := make([]string, 0, max) for _, c := range cands { if c.dist == 0 { continue // the query itself is spelled fine; nothing to suggest } out = append(out, c.word) if len(out) >= max { break } } return out } func lowerASCII(s string) string { b := []byte(s) for i, c := range b { if c >= 'A' && c <= 'Z' { b[i] = c + 32 } } return string(b) } // levenshtein is the classic two-row edit distance over runes. func levenshtein(a, b string) int { ra, rb := []rune(a), []rune(b) if len(ra) == 0 { return len(rb) } if len(rb) == 0 { return len(ra) } prev := make([]int, len(rb)+1) curr := make([]int, len(rb)+1) for j := range prev { prev[j] = j } for i := 1; i <= len(ra); i++ { curr[0] = i for j := 1; j <= len(rb); j++ { cost := 1 if ra[i-1] == rb[j-1] { cost = 0 } curr[j] = min3(prev[j]+1, curr[j-1]+1, prev[j-1]+cost) } prev, curr = curr, prev } return prev[len(rb)] } // osaDistance is the optimal string alignment distance: Levenshtein plus // adjacent transpositions at cost 1 (so "teh" is one edit from "the"). Used only // to rank suggestion candidates, not for BK-tree pruning. func osaDistance(a, b string) int { ra, rb := []rune(a), []rune(b) n, m := len(ra), len(rb) if n == 0 { return m } if m == 0 { return n } d := make([][]int, n+1) for i := range d { d[i] = make([]int, m+1) d[i][0] = i } for j := 0; j <= m; j++ { d[0][j] = j } for i := 1; i <= n; i++ { for j := 1; j <= m; j++ { cost := 1 if ra[i-1] == rb[j-1] { cost = 0 } d[i][j] = min3(d[i-1][j]+1, d[i][j-1]+1, d[i-1][j-1]+cost) if i > 1 && j > 1 && ra[i-1] == rb[j-2] && ra[i-2] == rb[j-1] { if t := d[i-2][j-2] + 1; t < d[i][j] { d[i][j] = t } } } } return d[n][m] } func min3(a, b, c int) int { if b < a { a = b } if c < a { a = c } return a }