1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
|
// Package spell provides a lightweight, pure-Go spellchecker for glint: an
// embedded common-English wordlist for membership tests, a BK-tree for
// edit-distance suggestions, and a hand-editable personal dictionary. It carries
// no cgo dependency so `go build .` and the Homebrew formula stay clean.
package spell
import (
"bufio"
"bytes"
"compress/gzip"
_ "embed"
"strings"
)
// wordsGz holds the raw bytes of words.txt.gz, embedded into the binary at
// build time. Load gunzips it and reads it line by line, one word per line.
//
//go:embed words.txt.gz
var wordsGz []byte
// Dict is a loaded spellchecker: an embedded common-word set plus a personal
// dictionary, with a BK-tree over the embedded words for suggestions.
//
// Load fills words, ranked, rank and bk from the embedded wordlist and leaves
// personal as an empty, non-nil set. Known lowercases every query before
// lookup, so keys in words and personal only match when they are lowercase.
type Dict struct {
// words is the set of embedded words, lowercased.
words map[string]struct{}
// ranked lists the embedded words in descending frequency order, each word
// once; a word's index here is its frequency rank.
ranked []string
// rank maps word -> frequency rank (0 = most common), i.e. its index in
// ranked.
rank map[string]int
// personal is the set of personal-dictionary words, lowercased.
personal map[string]struct{}
// bk is the suggestion index over the embedded words, built from ranked.
bk *bkTree
// personalPath is the personal dictionary file (~/.config/glint/dict.txt);
// "" until SetPersonalPath.
personalPath string
}
// Load reads the embedded wordlist into a Dict. It gunzips wordsGz, takes one
// word per line, and fills the membership set, the frequency ranking and the
// BK-tree used for suggestions. A word's rank is its position among the
// distinct words of the list (first = 0); blank lines and repeated words are
// skipped, so the first occurrence of a word fixes its rank.
//
// Entries are lowercased as they are read. Known lowercases every query before
// looking it up, so an entry stored with a capital letter could never match.
//
// The personal dictionary is not loaded here (it starts out empty); call
// LoadPersonal once the config path is known.
//
// Load returns an error if the embedded data is not a valid gzip stream or
// cannot be read to the end.
func Load() (*Dict, error) {
	// Rough size of the embedded list, used only to pre-size the containers.
	const sizeHint = 60000

	zr, err := gzip.NewReader(bytes.NewReader(wordsGz))
	if err != nil {
		return nil, err
	}
	// The Close error is deliberately dropped: the source is an in-memory
	// buffer, and read problems (including a bad checksum) already surface
	// through the scanner below.
	defer func() { _ = zr.Close() }()

	d := &Dict{
		words:    make(map[string]struct{}, sizeHint),
		ranked:   make([]string, 0, sizeHint),
		rank:     make(map[string]int, sizeHint),
		personal: make(map[string]struct{}),
	}

	sc := bufio.NewScanner(zr)
	for sc.Scan() {
		// Normalize to the form Known looks up: trimmed and lowercase.
		w := strings.ToLower(strings.TrimSpace(sc.Text()))
		if w == "" {
			continue
		}
		if _, dup := d.words[w]; dup {
			continue
		}
		d.rank[w] = len(d.ranked)
		d.ranked = append(d.ranked, w)
		d.words[w] = struct{}{}
	}
	if err := sc.Err(); err != nil {
		return nil, err
	}

	d.bk = buildBKTree(d.ranked)
	return d, nil
}
// Known reports whether word is spelled correctly. The word is trimmed and
// lowercased, then accepted when any of the following holds:
//
//   - it is empty (there is nothing to flag);
//   - it is a direct hit in the embedded or the personal dictionary;
//   - its base after dropping a trailing possessive ('s or ') is a hit;
//   - either of those forms is a hit once a simple plural ending ("s" or
//     "es") is dropped.
//
// So "editor's", "editors" and "editors'" all ride on "editor". The plural
// rule is intentionally lenient: it does not check that the plural is a real
// formation, only that the stem is a known word.
func (d *Dict) Known(word string) bool {
	w := strings.ToLower(strings.TrimSpace(word))
	if w == "" {
		return true
	}

	// Candidate forms, most literal first: the word itself, then its
	// possessive base when it has one.
	forms := []string{w}
	if base, ok := trimPossessive(w); ok {
		forms = append(forms, base)
	}
	for _, f := range forms {
		if d.has(f) {
			return true
		}
	}

	// Plural fallback: retry each form with a trailing "es" or "s" removed.
	// An empty stem (the input was just "s" or "es") never counts.
	for _, f := range forms {
		for _, suf := range []string{"es", "s"} {
			if !strings.HasSuffix(f, suf) {
				continue
			}
			if stem := strings.TrimSuffix(f, suf); stem != "" && d.has(stem) {
				return true
			}
		}
	}
	return false
}
// has reports whether w is present, exactly as given, in the embedded word set
// or in the personal dictionary. No normalization happens here; callers pass
// the already-lowercased form.
func (d *Dict) has(w string) bool {
	_, inEmbedded := d.words[w]
	_, inPersonal := d.personal[w]
	return inEmbedded || inPersonal
}
// trimPossessive strips one possessive ending from w and returns the remaining
// base together with true. Recognized endings are "'s" and a bare "'", written
// with either the straight (') or the curly (’) apostrophe; the two-character
// forms are tried first. When w has no such ending it is returned unchanged
// with false.
func trimPossessive(w string) (string, bool) {
	var ending string
	switch {
	case strings.HasSuffix(w, "'s"):
		ending = "'s"
	case strings.HasSuffix(w, "’s"):
		ending = "’s"
	case strings.HasSuffix(w, "'"):
		ending = "'"
	case strings.HasSuffix(w, "’"):
		ending = "’"
	default:
		return w, false
	}
	return w[:len(w)-len(ending)], true
}
|